GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 350 397 88.2 %
Date: 2021-09-29 Branches: 727 1493 48.7 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
#include "util/rational.h"
22
#include "util/string.h"
23
24
using namespace std;
25
using namespace cvc5::kind;
26
27
namespace cvc5 {
28
namespace theory {
29
namespace strings {
30
31
13464
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
32
                                       std::vector<Node>& children,
33
                                       int dir)
34
{
35
26928
  Trace("regexp-ext-rewrite-debug")
36
13464
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
37
26928
  Trace("regexp-ext-rewrite-debug")
38
13464
      << "  mchildren : " << mchildren << std::endl;
39
13464
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
40
13464
  NodeManager* nm = NodeManager::currentNM();
41
13464
  unsigned tmin = dir < 0 ? 0 : dir;
42
13464
  unsigned tmax = dir < 0 ? 1 : dir;
43
  // try to remove off front and back
44
37462
  for (unsigned t = 0; t < 2; t++)
45
  {
46
26312
    if (tmin <= t && t <= tmax)
47
    {
48
16993
      bool do_next = true;
49
49909
      while (!children.empty() && !mchildren.empty() && do_next)
50
      {
51
18772
        do_next = false;
52
35230
        Node xc = mchildren[mchildren.size() - 1];
53
35230
        Node rc = children[children.size() - 1];
54
37544
        Trace("regexp-ext-rewrite-debug")
55
18772
            << "* " << xc << " in " << rc << std::endl;
56
18772
        Assert(rc.getKind() != REGEXP_CONCAT);
57
18772
        Assert(xc.getKind() != STRING_CONCAT);
58
18772
        if (rc.getKind() == STRING_TO_REGEXP)
59
        {
60
6976
          if (xc == rc[0])
61
          {
62
1015
            children.pop_back();
63
1015
            mchildren.pop_back();
64
1015
            do_next = true;
65
1015
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
66
          }
67
5961
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
68
          {
69
206
            Trace("regexp-ext-rewrite-debug")
70
103
                << "- ignore empty RE" << std::endl;
71
            // ignore and continue
72
103
            children.pop_back();
73
103
            do_next = true;
74
          }
75
5858
          else if (xc.isConst() && rc[0].isConst())
76
          {
77
            // split the constant
78
            size_t index;
79
3213
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
80
5130
            Trace("regexp-ext-rewrite-debug")
81
5130
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
82
2565
                << " -> " << s << " " << index << " " << t << std::endl;
83
2565
            if (s.isNull())
84
            {
85
3834
              Trace("regexp-ext-rewrite-debug")
86
1917
                  << "...return false" << std::endl;
87
1917
              return nm->mkConst(false);
88
            }
89
            else
90
            {
91
1296
              Trace("regexp-ext-rewrite-debug")
92
648
                  << "- strip equal const" << std::endl;
93
648
              children.pop_back();
94
648
              mchildren.pop_back();
95
648
              if (index == 0)
96
              {
97
590
                mchildren.push_back(s);
98
              }
99
              else
100
              {
101
58
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
102
              }
103
            }
104
648
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
105
648
            do_next = true;
106
          }
107
        }
108
11796
        else if (xc.isConst())
109
        {
110
          // check for constants
111
8265
          cvc5::String s = xc.getConst<String>();
112
4331
          if (Word::isEmpty(xc))
113
          {
114
48
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
115
            // ignore and continue
116
48
            mchildren.pop_back();
117
48
            do_next = true;
118
          }
119
4283
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
120
          {
121
1413
            if (!isConstRegExp(rc))
122
            {
123
              // if a non-standard re.range term, abort
124
249
              return Node::null();
125
            }
126
2577
            std::vector<unsigned> ssVec;
127
1413
            ssVec.push_back(t == 0 ? s.back() : s.front());
128
2577
            cvc5::String ss(ssVec);
129
1413
            if (testConstStringInRegExp(ss, 0, rc))
130
            {
131
              // strip off one character
132
1164
              mchildren.pop_back();
133
1164
              if (s.size() > 1)
134
              {
135
769
                if (t == 0)
136
                {
137
397
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
138
                }
139
                else
140
                {
141
372
                  mchildren.push_back(nm->mkConst(s.substr(1)));
142
                }
143
              }
144
1164
              children.pop_back();
145
1164
              do_next = true;
146
            }
147
            else
148
            {
149
498
              Trace("regexp-ext-rewrite-debug")
150
249
                  << "...return false" << std::endl;
151
249
              return nm->mkConst(false);
152
            }
153
          }
154
2870
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
155
          {
156
            // see if any/each child does not work
157
1340
            bool result_valid = true;
158
2532
            Node result;
159
2532
            Node emp_s = nm->mkConst(String(""));
160
4220
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
161
            {
162
5760
              std::vector<Node> mchildren_s;
163
5760
              std::vector<Node> children_s;
164
2880
              mchildren_s.push_back(xc);
165
2880
              utils::getConcat(rc[i], children_s);
166
2880
              Trace("regexp-ext-rewrite-debug") << push;
167
5760
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
168
2880
              Trace("regexp-ext-rewrite-debug") << pop;
169
2880
              if (!ret.isNull())
170
              {
171
                // one conjunct cannot be satisfied, return false
172
1496
                if (rc.getKind() == REGEXP_INTER)
173
                {
174
                  Trace("regexp-ext-rewrite-debug")
175
                      << "...return " << ret << std::endl;
176
                  return ret;
177
                }
178
              }
179
              else
180
              {
181
1384
                if (children_s.empty())
182
                {
183
                  // if we were able to fully consume, store the result
184
955
                  Assert(mchildren_s.size() <= 1);
185
955
                  if (mchildren_s.empty())
186
                  {
187
468
                    mchildren_s.push_back(emp_s);
188
                  }
189
955
                  if (result.isNull())
190
                  {
191
888
                    result = mchildren_s[0];
192
                  }
193
67
                  else if (result != mchildren_s[0])
194
                  {
195
67
                    result_valid = false;
196
                  }
197
                }
198
                else
199
                {
200
429
                  result_valid = false;
201
                }
202
              }
203
            }
204
1340
            if (result_valid)
205
            {
206
896
              if (result.isNull())
207
              {
208
                // all disjuncts cannot be satisfied, return false
209
148
                Assert(rc.getKind() == REGEXP_UNION);
210
296
                Trace("regexp-ext-rewrite-debug")
211
148
                    << "...return false" << std::endl;
212
148
                return nm->mkConst(false);
213
              }
214
              else
215
              {
216
1496
                Trace("regexp-ext-rewrite-debug")
217
748
                    << "- same result, try again, children now " << children
218
748
                    << std::endl;
219
                // all branches led to the same result
220
748
                children.pop_back();
221
748
                mchildren.pop_back();
222
748
                if (result != emp_s)
223
                {
224
396
                  mchildren.push_back(result);
225
                }
226
748
                do_next = true;
227
              }
228
            }
229
          }
230
1530
          else if (rc.getKind() == REGEXP_STAR)
231
          {
232
            // check if there is no way that this star can be unrolled even once
233
2944
            std::vector<Node> mchildren_s;
234
1472
            mchildren_s.insert(
235
2944
                mchildren_s.end(), mchildren.begin(), mchildren.end());
236
1472
            if (t == 1)
237
            {
238
863
              std::reverse(mchildren_s.begin(), mchildren_s.end());
239
            }
240
2944
            std::vector<Node> children_s;
241
1472
            utils::getConcat(rc[0], children_s);
242
2944
            Trace("regexp-ext-rewrite-debug")
243
1472
                << "- recursive call on body of star" << std::endl;
244
1472
            Trace("regexp-ext-rewrite-debug") << push;
245
2944
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
246
1472
            Trace("regexp-ext-rewrite-debug") << pop;
247
1472
            if (!ret.isNull())
248
            {
249
500
              Trace("regexp-ext-rewrite-debug")
250
250
                  << "- CRE : regexp star infeasable " << xc << " " << rc
251
250
                  << std::endl;
252
250
              children.pop_back();
253
250
              if (!children.empty())
254
              {
255
195
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
256
195
                do_next = true;
257
              }
258
            }
259
            else
260
            {
261
1222
              if (children_s.empty())
262
              {
263
                // Check if beyond this, we hit a conflict. In this case, we
264
                // must repeat.  Notice that we do not treat the case where
265
                // there are no more strings to consume as a failure, since
266
                // we may be within a recursive call, see issue #5510.
267
963
                bool can_skip = true;
268
963
                if (children.size() > 1)
269
                {
270
1552
                  std::vector<Node> mchildren_ss;
271
776
                  mchildren_ss.insert(
272
1552
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
273
1552
                  std::vector<Node> children_ss;
274
776
                  children_ss.insert(
275
1552
                      children_ss.end(), children.begin(), children.end() - 1);
276
776
                  if (t == 1)
277
                  {
278
317
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
279
317
                    std::reverse(children_ss.begin(), children_ss.end());
280
                  }
281
1552
                  Trace("regexp-ext-rewrite-debug")
282
776
                      << "- recursive call required repeat star" << std::endl;
283
776
                  Trace("regexp-ext-rewrite-debug") << push;
284
1552
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
285
776
                  Trace("regexp-ext-rewrite-debug") << pop;
286
776
                  if (!rets.isNull())
287
                  {
288
443
                    can_skip = false;
289
                  }
290
                }
291
963
                if (!can_skip)
292
                {
293
886
                  TypeNode stype = nm->stringType();
294
886
                  Node prev = utils::mkConcat(mchildren, stype);
295
886
                  Trace("regexp-ext-rewrite-debug")
296
443
                      << "- can't skip" << std::endl;
297
                  // take the result of fully consuming once
298
443
                  if (t == 1)
299
                  {
300
234
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
301
                  }
302
443
                  mchildren.clear();
303
443
                  mchildren.insert(
304
886
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
305
886
                  Node curr = utils::mkConcat(mchildren, stype);
306
443
                  do_next = (prev != curr);
307
886
                  Trace("regexp-ext-rewrite-debug")
308
443
                      << "- do_next = " << do_next << std::endl;
309
                }
310
                else
311
                {
312
1040
                  Trace("regexp-ext-rewrite-debug")
313
520
                      << "- can skip " << rc << " from " << xc << std::endl;
314
                }
315
              }
316
            }
317
          }
318
        }
319
16458
        if (!do_next)
320
        {
321
24188
          Trace("regexp-ext-rewrite")
322
12094
              << "- cannot consume : " << xc << " " << rc << std::endl;
323
        }
324
      }
325
    }
326
23998
    if (dir != 0)
327
    {
328
16696
      std::reverse(children.begin(), children.end());
329
16696
      std::reverse(mchildren.begin(), mchildren.end());
330
    }
331
  }
332
11150
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
333
11150
  return Node::null();
334
}
335
336
4933
bool RegExpEntail::isConstRegExp(TNode t)
337
{
338
9866
  std::unordered_set<TNode> visited;
339
9866
  std::vector<TNode> visit;
340
9866
  TNode cur;
341
4933
  visit.push_back(t);
342
11951
  do
343
  {
344
16884
    cur = visit.back();
345
16884
    visit.pop_back();
346
16884
    if (visited.find(cur) == visited.end())
347
    {
348
14141
      visited.insert(cur);
349
14141
      Kind ck = cur.getKind();
350
14141
      if (ck == STRING_TO_REGEXP)
351
      {
352
4507
        if (!cur[0].isConst())
353
        {
354
362
          return false;
355
        }
356
      }
357
9634
      else if (ck == REGEXP_RANGE)
358
      {
359
5691
        for (const Node& cn : cur)
360
        {
361
3794
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
362
          {
363
            return false;
364
          }
365
        }
366
      }
367
7737
      else if (ck == ITE)
368
      {
369
        return false;
370
      }
371
7737
      else if (cur.isVar())
372
      {
373
25
        return false;
374
      }
375
      else
376
      {
377
19897
        for (const Node& cn : cur)
378
        {
379
12185
          visit.push_back(cn);
380
        }
381
      }
382
    }
383
16497
  } while (!visit.empty());
384
4546
  return true;
385
}
386
387
877329
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
388
                                           unsigned index_start,
389
                                           TNode r)
390
{
391
877329
  Assert(index_start <= s.size());
392
1754658
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
393
877329
                        << index_start << std::endl;
394
877329
  Assert(!r.isVar());
395
877329
  Kind k = r.getKind();
396
877329
  switch (k)
397
  {
398
22050
    case STRING_TO_REGEXP:
399
    {
400
44100
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
401
22050
      if (r[0].isConst())
402
      {
403
22050
        return (s2 == r[0].getConst<String>());
404
      }
405
      Assert(false) << "RegExp contains variables";
406
      return false;
407
    }
408
3364
    case REGEXP_CONCAT:
409
    {
410
3364
      if (s.size() != index_start)
411
      {
412
5394
        std::vector<int> vec_k(r.getNumChildren(), -1);
413
2697
        int start = 0;
414
2697
        int left = (int)s.size() - index_start;
415
2697
        int i = 0;
416
26545
        while (i < (int)r.getNumChildren())
417
        {
418
14621
          bool flag = true;
419
14621
          if (i == (int)r.getNumChildren() - 1)
420
          {
421
2668
            if (testConstStringInRegExp(s, index_start + start, r[i]))
422
            {
423
815
              return true;
424
            }
425
          }
426
11953
          else if (i == -1)
427
          {
428
1882
            return false;
429
          }
430
          else
431
          {
432
27566
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
433
            {
434
40714
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
435
23219
              if (testConstStringInRegExp(t, 0, r[i]))
436
              {
437
5724
                start += vec_k[i];
438
5724
                left -= vec_k[i];
439
5724
                flag = false;
440
5724
                ++i;
441
5724
                vec_k[i] = -1;
442
5724
                break;
443
              }
444
            }
445
          }
446
447
11924
          if (flag)
448
          {
449
6200
            --i;
450
6200
            if (i >= 0)
451
            {
452
4318
              start -= vec_k[i];
453
4318
              left += vec_k[i];
454
            }
455
          }
456
        }
457
        return false;
458
      }
459
      else
460
      {
461
940
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
462
        {
463
940
          if (!testConstStringInRegExp(s, index_start, r[i]))
464
          {
465
667
            return false;
466
          }
467
        }
468
        return true;
469
      }
470
    }
471
273806
    case REGEXP_UNION:
472
    {
473
794189
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
474
      {
475
539875
        if (testConstStringInRegExp(s, index_start, r[i]))
476
        {
477
19492
          return true;
478
        }
479
      }
480
254314
      return false;
481
    }
482
885
    case REGEXP_INTER:
483
    {
484
1154
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
485
      {
486
1033
        if (!testConstStringInRegExp(s, index_start, r[i]))
487
        {
488
764
          return false;
489
        }
490
      }
491
121
      return true;
492
    }
493
28369
    case REGEXP_STAR:
494
    {
495
28369
      if (s.size() != index_start)
496
      {
497
301276
        for (unsigned i = s.size() - index_start; i > 0; --i)
498
        {
499
559295
          cvc5::String t = s.substr(index_start, i);
500
284146
          if (testConstStringInRegExp(t, 0, r[0]))
501
          {
502
47382
            if (index_start + i == s.size()
503
47382
                || testConstStringInRegExp(s, index_start + i, r))
504
            {
505
8997
              return true;
506
            }
507
          }
508
        }
509
17130
        return false;
510
      }
511
      else
512
      {
513
2242
        return true;
514
      }
515
    }
516
4
    case REGEXP_EMPTY:
517
    {
518
4
      return false;
519
    }
520
10644
    case REGEXP_SIGMA:
521
    {
522
10644
      if (s.size() == index_start + 1)
523
      {
524
4520
        return true;
525
      }
526
      else
527
      {
528
6124
        return false;
529
      }
530
    }
531
537957
    case REGEXP_RANGE:
532
    {
533
537957
      if (s.size() == index_start + 1)
534
      {
535
34923
        unsigned a = r[0].getConst<String>().front();
536
34923
        unsigned b = r[1].getConst<String>().front();
537
34923
        unsigned c = s.back();
538
34923
        return (a <= c && c <= b);
539
      }
540
      else
541
      {
542
503034
        return false;
543
      }
544
    }
545
    case REGEXP_LOOP:
546
    {
547
      NodeManager* nm = NodeManager::currentNM();
548
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
549
      if (s.size() == index_start)
550
      {
551
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
552
      }
553
      else if (l == 0 && r[1] == r[2])
554
      {
555
        return false;
556
      }
557
      else
558
      {
559
        Assert(r.getNumChildren() == 3)
560
            << "String rewriter error: LOOP has 2 children";
561
        if (l == 0)
562
        {
563
          // R{0,u}
564
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
565
          for (unsigned len = s.size() - index_start; len >= 1; len--)
566
          {
567
            cvc5::String t = s.substr(index_start, len);
568
            if (testConstStringInRegExp(t, 0, r[0]))
569
            {
570
              if (len + index_start == s.size())
571
              {
572
                return true;
573
              }
574
              else
575
              {
576
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
577
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
578
                if (testConstStringInRegExp(s, index_start + len, r2))
579
                {
580
                  return true;
581
                }
582
              }
583
            }
584
          }
585
          return false;
586
        }
587
        else
588
        {
589
          // R{l,l}
590
          Assert(r[1] == r[2])
591
              << "String rewriter error: LOOP nums are not equal";
592
          if (l > s.size() - index_start)
593
          {
594
            if (testConstStringInRegExp(s, s.size(), r[0]))
595
            {
596
              l = s.size() - index_start;
597
            }
598
            else
599
            {
600
              return false;
601
            }
602
          }
603
          for (unsigned len = 1; len <= s.size() - index_start; len++)
604
          {
605
            cvc5::String t = s.substr(index_start, len);
606
            if (testConstStringInRegExp(t, 0, r[0]))
607
            {
608
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
609
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
610
              if (testConstStringInRegExp(s, index_start + len, r2))
611
              {
612
                return true;
613
              }
614
            }
615
          }
616
          return false;
617
        }
618
      }
619
    }
620
250
    case REGEXP_COMPLEMENT:
621
    {
622
250
      return !testConstStringInRegExp(s, index_start, r[0]);
623
      break;
624
    }
625
    default:
626
    {
627
      Assert(!utils::isRegExpKind(k));
628
      return false;
629
    }
630
  }
631
}
632
633
188
bool RegExpEntail::hasEpsilonNode(TNode node)
634
{
635
581
  for (const Node& nc : node)
636
  {
637
421
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
638
    {
639
28
      return true;
640
    }
641
  }
642
160
  return false;
643
}
644
645
3005
Node RegExpEntail::getFixedLengthForRegexp(Node n)
646
{
647
3005
  NodeManager* nm = NodeManager::currentNM();
648
3005
  if (n.getKind() == STRING_TO_REGEXP)
649
  {
650
978
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
651
967
    ret = Rewriter::rewrite(ret);
652
967
    if (ret.isConst())
653
    {
654
956
      return ret;
655
    }
656
  }
657
2038
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
658
  {
659
644
    return nm->mkConst(Rational(1));
660
  }
661
1394
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
662
  {
663
984
    Node ret;
664
1127
    for (const Node& nc : n)
665
    {
666
1579
      Node flc = getFixedLengthForRegexp(nc);
667
944
      if (flc.isNull() || (!ret.isNull() && ret != flc))
668
      {
669
309
        return Node::null();
670
      }
671
635
      else if (ret.isNull())
672
      {
673
        // first time
674
430
        ret = flc;
675
      }
676
    }
677
183
    return ret;
678
  }
679
902
  else if (n.getKind() == REGEXP_CONCAT)
680
  {
681
854
    NodeBuilder nb(PLUS);
682
815
    for (const Node& nc : n)
683
    {
684
1202
      Node flc = getFixedLengthForRegexp(nc);
685
814
      if (flc.isNull())
686
      {
687
426
        return flc;
688
      }
689
388
      nb << flc;
690
    }
691
2
    Node ret = nb.constructNode();
692
1
    ret = Rewriter::rewrite(ret);
693
1
    return ret;
694
  }
695
486
  return Node::null();
696
}
697
698
495
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
699
{
700
495
  Assert(Rewriter::rewrite(r1) == r1);
701
495
  Assert(Rewriter::rewrite(r2) == r2);
702
495
  if (r1 == r2)
703
  {
704
10
    return true;
705
  }
706
707
  // This method only works on a fragment of regular expressions
708
485
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
709
  {
710
353
    return false;
711
  }
712
132
  NodeManager* nm = NodeManager::currentNM();
713
264
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
714
264
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
715
716
264
  std::vector<Node> v1, v2;
717
132
  utils::getRegexpComponents(r1, v1);
718
132
  utils::getRegexpComponents(r2, v2);
719
720
  // In the following, we iterate over `r2` (the "includee") and try to
721
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
722
  // positions in `r1` that we could currently be at.
723
264
  std::unordered_set<size_t> newIdxs = {0};
724
264
  std::unordered_set<size_t> idxs;
725
776
  for (const Node& n2 : v2)
726
  {
727
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
728
    // removed and for (re.* re.allchar), we additionally include the option of
729
    // skipping it. Indices must be smaller than the size of `v1`.
730
686
    idxs.clear();
731
2283
    for (size_t idx : newIdxs)
732
    {
733
1597
      if (idx < v1.size())
734
      {
735
1568
        idxs.insert(idx);
736
1568
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
737
        {
738
868
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
739
868
          idxs.insert(idx + 1);
740
        }
741
      }
742
    }
743
686
    newIdxs.clear();
744
745
686
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
746
    {
747
497
      Assert(n2 == sigma
748
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
749
2346
      for (size_t idx : idxs)
750
      {
751
1849
        if (v1[idx] == sigma || v1[idx] == n2)
752
        {
753
          // Given a character or an re.allchar in `r2`, we can either
754
          // match it with a corresponding character in `r1` or an
755
          // re.allchar
756
529
          newIdxs.insert(idx + 1);
757
        }
758
      }
759
    }
760
761
3122
    for (size_t idx : idxs)
762
    {
763
2436
      if (v1[idx] == sigmaStar)
764
      {
765
        // (re.* re.allchar) can match an arbitrary amount of `r2`
766
967
        newIdxs.insert(idx);
767
      }
768
1469
      else if (utils::isUnboundedWildcard(v1, idx))
769
      {
770
        // If a series of re.allchar is followed by (re.* re.allchar), we
771
        // can decide not to "waste" the re.allchar because the order of
772
        // the two wildcards is not observable (i.e. it does not change
773
        // the sequences matched by the regular expression)
774
438
        newIdxs.insert(idx);
775
      }
776
    }
777
778
686
    if (newIdxs.empty())
779
    {
780
      // If there are no candidates, we can't match the remainder of r2
781
42
      return false;
782
    }
783
  }
784
785
  // We have processed all of `r2`. We are now looking if there was also a
786
  // path to the end in `r1`. This makes sure that we don't have leftover
787
  // bits in `r1` that don't match anything in `r2`.
788
90
  bool result = false;
789
250
  for (size_t idx : newIdxs)
790
  {
791
202
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
792
    {
793
42
      result = true;
794
42
      break;
795
    }
796
  }
797
798
90
  return result;
799
}
800
801
}  // namespace strings
802
}  // namespace theory
803
22746
}  // namespace cvc5