GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 346 395 87.6 %
Date: 2021-05-24 Branches: 718 1493 48.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
22
using namespace std;
23
using namespace cvc5::kind;
24
25
namespace cvc5 {
26
namespace theory {
27
namespace strings {
28
29
8185
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
30
                                       std::vector<Node>& children,
31
                                       int dir)
32
{
33
16370
  Trace("regexp-ext-rewrite-debug")
34
8185
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
35
16370
  Trace("regexp-ext-rewrite-debug")
36
8185
      << "  mchildren : " << mchildren << std::endl;
37
8185
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
38
8185
  NodeManager* nm = NodeManager::currentNM();
39
8185
  unsigned tmin = dir < 0 ? 0 : dir;
40
8185
  unsigned tmax = dir < 0 ? 1 : dir;
41
  // try to remove off front and back
42
23390
  for (unsigned t = 0; t < 2; t++)
43
  {
44
15990
    if (tmin <= t && t <= tmax)
45
    {
46
10784
      bool do_next = true;
47
32308
      while (!children.empty() && !mchildren.empty() && do_next)
48
      {
49
11547
        do_next = false;
50
22309
        Node xc = mchildren[mchildren.size() - 1];
51
22309
        Node rc = children[children.size() - 1];
52
23094
        Trace("regexp-ext-rewrite-debug")
53
11547
            << "* " << xc << " in " << rc << std::endl;
54
11547
        Assert(rc.getKind() != REGEXP_CONCAT);
55
11547
        Assert(xc.getKind() != STRING_CONCAT);
56
11547
        if (rc.getKind() == STRING_TO_REGEXP)
57
        {
58
4130
          if (xc == rc[0])
59
          {
60
352
            children.pop_back();
61
352
            mchildren.pop_back();
62
352
            do_next = true;
63
352
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
64
          }
65
3778
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
66
          {
67
116
            Trace("regexp-ext-rewrite-debug")
68
58
                << "- ignore empty RE" << std::endl;
69
            // ignore and continue
70
58
            children.pop_back();
71
58
            do_next = true;
72
          }
73
3720
          else if (xc.isConst() && rc[0].isConst())
74
          {
75
            // split the constant
76
            size_t index;
77
804
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
78
1348
            Trace("regexp-ext-rewrite-debug")
79
1348
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
80
674
                << " -> " << s << " " << index << " " << t << std::endl;
81
674
            if (s.isNull())
82
            {
83
1088
              Trace("regexp-ext-rewrite-debug")
84
544
                  << "...return false" << std::endl;
85
544
              return nm->mkConst(false);
86
            }
87
            else
88
            {
89
260
              Trace("regexp-ext-rewrite-debug")
90
130
                  << "- strip equal const" << std::endl;
91
130
              children.pop_back();
92
130
              mchildren.pop_back();
93
130
              if (index == 0)
94
              {
95
116
                mchildren.push_back(s);
96
              }
97
              else
98
              {
99
14
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
100
              }
101
            }
102
130
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
103
130
            do_next = true;
104
          }
105
        }
106
7417
        else if (xc.isConst())
107
        {
108
          // check for constants
109
3683
          cvc5::String s = xc.getConst<String>();
110
1962
          if (Word::isEmpty(xc))
111
          {
112
46
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
113
            // ignore and continue
114
46
            mchildren.pop_back();
115
46
            do_next = true;
116
          }
117
1916
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
118
          {
119
825
            if (!isConstRegExp(rc))
120
            {
121
              // if a non-standard re.range term, abort
122
225
              return Node::null();
123
            }
124
1425
            std::vector<unsigned> ssVec;
125
825
            ssVec.push_back(t == 0 ? s.back() : s.front());
126
1425
            cvc5::String ss(ssVec);
127
825
            if (testConstStringInRegExp(ss, 0, rc))
128
            {
129
              // strip off one character
130
600
              mchildren.pop_back();
131
600
              if (s.size() > 1)
132
              {
133
409
                if (t == 0)
134
                {
135
260
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
136
                }
137
                else
138
                {
139
149
                  mchildren.push_back(nm->mkConst(s.substr(1)));
140
                }
141
              }
142
600
              children.pop_back();
143
600
              do_next = true;
144
            }
145
            else
146
            {
147
450
              Trace("regexp-ext-rewrite-debug")
148
225
                  << "...return false" << std::endl;
149
225
              return nm->mkConst(false);
150
            }
151
          }
152
1091
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
153
          {
154
            // see if any/each child does not work
155
402
            bool result_valid = true;
156
788
            Node result;
157
788
            Node emp_s = nm->mkConst(String(""));
158
1238
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
159
            {
160
1672
              std::vector<Node> mchildren_s;
161
1672
              std::vector<Node> children_s;
162
836
              mchildren_s.push_back(xc);
163
836
              utils::getConcat(rc[i], children_s);
164
836
              Trace("regexp-ext-rewrite-debug") << push;
165
1672
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
166
836
              Trace("regexp-ext-rewrite-debug") << pop;
167
836
              if (!ret.isNull())
168
              {
169
                // one conjunct cannot be satisfied, return false
170
386
                if (rc.getKind() == REGEXP_INTER)
171
                {
172
                  Trace("regexp-ext-rewrite-debug")
173
                      << "...return " << ret << std::endl;
174
                  return ret;
175
                }
176
              }
177
              else
178
              {
179
450
                if (children_s.empty())
180
                {
181
                  // if we were able to fully consume, store the result
182
316
                  Assert(mchildren_s.size() <= 1);
183
316
                  if (mchildren_s.empty())
184
                  {
185
84
                    mchildren_s.push_back(emp_s);
186
                  }
187
316
                  if (result.isNull())
188
                  {
189
304
                    result = mchildren_s[0];
190
                  }
191
12
                  else if (result != mchildren_s[0])
192
                  {
193
12
                    result_valid = false;
194
                  }
195
                }
196
                else
197
                {
198
134
                  result_valid = false;
199
                }
200
              }
201
            }
202
402
            if (result_valid)
203
            {
204
274
              if (result.isNull())
205
              {
206
                // all disjuncts cannot be satisfied, return false
207
16
                Assert(rc.getKind() == REGEXP_UNION);
208
32
                Trace("regexp-ext-rewrite-debug")
209
16
                    << "...return false" << std::endl;
210
16
                return nm->mkConst(false);
211
              }
212
              else
213
              {
214
516
                Trace("regexp-ext-rewrite-debug")
215
258
                    << "- same result, try again, children now " << children
216
258
                    << std::endl;
217
                // all branches led to the same result
218
258
                children.pop_back();
219
258
                mchildren.pop_back();
220
258
                if (result != emp_s)
221
                {
222
194
                  mchildren.push_back(result);
223
                }
224
258
                do_next = true;
225
              }
226
            }
227
          }
228
689
          else if (rc.getKind() == REGEXP_STAR)
229
          {
230
            // check if there is no way that this star can be unrolled even once
231
1238
            std::vector<Node> mchildren_s;
232
619
            mchildren_s.insert(
233
1238
                mchildren_s.end(), mchildren.begin(), mchildren.end());
234
619
            if (t == 1)
235
            {
236
267
              std::reverse(mchildren_s.begin(), mchildren_s.end());
237
            }
238
1238
            std::vector<Node> children_s;
239
619
            utils::getConcat(rc[0], children_s);
240
1238
            Trace("regexp-ext-rewrite-debug")
241
619
                << "- recursive call on body of star" << std::endl;
242
619
            Trace("regexp-ext-rewrite-debug") << push;
243
1238
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
244
619
            Trace("regexp-ext-rewrite-debug") << pop;
245
619
            if (!ret.isNull())
246
            {
247
76
              Trace("regexp-ext-rewrite-debug")
248
38
                  << "- CRE : regexp star infeasable " << xc << " " << rc
249
38
                  << std::endl;
250
38
              children.pop_back();
251
38
              if (!children.empty())
252
              {
253
24
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
254
24
                do_next = true;
255
              }
256
            }
257
            else
258
            {
259
581
              if (children_s.empty())
260
              {
261
                // Check if beyond this, we hit a conflict. In this case, we
262
                // must repeat.  Notice that we do not treat the case where
263
                // there are no more strings to consume as a failure, since
264
                // we may be within a recursive call, see issue #5510.
265
518
                bool can_skip = true;
266
518
                if (children.size() > 1)
267
                {
268
954
                  std::vector<Node> mchildren_ss;
269
477
                  mchildren_ss.insert(
270
954
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
271
954
                  std::vector<Node> children_ss;
272
477
                  children_ss.insert(
273
954
                      children_ss.end(), children.begin(), children.end() - 1);
274
477
                  if (t == 1)
275
                  {
276
192
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
277
192
                    std::reverse(children_ss.begin(), children_ss.end());
278
                  }
279
954
                  Trace("regexp-ext-rewrite-debug")
280
477
                      << "- recursive call required repeat star" << std::endl;
281
477
                  Trace("regexp-ext-rewrite-debug") << push;
282
954
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
283
477
                  Trace("regexp-ext-rewrite-debug") << pop;
284
477
                  if (!rets.isNull())
285
                  {
286
335
                    can_skip = false;
287
                  }
288
                }
289
518
                if (!can_skip)
290
                {
291
670
                  TypeNode stype = nm->stringType();
292
670
                  Node prev = utils::mkConcat(mchildren, stype);
293
670
                  Trace("regexp-ext-rewrite-debug")
294
335
                      << "- can't skip" << std::endl;
295
                  // take the result of fully consuming once
296
335
                  if (t == 1)
297
                  {
298
140
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
299
                  }
300
335
                  mchildren.clear();
301
335
                  mchildren.insert(
302
670
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
303
670
                  Node curr = utils::mkConcat(mchildren, stype);
304
335
                  do_next = (prev != curr);
305
670
                  Trace("regexp-ext-rewrite-debug")
306
335
                      << "- do_next = " << do_next << std::endl;
307
                }
308
                else
309
                {
310
366
                  Trace("regexp-ext-rewrite-debug")
311
183
                      << "- can skip " << rc << " from " << xc << std::endl;
312
                }
313
              }
314
            }
315
          }
316
        }
317
10762
        if (!do_next)
318
        {
319
17918
          Trace("regexp-ext-rewrite")
320
8959
              << "- cannot consume : " << xc << " " << rc << std::endl;
321
        }
322
      }
323
    }
324
15205
    if (dir != 0)
325
    {
326
10315
      std::reverse(children.begin(), children.end());
327
10315
      std::reverse(mchildren.begin(), mchildren.end());
328
    }
329
  }
330
7400
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
331
7400
  return Node::null();
332
}
333
334
3225
bool RegExpEntail::isConstRegExp(TNode t)
335
{
336
6450
  std::unordered_set<TNode> visited;
337
6450
  std::vector<TNode> visit;
338
6450
  TNode cur;
339
3225
  visit.push_back(t);
340
3889
  do
341
  {
342
7114
    cur = visit.back();
343
7114
    visit.pop_back();
344
7114
    if (visited.find(cur) == visited.end())
345
    {
346
6360
      visited.insert(cur);
347
6360
      Kind ck = cur.getKind();
348
6360
      if (ck == STRING_TO_REGEXP)
349
      {
350
1667
        if (!cur[0].isConst())
351
        {
352
127
          return false;
353
        }
354
      }
355
4693
      else if (ck == REGEXP_RANGE)
356
      {
357
4224
        for (const Node& cn : cur)
358
        {
359
2816
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
360
          {
361
            return false;
362
          }
363
        }
364
      }
365
3285
      else if (cur.isVar())
366
      {
367
25
        return false;
368
      }
369
      else
370
      {
371
7231
        for (const Node& cn : cur)
372
        {
373
3971
          visit.push_back(cn);
374
        }
375
      }
376
    }
377
6962
  } while (!visit.empty());
378
3073
  return true;
379
}
380
381
848475
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
382
                                           unsigned index_start,
383
                                           TNode r)
384
{
385
848475
  Assert(index_start <= s.size());
386
1696950
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
387
848475
                        << index_start << std::endl;
388
848475
  Assert(!r.isVar());
389
848475
  Kind k = r.getKind();
390
848475
  switch (k)
391
  {
392
9244
    case STRING_TO_REGEXP:
393
    {
394
18488
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
395
9244
      if (r[0].isConst())
396
      {
397
9244
        return (s2 == r[0].getConst<String>());
398
      }
399
      Assert(false) << "RegExp contains variables";
400
      return false;
401
    }
402
1167
    case REGEXP_CONCAT:
403
    {
404
1167
      if (s.size() != index_start)
405
      {
406
1856
        std::vector<int> vec_k(r.getNumChildren(), -1);
407
928
        int start = 0;
408
928
        int left = (int)s.size() - index_start;
409
928
        int i = 0;
410
10396
        while (i < (int)r.getNumChildren())
411
        {
412
5662
          bool flag = true;
413
5662
          if (i == (int)r.getNumChildren() - 1)
414
          {
415
1753
            if (testConstStringInRegExp(s, index_start + start, r[i]))
416
            {
417
420
              return true;
418
            }
419
          }
420
3909
          else if (i == -1)
421
          {
422
508
            return false;
423
          }
424
          else
425
          {
426
11951
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
427
            {
428
19519
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
429
10969
              if (testConstStringInRegExp(t, 0, r[i]))
430
              {
431
2419
                start += vec_k[i];
432
2419
                left -= vec_k[i];
433
2419
                flag = false;
434
2419
                ++i;
435
2419
                vec_k[i] = -1;
436
2419
                break;
437
              }
438
            }
439
          }
440
441
4734
          if (flag)
442
          {
443
2315
            --i;
444
2315
            if (i >= 0)
445
            {
446
1807
              start -= vec_k[i];
447
1807
              left += vec_k[i];
448
            }
449
          }
450
        }
451
        return false;
452
      }
453
      else
454
      {
455
258
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
456
        {
457
258
          if (!testConstStringInRegExp(s, index_start, r[i]))
458
          {
459
239
            return false;
460
          }
461
        }
462
        return true;
463
      }
464
    }
465
271824
    case REGEXP_UNION:
466
    {
467
789200
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
468
      {
469
536198
        if (testConstStringInRegExp(s, index_start, r[i]))
470
        {
471
18822
          return true;
472
        }
473
      }
474
253002
      return false;
475
    }
476
207
    case REGEXP_INTER:
477
    {
478
253
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
479
      {
480
232
        if (!testConstStringInRegExp(s, index_start, r[i]))
481
        {
482
186
          return false;
483
        }
484
      }
485
21
      return true;
486
    }
487
23406
    case REGEXP_STAR:
488
    {
489
23406
      if (s.size() != index_start)
490
      {
491
293380
        for (unsigned i = s.size() - index_start; i > 0; --i)
492
        {
493
547540
          cvc5::String t = s.substr(index_start, i);
494
276935
          if (testConstStringInRegExp(t, 0, r[0]))
495
          {
496
41836
            if (index_start + i == s.size()
497
41836
                || testConstStringInRegExp(s, index_start + i, r))
498
            {
499
6330
              return true;
500
            }
501
          }
502
        }
503
16445
        return false;
504
      }
505
      else
506
      {
507
631
        return true;
508
      }
509
    }
510
10
    case REGEXP_EMPTY:
511
    {
512
10
      return false;
513
    }
514
3653
    case REGEXP_SIGMA:
515
    {
516
3653
      if (s.size() == index_start + 1)
517
      {
518
1403
        return true;
519
      }
520
      else
521
      {
522
2250
        return false;
523
      }
524
    }
525
538892
    case REGEXP_RANGE:
526
    {
527
538892
      if (s.size() == index_start + 1)
528
      {
529
35131
        unsigned a = r[0].getConst<String>().front();
530
35131
        unsigned b = r[1].getConst<String>().front();
531
35131
        unsigned c = s.back();
532
35131
        return (a <= c && c <= b);
533
      }
534
      else
535
      {
536
503761
        return false;
537
      }
538
    }
539
    case REGEXP_LOOP:
540
    {
541
      NodeManager* nm = NodeManager::currentNM();
542
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
543
      if (s.size() == index_start)
544
      {
545
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
546
      }
547
      else if (l == 0 && r[1] == r[2])
548
      {
549
        return false;
550
      }
551
      else
552
      {
553
        Assert(r.getNumChildren() == 3)
554
            << "String rewriter error: LOOP has 2 children";
555
        if (l == 0)
556
        {
557
          // R{0,u}
558
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
559
          for (unsigned len = s.size() - index_start; len >= 1; len--)
560
          {
561
            cvc5::String t = s.substr(index_start, len);
562
            if (testConstStringInRegExp(t, 0, r[0]))
563
            {
564
              if (len + index_start == s.size())
565
              {
566
                return true;
567
              }
568
              else
569
              {
570
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
571
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
572
                if (testConstStringInRegExp(s, index_start + len, r2))
573
                {
574
                  return true;
575
                }
576
              }
577
            }
578
          }
579
          return false;
580
        }
581
        else
582
        {
583
          // R{l,l}
584
          Assert(r[1] == r[2])
585
              << "String rewriter error: LOOP nums are not equal";
586
          if (l > s.size() - index_start)
587
          {
588
            if (testConstStringInRegExp(s, s.size(), r[0]))
589
            {
590
              l = s.size() - index_start;
591
            }
592
            else
593
            {
594
              return false;
595
            }
596
          }
597
          for (unsigned len = 1; len <= s.size() - index_start; len++)
598
          {
599
            cvc5::String t = s.substr(index_start, len);
600
            if (testConstStringInRegExp(t, 0, r[0]))
601
            {
602
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
603
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
604
              if (testConstStringInRegExp(s, index_start + len, r2))
605
              {
606
                return true;
607
              }
608
            }
609
          }
610
          return false;
611
        }
612
      }
613
    }
614
72
    case REGEXP_COMPLEMENT:
615
    {
616
72
      return !testConstStringInRegExp(s, index_start, r[0]);
617
      break;
618
    }
619
    default:
620
    {
621
      Assert(!utils::isRegExpKind(k));
622
      return false;
623
    }
624
  }
625
}
626
627
168
bool RegExpEntail::hasEpsilonNode(TNode node)
628
{
629
520
  for (const Node& nc : node)
630
  {
631
374
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
632
    {
633
22
      return true;
634
    }
635
  }
636
146
  return false;
637
}
638
639
1049
Node RegExpEntail::getFixedLengthForRegexp(Node n)
640
{
641
1049
  NodeManager* nm = NodeManager::currentNM();
642
1049
  if (n.getKind() == STRING_TO_REGEXP)
643
  {
644
297
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
645
297
    ret = Rewriter::rewrite(ret);
646
297
    if (ret.isConst())
647
    {
648
297
      return ret;
649
    }
650
  }
651
752
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
652
  {
653
282
    return nm->mkConst(Rational(1));
654
  }
655
470
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
656
  {
657
328
    Node ret;
658
373
    for (const Node& nc : n)
659
    {
660
495
      Node flc = getFixedLengthForRegexp(nc);
661
286
      if (flc.isNull() || (!ret.isNull() && ret != flc))
662
      {
663
77
        return Node::null();
664
      }
665
209
      else if (ret.isNull())
666
      {
667
        // first time
668
122
        ret = flc;
669
      }
670
    }
671
87
    return ret;
672
  }
673
306
  else if (n.getKind() == REGEXP_CONCAT)
674
  {
675
244
    NodeBuilder nb(PLUS);
676
206
    for (const Node& nc : n)
677
    {
678
290
      Node flc = getFixedLengthForRegexp(nc);
679
206
      if (flc.isNull())
680
      {
681
122
        return flc;
682
      }
683
84
      nb << flc;
684
    }
685
    Node ret = nb.constructNode();
686
    ret = Rewriter::rewrite(ret);
687
    return ret;
688
  }
689
184
  return Node::null();
690
}
691
692
461
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
693
{
694
461
  Assert(Rewriter::rewrite(r1) == r1);
695
461
  Assert(Rewriter::rewrite(r2) == r2);
696
461
  if (r1 == r2)
697
  {
698
11
    return true;
699
  }
700
701
  // This method only works on a fragment of regular expressions
702
450
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
703
  {
704
312
    return false;
705
  }
706
138
  NodeManager* nm = NodeManager::currentNM();
707
276
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
708
276
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
709
710
276
  std::vector<Node> v1, v2;
711
138
  utils::getRegexpComponents(r1, v1);
712
138
  utils::getRegexpComponents(r2, v2);
713
714
  // In the following, we iterate over `r2` (the "includee") and try to
715
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
716
  // positions in `r1` that we could currently be at.
717
276
  std::unordered_set<size_t> newIdxs = {0};
718
276
  std::unordered_set<size_t> idxs;
719
906
  for (const Node& n2 : v2)
720
  {
721
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
722
    // removed and for (re.* re.allchar), we additionally include the option of
723
    // skipping it. Indices must be smaller than the size of `v1`.
724
804
    idxs.clear();
725
2644
    for (size_t idx : newIdxs)
726
    {
727
1840
      if (idx < v1.size())
728
      {
729
1820
        idxs.insert(idx);
730
1820
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
731
        {
732
1038
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
733
1038
          idxs.insert(idx + 1);
734
        }
735
      }
736
    }
737
804
    newIdxs.clear();
738
739
804
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
740
    {
741
596
      Assert(n2 == sigma
742
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
743
2736
      for (size_t idx : idxs)
744
      {
745
2140
        if (v1[idx] == sigma || v1[idx] == n2)
746
        {
747
          // Given a character or an re.allchar in `r2`, we can either
748
          // match it with a corresponding character in `r1` or an
749
          // re.allchar
750
646
          newIdxs.insert(idx + 1);
751
        }
752
      }
753
    }
754
755
3662
    for (size_t idx : idxs)
756
    {
757
2858
      if (v1[idx] == sigmaStar)
758
      {
759
        // (re.* re.allchar) can match an arbitrary amount of `r2`
760
1144
        newIdxs.insert(idx);
761
      }
762
1714
      else if (utils::isUnboundedWildcard(v1, idx))
763
      {
764
        // If a series of re.allchar is followed by (re.* re.allchar), we
765
        // can decide not to "waste" the re.allchar because the order of
766
        // the two wildcards is not observable (i.e. it does not change
767
        // the sequences matched by the regular expression)
768
444
        newIdxs.insert(idx);
769
      }
770
    }
771
772
804
    if (newIdxs.empty())
773
    {
774
      // If there are no candidates, we can't match the remainder of r2
775
36
      return false;
776
    }
777
  }
778
779
  // We have processed all of `r2`. We are now looking if there was also a
780
  // path to the end in `r1`. This makes sure that we don't have leftover
781
  // bits in `r1` that don't match anything in `r2`.
782
102
  bool result = false;
783
288
  for (size_t idx : newIdxs)
784
  {
785
240
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
786
    {
787
54
      result = true;
788
54
      break;
789
    }
790
  }
791
792
102
  return result;
793
}
794
795
}  // namespace strings
796
}  // namespace theory
797
28191
}  // namespace cvc5