GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 349 395 88.4 %
Date: 2021-05-22 Branches: 728 1493 48.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
22
using namespace std;
23
using namespace cvc5::kind;
24
25
namespace cvc5 {
26
namespace theory {
27
namespace strings {
28
29
8883
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
30
                                       std::vector<Node>& children,
31
                                       int dir)
32
{
33
17766
  Trace("regexp-ext-rewrite-debug")
34
8883
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
35
17766
  Trace("regexp-ext-rewrite-debug")
36
8883
      << "  mchildren : " << mchildren << std::endl;
37
8883
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
38
8883
  NodeManager* nm = NodeManager::currentNM();
39
8883
  unsigned tmin = dir < 0 ? 0 : dir;
40
8883
  unsigned tmax = dir < 0 ? 1 : dir;
41
  // try to remove off front and back
42
25295
  for (unsigned t = 0; t < 2; t++)
43
  {
44
17358
    if (tmin <= t && t <= tmax)
45
    {
46
11728
      bool do_next = true;
47
35072
      while (!children.empty() && !mchildren.empty() && do_next)
48
      {
49
12618
        do_next = false;
50
24290
        Node xc = mchildren[mchildren.size() - 1];
51
24290
        Node rc = children[children.size() - 1];
52
25236
        Trace("regexp-ext-rewrite-debug")
53
12618
            << "* " << xc << " in " << rc << std::endl;
54
12618
        Assert(rc.getKind() != REGEXP_CONCAT);
55
12618
        Assert(xc.getKind() != STRING_CONCAT);
56
12618
        if (rc.getKind() == STRING_TO_REGEXP)
57
        {
58
4417
          if (xc == rc[0])
59
          {
60
392
            children.pop_back();
61
392
            mchildren.pop_back();
62
392
            do_next = true;
63
392
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
64
          }
65
4025
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
66
          {
67
116
            Trace("regexp-ext-rewrite-debug")
68
58
                << "- ignore empty RE" << std::endl;
69
            // ignore and continue
70
58
            children.pop_back();
71
58
            do_next = true;
72
          }
73
3967
          else if (xc.isConst() && rc[0].isConst())
74
          {
75
            // split the constant
76
            size_t index;
77
1012
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
78
1642
            Trace("regexp-ext-rewrite-debug")
79
1642
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
80
821
                << " -> " << s << " " << index << " " << t << std::endl;
81
821
            if (s.isNull())
82
            {
83
1260
              Trace("regexp-ext-rewrite-debug")
84
630
                  << "...return false" << std::endl;
85
630
              return nm->mkConst(false);
86
            }
87
            else
88
            {
89
382
              Trace("regexp-ext-rewrite-debug")
90
191
                  << "- strip equal const" << std::endl;
91
191
              children.pop_back();
92
191
              mchildren.pop_back();
93
191
              if (index == 0)
94
              {
95
177
                mchildren.push_back(s);
96
              }
97
              else
98
              {
99
14
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
100
              }
101
            }
102
191
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
103
191
            do_next = true;
104
          }
105
        }
106
8201
        else if (xc.isConst())
107
        {
108
          // check for constants
109
4514
          cvc5::String s = xc.getConst<String>();
110
2415
          if (Word::isEmpty(xc))
111
          {
112
46
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
113
            // ignore and continue
114
46
            mchildren.pop_back();
115
46
            do_next = true;
116
          }
117
2369
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
118
          {
119
973
            if (!isConstRegExp(rc))
120
            {
121
              // if a non-standard re.range term, abort
122
225
              return Node::null();
123
            }
124
1721
            std::vector<unsigned> ssVec;
125
973
            ssVec.push_back(t == 0 ? s.back() : s.front());
126
1721
            cvc5::String ss(ssVec);
127
973
            if (testConstStringInRegExp(ss, 0, rc))
128
            {
129
              // strip off one character
130
748
              mchildren.pop_back();
131
748
              if (s.size() > 1)
132
              {
133
520
                if (t == 0)
134
                {
135
285
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
136
                }
137
                else
138
                {
139
235
                  mchildren.push_back(nm->mkConst(s.substr(1)));
140
                }
141
              }
142
748
              children.pop_back();
143
748
              do_next = true;
144
            }
145
            else
146
            {
147
450
              Trace("regexp-ext-rewrite-debug")
148
225
                  << "...return false" << std::endl;
149
225
              return nm->mkConst(false);
150
            }
151
          }
152
1396
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
153
          {
154
            // see if any/each child does not work
155
518
            bool result_valid = true;
156
945
            Node result;
157
945
            Node emp_s = nm->mkConst(String(""));
158
1436
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
159
            {
160
1911
              std::vector<Node> mchildren_s;
161
1911
              std::vector<Node> children_s;
162
993
              mchildren_s.push_back(xc);
163
993
              utils::getConcat(rc[i], children_s);
164
993
              Trace("regexp-ext-rewrite-debug") << push;
165
1911
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
166
993
              Trace("regexp-ext-rewrite-debug") << pop;
167
993
              if (!ret.isNull())
168
              {
169
                // one conjunct cannot be satisfied, return false
170
461
                if (rc.getKind() == REGEXP_INTER)
171
                {
172
150
                  Trace("regexp-ext-rewrite-debug")
173
75
                      << "...return " << ret << std::endl;
174
75
                  return ret;
175
                }
176
              }
177
              else
178
              {
179
532
                if (children_s.empty())
180
                {
181
                  // if we were able to fully consume, store the result
182
316
                  Assert(mchildren_s.size() <= 1);
183
316
                  if (mchildren_s.empty())
184
                  {
185
84
                    mchildren_s.push_back(emp_s);
186
                  }
187
316
                  if (result.isNull())
188
                  {
189
304
                    result = mchildren_s[0];
190
                  }
191
12
                  else if (result != mchildren_s[0])
192
                  {
193
12
                    result_valid = false;
194
                  }
195
                }
196
                else
197
                {
198
216
                  result_valid = false;
199
                }
200
              }
201
            }
202
443
            if (result_valid)
203
            {
204
274
              if (result.isNull())
205
              {
206
                // all disjuncts cannot be satisfied, return false
207
16
                Assert(rc.getKind() == REGEXP_UNION);
208
32
                Trace("regexp-ext-rewrite-debug")
209
16
                    << "...return false" << std::endl;
210
16
                return nm->mkConst(false);
211
              }
212
              else
213
              {
214
516
                Trace("regexp-ext-rewrite-debug")
215
258
                    << "- same result, try again, children now " << children
216
258
                    << std::endl;
217
                // all branches led to the same result
218
258
                children.pop_back();
219
258
                mchildren.pop_back();
220
258
                if (result != emp_s)
221
                {
222
194
                  mchildren.push_back(result);
223
                }
224
258
                do_next = true;
225
              }
226
            }
227
          }
228
878
          else if (rc.getKind() == REGEXP_STAR)
229
          {
230
            // check if there is no way that this star can be unrolled even once
231
1534
            std::vector<Node> mchildren_s;
232
767
            mchildren_s.insert(
233
1534
                mchildren_s.end(), mchildren.begin(), mchildren.end());
234
767
            if (t == 1)
235
            {
236
382
              std::reverse(mchildren_s.begin(), mchildren_s.end());
237
            }
238
1534
            std::vector<Node> children_s;
239
767
            utils::getConcat(rc[0], children_s);
240
1534
            Trace("regexp-ext-rewrite-debug")
241
767
                << "- recursive call on body of star" << std::endl;
242
767
            Trace("regexp-ext-rewrite-debug") << push;
243
1534
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
244
767
            Trace("regexp-ext-rewrite-debug") << pop;
245
767
            if (!ret.isNull())
246
            {
247
76
              Trace("regexp-ext-rewrite-debug")
248
38
                  << "- CRE : regexp star infeasable " << xc << " " << rc
249
38
                  << std::endl;
250
38
              children.pop_back();
251
38
              if (!children.empty())
252
              {
253
24
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
254
24
                do_next = true;
255
              }
256
            }
257
            else
258
            {
259
729
              if (children_s.empty())
260
              {
261
                // Check if beyond this, we hit a conflict. In this case, we
262
                // must repeat.  Notice that we do not treat the case where
263
                // there are no more strings to consume as a failure, since
264
                // we may be within a recursive call, see issue #5510.
265
666
                bool can_skip = true;
266
666
                if (children.size() > 1)
267
                {
268
1248
                  std::vector<Node> mchildren_ss;
269
624
                  mchildren_ss.insert(
270
1248
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
271
1248
                  std::vector<Node> children_ss;
272
624
                  children_ss.insert(
273
1248
                      children_ss.end(), children.begin(), children.end() - 1);
274
624
                  if (t == 1)
275
                  {
276
306
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
277
306
                    std::reverse(children_ss.begin(), children_ss.end());
278
                  }
279
1248
                  Trace("regexp-ext-rewrite-debug")
280
624
                      << "- recursive call required repeat star" << std::endl;
281
624
                  Trace("regexp-ext-rewrite-debug") << push;
282
1248
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
283
624
                  Trace("regexp-ext-rewrite-debug") << pop;
284
624
                  if (!rets.isNull())
285
                  {
286
412
                    can_skip = false;
287
                  }
288
                }
289
666
                if (!can_skip)
290
                {
291
824
                  TypeNode stype = nm->stringType();
292
824
                  Node prev = utils::mkConcat(mchildren, stype);
293
824
                  Trace("regexp-ext-rewrite-debug")
294
412
                      << "- can't skip" << std::endl;
295
                  // take the result of fully consuming once
296
412
                  if (t == 1)
297
                  {
298
198
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
299
                  }
300
412
                  mchildren.clear();
301
412
                  mchildren.insert(
302
824
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
303
824
                  Node curr = utils::mkConcat(mchildren, stype);
304
412
                  do_next = (prev != curr);
305
824
                  Trace("regexp-ext-rewrite-debug")
306
412
                      << "- do_next = " << do_next << std::endl;
307
                }
308
                else
309
                {
310
508
                  Trace("regexp-ext-rewrite-debug")
311
254
                      << "- can skip " << rc << " from " << xc << std::endl;
312
                }
313
              }
314
            }
315
          }
316
        }
317
11672
        if (!do_next)
318
        {
319
19086
          Trace("regexp-ext-rewrite")
320
9543
              << "- cannot consume : " << xc << " " << rc << std::endl;
321
        }
322
      }
323
    }
324
16412
    if (dir != 0)
325
    {
326
11404
      std::reverse(children.begin(), children.end());
327
11404
      std::reverse(mchildren.begin(), mchildren.end());
328
    }
329
  }
330
7937
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
331
7937
  return Node::null();
332
}
333
334
3592
bool RegExpEntail::isConstRegExp(TNode t)
335
{
336
7184
  std::unordered_set<TNode> visited;
337
7184
  std::vector<TNode> visit;
338
7184
  TNode cur;
339
3592
  visit.push_back(t);
340
5507
  do
341
  {
342
9099
    cur = visit.back();
343
9099
    visit.pop_back();
344
9099
    if (visited.find(cur) == visited.end())
345
    {
346
8123
      visited.insert(cur);
347
8123
      Kind ck = cur.getKind();
348
8123
      if (ck == STRING_TO_REGEXP)
349
      {
350
2235
        if (!cur[0].isConst())
351
        {
352
127
          return false;
353
        }
354
      }
355
5888
      else if (ck == REGEXP_RANGE)
356
      {
357
4224
        for (const Node& cn : cur)
358
        {
359
2816
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
360
          {
361
            return false;
362
          }
363
        }
364
      }
365
4480
      else if (cur.isVar())
366
      {
367
25
        return false;
368
      }
369
      else
370
      {
371
10044
        for (const Node& cn : cur)
372
        {
373
5589
          visit.push_back(cn);
374
        }
375
      }
376
    }
377
8947
  } while (!visit.empty());
378
3440
  return true;
379
}
380
381
856098
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
382
                                           unsigned index_start,
383
                                           TNode r)
384
{
385
856098
  Assert(index_start <= s.size());
386
1712196
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
387
856098
                        << index_start << std::endl;
388
856098
  Assert(!r.isVar());
389
856098
  Kind k = r.getKind();
390
856098
  switch (k)
391
  {
392
12659
    case STRING_TO_REGEXP:
393
    {
394
25318
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
395
12659
      if (r[0].isConst())
396
      {
397
12659
        return (s2 == r[0].getConst<String>());
398
      }
399
      Assert(false) << "RegExp contains variables";
400
      return false;
401
    }
402
2371
    case REGEXP_CONCAT:
403
    {
404
2371
      if (s.size() != index_start)
405
      {
406
3586
        std::vector<int> vec_k(r.getNumChildren(), -1);
407
1793
        int start = 0;
408
1793
        int left = (int)s.size() - index_start;
409
1793
        int i = 0;
410
15419
        while (i < (int)r.getNumChildren())
411
        {
412
8606
          bool flag = true;
413
8606
          if (i == (int)r.getNumChildren() - 1)
414
          {
415
2132
            if (testConstStringInRegExp(s, index_start + start, r[i]))
416
            {
417
529
              return true;
418
            }
419
          }
420
6474
          else if (i == -1)
421
          {
422
1264
            return false;
423
          }
424
          else
425
          {
426
17156
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
427
            {
428
27077
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
429
15131
              if (testConstStringInRegExp(t, 0, r[i]))
430
              {
431
3185
                start += vec_k[i];
432
3185
                left -= vec_k[i];
433
3185
                flag = false;
434
3185
                ++i;
435
3185
                vec_k[i] = -1;
436
3185
                break;
437
              }
438
            }
439
          }
440
441
6813
          if (flag)
442
          {
443
3628
            --i;
444
3628
            if (i >= 0)
445
            {
446
2364
              start -= vec_k[i];
447
2364
              left += vec_k[i];
448
            }
449
          }
450
        }
451
        return false;
452
      }
453
      else
454
      {
455
616
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
456
        {
457
616
          if (!testConstStringInRegExp(s, index_start, r[i]))
458
          {
459
578
            return false;
460
          }
461
        }
462
        return true;
463
      }
464
    }
465
271824
    case REGEXP_UNION:
466
    {
467
789200
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
468
      {
469
536198
        if (testConstStringInRegExp(s, index_start, r[i]))
470
        {
471
18822
          return true;
472
        }
473
      }
474
253002
      return false;
475
    }
476
1203
    case REGEXP_INTER:
477
    {
478
1339
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
479
      {
480
1273
        if (!testConstStringInRegExp(s, index_start, r[i]))
481
        {
482
1137
          return false;
483
        }
484
      }
485
66
      return true;
486
    }
487
24218
    case REGEXP_STAR:
488
    {
489
24218
      if (s.size() != index_start)
490
      {
491
294383
        for (unsigned i = s.size() - index_start; i > 0; --i)
492
        {
493
549046
          cvc5::String t = s.substr(index_start, i);
494
277938
          if (testConstStringInRegExp(t, 0, r[0]))
495
          {
496
42836
            if (index_start + i == s.size()
497
42836
                || testConstStringInRegExp(s, index_start + i, r))
498
            {
499
6830
              return true;
500
            }
501
          }
502
        }
503
16445
        return false;
504
      }
505
      else
506
      {
507
943
        return true;
508
      }
509
    }
510
10
    case REGEXP_EMPTY:
511
    {
512
10
      return false;
513
    }
514
4804
    case REGEXP_SIGMA:
515
    {
516
4804
      if (s.size() == index_start + 1)
517
      {
518
2051
        return true;
519
      }
520
      else
521
      {
522
2753
        return false;
523
      }
524
    }
525
538892
    case REGEXP_RANGE:
526
    {
527
538892
      if (s.size() == index_start + 1)
528
      {
529
35131
        unsigned a = r[0].getConst<String>().front();
530
35131
        unsigned b = r[1].getConst<String>().front();
531
35131
        unsigned c = s.back();
532
35131
        return (a <= c && c <= b);
533
      }
534
      else
535
      {
536
503761
        return false;
537
      }
538
    }
539
    case REGEXP_LOOP:
540
    {
541
      NodeManager* nm = NodeManager::currentNM();
542
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
543
      if (s.size() == index_start)
544
      {
545
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
546
      }
547
      else if (l == 0 && r[1] == r[2])
548
      {
549
        return false;
550
      }
551
      else
552
      {
553
        Assert(r.getNumChildren() == 3)
554
            << "String rewriter error: LOOP has 2 children";
555
        if (l == 0)
556
        {
557
          // R{0,u}
558
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
559
          for (unsigned len = s.size() - index_start; len >= 1; len--)
560
          {
561
            cvc5::String t = s.substr(index_start, len);
562
            if (testConstStringInRegExp(t, 0, r[0]))
563
            {
564
              if (len + index_start == s.size())
565
              {
566
                return true;
567
              }
568
              else
569
              {
570
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
571
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
572
                if (testConstStringInRegExp(s, index_start + len, r2))
573
                {
574
                  return true;
575
                }
576
              }
577
            }
578
          }
579
          return false;
580
        }
581
        else
582
        {
583
          // R{l,l}
584
          Assert(r[1] == r[2])
585
              << "String rewriter error: LOOP nums are not equal";
586
          if (l > s.size() - index_start)
587
          {
588
            if (testConstStringInRegExp(s, s.size(), r[0]))
589
            {
590
              l = s.size() - index_start;
591
            }
592
            else
593
            {
594
              return false;
595
            }
596
          }
597
          for (unsigned len = 1; len <= s.size() - index_start; len++)
598
          {
599
            cvc5::String t = s.substr(index_start, len);
600
            if (testConstStringInRegExp(t, 0, r[0]))
601
            {
602
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
603
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
604
              if (testConstStringInRegExp(s, index_start + len, r2))
605
              {
606
                return true;
607
              }
608
            }
609
          }
610
          return false;
611
        }
612
      }
613
    }
614
117
    case REGEXP_COMPLEMENT:
615
    {
616
117
      return !testConstStringInRegExp(s, index_start, r[0]);
617
      break;
618
    }
619
    default:
620
    {
621
      Assert(!utils::isRegExpKind(k));
622
      return false;
623
    }
624
  }
625
}
626
627
168
bool RegExpEntail::hasEpsilonNode(TNode node)
628
{
629
520
  for (const Node& nc : node)
630
  {
631
374
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
632
    {
633
22
      return true;
634
    }
635
  }
636
146
  return false;
637
}
638
639
1059
Node RegExpEntail::getFixedLengthForRegexp(Node n)
640
{
641
1059
  NodeManager* nm = NodeManager::currentNM();
642
1059
  if (n.getKind() == STRING_TO_REGEXP)
643
  {
644
298
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
645
298
    ret = Rewriter::rewrite(ret);
646
298
    if (ret.isConst())
647
    {
648
298
      return ret;
649
    }
650
  }
651
761
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
652
  {
653
283
    return nm->mkConst(Rational(1));
654
  }
655
478
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
656
  {
657
330
    Node ret;
658
374
    for (const Node& nc : n)
659
    {
660
496
      Node flc = getFixedLengthForRegexp(nc);
661
287
      if (flc.isNull() || (!ret.isNull() && ret != flc))
662
      {
663
78
        return Node::null();
664
      }
665
209
      else if (ret.isNull())
666
      {
667
        // first time
668
122
        ret = flc;
669
      }
670
    }
671
87
    return ret;
672
  }
673
313
  else if (n.getKind() == REGEXP_CONCAT)
674
  {
675
246
    NodeBuilder nb(PLUS);
676
208
    for (const Node& nc : n)
677
    {
678
293
      Node flc = getFixedLengthForRegexp(nc);
679
208
      if (flc.isNull())
680
      {
681
123
        return flc;
682
      }
683
85
      nb << flc;
684
    }
685
    Node ret = nb.constructNode();
686
    ret = Rewriter::rewrite(ret);
687
    return ret;
688
  }
689
190
  return Node::null();
690
}
691
692
465
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
693
{
694
465
  Assert(Rewriter::rewrite(r1) == r1);
695
465
  Assert(Rewriter::rewrite(r2) == r2);
696
465
  if (r1 == r2)
697
  {
698
11
    return true;
699
  }
700
701
  // This method only works on a fragment of regular expressions
702
454
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
703
  {
704
314
    return false;
705
  }
706
140
  NodeManager* nm = NodeManager::currentNM();
707
280
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
708
280
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
709
710
280
  std::vector<Node> v1, v2;
711
140
  utils::getRegexpComponents(r1, v1);
712
140
  utils::getRegexpComponents(r2, v2);
713
714
  // In the following, we iterate over `r2` (the "includee") and try to
715
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
716
  // positions in `r1` that we could currently be at.
717
280
  std::unordered_set<size_t> newIdxs = {0};
718
280
  std::unordered_set<size_t> idxs;
719
908
  for (const Node& n2 : v2)
720
  {
721
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
722
    // removed and for (re.* re.allchar), we additionally include the option of
723
    // skipping it. Indices must be smaller than the size of `v1`.
724
806
    idxs.clear();
725
2648
    for (size_t idx : newIdxs)
726
    {
727
1842
      if (idx < v1.size())
728
      {
729
1820
        idxs.insert(idx);
730
1820
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
731
        {
732
1038
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
733
1038
          idxs.insert(idx + 1);
734
        }
735
      }
736
    }
737
806
    newIdxs.clear();
738
739
806
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
740
    {
741
598
      Assert(n2 == sigma
742
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
743
2738
      for (size_t idx : idxs)
744
      {
745
2140
        if (v1[idx] == sigma || v1[idx] == n2)
746
        {
747
          // Given a character or an re.allchar in `r2`, we can either
748
          // match it with a corresponding character in `r1` or an
749
          // re.allchar
750
646
          newIdxs.insert(idx + 1);
751
        }
752
      }
753
    }
754
755
3664
    for (size_t idx : idxs)
756
    {
757
2858
      if (v1[idx] == sigmaStar)
758
      {
759
        // (re.* re.allchar) can match an arbitrary amount of `r2`
760
1144
        newIdxs.insert(idx);
761
      }
762
1714
      else if (utils::isUnboundedWildcard(v1, idx))
763
      {
764
        // If a series of re.allchar is followed by (re.* re.allchar), we
765
        // can decide not to "waste" the re.allchar because the order of
766
        // the two wildcards is not observable (i.e. it does not change
767
        // the sequences matched by the regular expression)
768
444
        newIdxs.insert(idx);
769
      }
770
    }
771
772
806
    if (newIdxs.empty())
773
    {
774
      // If there are no candidates, we can't match the remainder of r2
775
38
      return false;
776
    }
777
  }
778
779
  // We have processed all of `r2`. We are now looking if there was also a
780
  // path to the end in `r1`. This makes sure that we don't have leftover
781
  // bits in `r1` that don't match anything in `r2`.
782
102
  bool result = false;
783
288
  for (size_t idx : newIdxs)
784
  {
785
240
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
786
    {
787
54
      result = true;
788
54
      break;
789
    }
790
  }
791
792
102
  return result;
793
}
794
795
}  // namespace strings
796
}  // namespace theory
797
28194
}  // namespace cvc5