GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 350 397 88.2 %
Date: 2021-11-07 Branches: 727 1493 48.7 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
#include "util/rational.h"
22
#include "util/string.h"
23
24
using namespace std;
25
using namespace cvc5::kind;
26
27
namespace cvc5 {
28
namespace theory {
29
namespace strings {
30
31
17535
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
32
                                       std::vector<Node>& children,
33
                                       int dir)
34
{
35
35070
  Trace("regexp-ext-rewrite-debug")
36
17535
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
37
35070
  Trace("regexp-ext-rewrite-debug")
38
17535
      << "  mchildren : " << mchildren << std::endl;
39
17535
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
40
17535
  NodeManager* nm = NodeManager::currentNM();
41
17535
  unsigned tmin = dir < 0 ? 0 : dir;
42
17535
  unsigned tmax = dir < 0 ? 1 : dir;
43
  // try to remove off front and back
44
49691
  for (unsigned t = 0; t < 2; t++)
45
  {
46
34422
    if (tmin <= t && t <= tmax)
47
    {
48
22769
      bool do_next = true;
49
68093
      while (!children.empty() && !mchildren.empty() && do_next)
50
      {
51
24928
        do_next = false;
52
47590
        Node xc = mchildren[mchildren.size() - 1];
53
47590
        Node rc = children[children.size() - 1];
54
49856
        Trace("regexp-ext-rewrite-debug")
55
24928
            << "* " << xc << " in " << rc << std::endl;
56
24928
        Assert(rc.getKind() != REGEXP_CONCAT);
57
24928
        Assert(xc.getKind() != STRING_CONCAT);
58
24928
        if (rc.getKind() == STRING_TO_REGEXP)
59
        {
60
9080
          if (xc == rc[0])
61
          {
62
1330
            children.pop_back();
63
1330
            mchildren.pop_back();
64
1330
            do_next = true;
65
1330
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
66
          }
67
7750
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
68
          {
69
302
            Trace("regexp-ext-rewrite-debug")
70
151
                << "- ignore empty RE" << std::endl;
71
            // ignore and continue
72
151
            children.pop_back();
73
151
            do_next = true;
74
          }
75
7599
          else if (xc.isConst() && rc[0].isConst())
76
          {
77
            // split the constant
78
            size_t index;
79
3403
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
80
5282
            Trace("regexp-ext-rewrite-debug")
81
5282
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
82
2641
                << " -> " << s << " " << index << " " << t << std::endl;
83
2641
            if (s.isNull())
84
            {
85
3758
              Trace("regexp-ext-rewrite-debug")
86
1879
                  << "...return false" << std::endl;
87
1879
              return nm->mkConst(false);
88
            }
89
            else
90
            {
91
1524
              Trace("regexp-ext-rewrite-debug")
92
762
                  << "- strip equal const" << std::endl;
93
762
              children.pop_back();
94
762
              mchildren.pop_back();
95
762
              if (index == 0)
96
              {
97
704
                mchildren.push_back(s);
98
              }
99
              else
100
              {
101
58
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
102
              }
103
            }
104
762
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
105
762
            do_next = true;
106
          }
107
        }
108
15848
        else if (xc.isConst())
109
        {
110
          // check for constants
111
10695
          cvc5::String s = xc.getConst<String>();
112
5541
          if (Word::isEmpty(xc))
113
          {
114
186
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
115
            // ignore and continue
116
186
            mchildren.pop_back();
117
186
            do_next = true;
118
          }
119
5355
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
120
          {
121
1988
            if (!isConstRegExp(rc))
122
            {
123
              // if a non-standard re.range term, abort
124
257
              return Node::null();
125
            }
126
3719
            std::vector<unsigned> ssVec;
127
1988
            ssVec.push_back(t == 0 ? s.back() : s.front());
128
3719
            cvc5::String ss(ssVec);
129
1988
            if (testConstStringInRegExp(ss, 0, rc))
130
            {
131
              // strip off one character
132
1731
              mchildren.pop_back();
133
1731
              if (s.size() > 1)
134
              {
135
1194
                if (t == 0)
136
                {
137
785
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
138
                }
139
                else
140
                {
141
409
                  mchildren.push_back(nm->mkConst(s.substr(1)));
142
                }
143
              }
144
1731
              children.pop_back();
145
1731
              do_next = true;
146
            }
147
            else
148
            {
149
514
              Trace("regexp-ext-rewrite-debug")
150
257
                  << "...return false" << std::endl;
151
257
              return nm->mkConst(false);
152
            }
153
          }
154
3367
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
155
          {
156
            // see if any/each child does not work
157
1348
            bool result_valid = true;
158
2566
            Node result;
159
2566
            Node emp_s = nm->mkConst(String(""));
160
4270
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
161
            {
162
5844
              std::vector<Node> mchildren_s;
163
5844
              std::vector<Node> children_s;
164
2922
              mchildren_s.push_back(xc);
165
2922
              utils::getConcat(rc[i], children_s);
166
2922
              Trace("regexp-ext-rewrite-debug") << push;
167
5844
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
168
2922
              Trace("regexp-ext-rewrite-debug") << pop;
169
2922
              if (!ret.isNull())
170
              {
171
                // one conjunct cannot be satisfied, return false
172
1438
                if (rc.getKind() == REGEXP_INTER)
173
                {
174
                  Trace("regexp-ext-rewrite-debug")
175
                      << "...return " << ret << std::endl;
176
                  return ret;
177
                }
178
              }
179
              else
180
              {
181
1484
                if (children_s.empty())
182
                {
183
                  // if we were able to fully consume, store the result
184
967
                  Assert(mchildren_s.size() <= 1);
185
967
                  if (mchildren_s.empty())
186
                  {
187
450
                    mchildren_s.push_back(emp_s);
188
                  }
189
967
                  if (result.isNull())
190
                  {
191
888
                    result = mchildren_s[0];
192
                  }
193
79
                  else if (result != mchildren_s[0])
194
                  {
195
79
                    result_valid = false;
196
                  }
197
                }
198
                else
199
                {
200
517
                  result_valid = false;
201
                }
202
              }
203
            }
204
1348
            if (result_valid)
205
            {
206
844
              if (result.isNull())
207
              {
208
                // all disjuncts cannot be satisfied, return false
209
130
                Assert(rc.getKind() == REGEXP_UNION);
210
260
                Trace("regexp-ext-rewrite-debug")
211
130
                    << "...return false" << std::endl;
212
130
                return nm->mkConst(false);
213
              }
214
              else
215
              {
216
1428
                Trace("regexp-ext-rewrite-debug")
217
714
                    << "- same result, try again, children now " << children
218
714
                    << std::endl;
219
                // all branches led to the same result
220
714
                children.pop_back();
221
714
                mchildren.pop_back();
222
714
                if (result != emp_s)
223
                {
224
404
                  mchildren.push_back(result);
225
                }
226
714
                do_next = true;
227
              }
228
            }
229
          }
230
2019
          else if (rc.getKind() == REGEXP_STAR)
231
          {
232
            // check if there is no way that this star can be unrolled even once
233
3802
            std::vector<Node> mchildren_s;
234
1901
            mchildren_s.insert(
235
3802
                mchildren_s.end(), mchildren.begin(), mchildren.end());
236
1901
            if (t == 1)
237
            {
238
891
              std::reverse(mchildren_s.begin(), mchildren_s.end());
239
            }
240
3802
            std::vector<Node> children_s;
241
1901
            utils::getConcat(rc[0], children_s);
242
3802
            Trace("regexp-ext-rewrite-debug")
243
1901
                << "- recursive call on body of star" << std::endl;
244
1901
            Trace("regexp-ext-rewrite-debug") << push;
245
3802
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
246
1901
            Trace("regexp-ext-rewrite-debug") << pop;
247
1901
            if (!ret.isNull())
248
            {
249
456
              Trace("regexp-ext-rewrite-debug")
250
228
                  << "- CRE : regexp star infeasable " << xc << " " << rc
251
228
                  << std::endl;
252
228
              children.pop_back();
253
228
              if (!children.empty())
254
              {
255
164
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
256
164
                do_next = true;
257
              }
258
            }
259
            else
260
            {
261
1673
              if (children_s.empty())
262
              {
263
                // Check if beyond this, we hit a conflict. In this case, we
264
                // must repeat.  Notice that we do not treat the case where
265
                // there are no more strings to consume as a failure, since
266
                // we may be within a recursive call, see issue #5510.
267
1404
                bool can_skip = true;
268
1404
                if (children.size() > 1)
269
                {
270
2440
                  std::vector<Node> mchildren_ss;
271
1220
                  mchildren_ss.insert(
272
2440
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
273
2440
                  std::vector<Node> children_ss;
274
1220
                  children_ss.insert(
275
2440
                      children_ss.end(), children.begin(), children.end() - 1);
276
1220
                  if (t == 1)
277
                  {
278
363
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
279
363
                    std::reverse(children_ss.begin(), children_ss.end());
280
                  }
281
2440
                  Trace("regexp-ext-rewrite-debug")
282
1220
                      << "- recursive call required repeat star" << std::endl;
283
1220
                  Trace("regexp-ext-rewrite-debug") << push;
284
2440
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
285
1220
                  Trace("regexp-ext-rewrite-debug") << pop;
286
1220
                  if (!rets.isNull())
287
                  {
288
484
                    can_skip = false;
289
                  }
290
                }
291
1404
                if (!can_skip)
292
                {
293
968
                  TypeNode stype = nm->stringType();
294
968
                  Node prev = utils::mkConcat(mchildren, stype);
295
968
                  Trace("regexp-ext-rewrite-debug")
296
484
                      << "- can't skip" << std::endl;
297
                  // take the result of fully consuming once
298
484
                  if (t == 1)
299
                  {
300
239
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
301
                  }
302
484
                  mchildren.clear();
303
484
                  mchildren.insert(
304
968
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
305
968
                  Node curr = utils::mkConcat(mchildren, stype);
306
484
                  do_next = (prev != curr);
307
968
                  Trace("regexp-ext-rewrite-debug")
308
484
                      << "- do_next = " << do_next << std::endl;
309
                }
310
                else
311
                {
312
1840
                  Trace("regexp-ext-rewrite-debug")
313
920
                      << "- can skip " << rc << " from " << xc << std::endl;
314
                }
315
              }
316
            }
317
          }
318
        }
319
22662
        if (!do_next)
320
        {
321
34280
          Trace("regexp-ext-rewrite")
322
17140
              << "- cannot consume : " << xc << " " << rc << std::endl;
323
        }
324
      }
325
    }
326
32156
    if (dir != 0)
327
    {
328
21634
      std::reverse(children.begin(), children.end());
329
21634
      std::reverse(mchildren.begin(), mchildren.end());
330
    }
331
  }
332
15269
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
333
15269
  return Node::null();
334
}
335
336
7329
bool RegExpEntail::isConstRegExp(TNode t)
337
{
338
14658
  std::unordered_set<TNode> visited;
339
14658
  std::vector<TNode> visit;
340
14658
  TNode cur;
341
7329
  visit.push_back(t);
342
14947
  do
343
  {
344
22276
    cur = visit.back();
345
22276
    visit.pop_back();
346
22276
    if (visited.find(cur) == visited.end())
347
    {
348
19357
      visited.insert(cur);
349
19357
      Kind ck = cur.getKind();
350
19357
      if (ck == STRING_TO_REGEXP)
351
      {
352
6210
        if (!cur[0].isConst())
353
        {
354
758
          return false;
355
        }
356
      }
357
13147
      else if (ck == REGEXP_RANGE)
358
      {
359
7341
        for (const Node& cn : cur)
360
        {
361
4894
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
362
          {
363
            return false;
364
          }
365
        }
366
      }
367
10700
      else if (ck == ITE)
368
      {
369
        return false;
370
      }
371
10700
      else if (cur.isVar())
372
      {
373
50
        return false;
374
      }
375
      else
376
      {
377
26283
        for (const Node& cn : cur)
378
        {
379
15633
          visit.push_back(cn);
380
        }
381
      }
382
    }
383
21468
  } while (!visit.empty());
384
6521
  return true;
385
}
386
387
878060
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
388
                                           unsigned index_start,
389
                                           TNode r)
390
{
391
878060
  Assert(index_start <= s.size());
392
1756120
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
393
878060
                        << index_start << std::endl;
394
878060
  Assert(!r.isVar());
395
878060
  Kind k = r.getKind();
396
878060
  switch (k)
397
  {
398
21168
    case STRING_TO_REGEXP:
399
    {
400
42336
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
401
21168
      if (r[0].isConst())
402
      {
403
21168
        return (s2 == r[0].getConst<String>());
404
      }
405
      Assert(false) << "RegExp contains variables";
406
      return false;
407
    }
408
3546
    case REGEXP_CONCAT:
409
    {
410
3546
      if (s.size() != index_start)
411
      {
412
5268
        std::vector<int> vec_k(r.getNumChildren(), -1);
413
2634
        int start = 0;
414
2634
        int left = (int)s.size() - index_start;
415
2634
        int i = 0;
416
24552
        while (i < (int)r.getNumChildren())
417
        {
418
13593
          bool flag = true;
419
13593
          if (i == (int)r.getNumChildren() - 1)
420
          {
421
2786
            if (testConstStringInRegExp(s, index_start + start, r[i]))
422
            {
423
964
              return true;
424
            }
425
          }
426
10807
          else if (i == -1)
427
          {
428
1670
            return false;
429
          }
430
          else
431
          {
432
25410
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
433
            {
434
38028
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
435
21755
              if (testConstStringInRegExp(t, 0, r[i]))
436
              {
437
5482
                start += vec_k[i];
438
5482
                left -= vec_k[i];
439
5482
                flag = false;
440
5482
                ++i;
441
5482
                vec_k[i] = -1;
442
5482
                break;
443
              }
444
            }
445
          }
446
447
10959
          if (flag)
448
          {
449
5477
            --i;
450
5477
            if (i >= 0)
451
            {
452
3807
              start -= vec_k[i];
453
3807
              left += vec_k[i];
454
            }
455
          }
456
        }
457
        return false;
458
      }
459
      else
460
      {
461
1217
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
462
        {
463
1217
          if (!testConstStringInRegExp(s, index_start, r[i]))
464
          {
465
912
            return false;
466
          }
467
        }
468
        return true;
469
      }
470
    }
471
273547
    case REGEXP_UNION:
472
    {
473
793478
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
474
      {
475
539371
        if (testConstStringInRegExp(s, index_start, r[i]))
476
        {
477
19440
          return true;
478
        }
479
      }
480
254107
      return false;
481
    }
482
853
    case REGEXP_INTER:
483
    {
484
1131
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
485
      {
486
1006
        if (!testConstStringInRegExp(s, index_start, r[i]))
487
        {
488
728
          return false;
489
        }
490
      }
491
125
      return true;
492
    }
493
28154
    case REGEXP_STAR:
494
    {
495
28154
      if (s.size() != index_start)
496
      {
497
301638
        for (unsigned i = s.size() - index_start; i > 0; --i)
498
        {
499
560300
          cvc5::String t = s.substr(index_start, i);
500
284622
          if (testConstStringInRegExp(t, 0, r[0]))
501
          {
502
47210
            if (index_start + i == s.size()
503
47210
                || testConstStringInRegExp(s, index_start + i, r))
504
            {
505
8944
              return true;
506
            }
507
          }
508
        }
509
17016
        return false;
510
      }
511
      else
512
      {
513
2194
        return true;
514
      }
515
    }
516
61
    case REGEXP_EMPTY:
517
    {
518
61
      return false;
519
    }
520
10959
    case REGEXP_SIGMA:
521
    {
522
10959
      if (s.size() == index_start + 1)
523
      {
524
4469
        return true;
525
      }
526
      else
527
      {
528
6490
        return false;
529
      }
530
    }
531
539470
    case REGEXP_RANGE:
532
    {
533
539470
      if (s.size() == index_start + 1)
534
      {
535
35699
        unsigned a = r[0].getConst<String>().front();
536
35699
        unsigned b = r[1].getConst<String>().front();
537
35699
        unsigned c = s.back();
538
35699
        return (a <= c && c <= b);
539
      }
540
      else
541
      {
542
503771
        return false;
543
      }
544
    }
545
    case REGEXP_LOOP:
546
    {
547
      NodeManager* nm = NodeManager::currentNM();
548
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
549
      if (s.size() == index_start)
550
      {
551
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
552
      }
553
      else if (l == 0 && r[1] == r[2])
554
      {
555
        return false;
556
      }
557
      else
558
      {
559
        Assert(r.getNumChildren() == 3)
560
            << "String rewriter error: LOOP has 2 children";
561
        if (l == 0)
562
        {
563
          // R{0,u}
564
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
565
          for (unsigned len = s.size() - index_start; len >= 1; len--)
566
          {
567
            cvc5::String t = s.substr(index_start, len);
568
            if (testConstStringInRegExp(t, 0, r[0]))
569
            {
570
              if (len + index_start == s.size())
571
              {
572
                return true;
573
              }
574
              else
575
              {
576
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
577
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
578
                if (testConstStringInRegExp(s, index_start + len, r2))
579
                {
580
                  return true;
581
                }
582
              }
583
            }
584
          }
585
          return false;
586
        }
587
        else
588
        {
589
          // R{l,l}
590
          Assert(r[1] == r[2])
591
              << "String rewriter error: LOOP nums are not equal";
592
          if (l > s.size() - index_start)
593
          {
594
            if (testConstStringInRegExp(s, s.size(), r[0]))
595
            {
596
              l = s.size() - index_start;
597
            }
598
            else
599
            {
600
              return false;
601
            }
602
          }
603
          for (unsigned len = 1; len <= s.size() - index_start; len++)
604
          {
605
            cvc5::String t = s.substr(index_start, len);
606
            if (testConstStringInRegExp(t, 0, r[0]))
607
            {
608
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
609
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
610
              if (testConstStringInRegExp(s, index_start + len, r2))
611
              {
612
                return true;
613
              }
614
            }
615
          }
616
          return false;
617
        }
618
      }
619
    }
620
302
    case REGEXP_COMPLEMENT:
621
    {
622
302
      return !testConstStringInRegExp(s, index_start, r[0]);
623
      break;
624
    }
625
    default:
626
    {
627
      Assert(!utils::isRegExpKind(k));
628
      return false;
629
    }
630
  }
631
}
632
633
272
bool RegExpEntail::hasEpsilonNode(TNode node)
634
{
635
824
  for (const Node& nc : node)
636
  {
637
590
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
638
    {
639
38
      return true;
640
    }
641
  }
642
234
  return false;
643
}
644
645
3226
Node RegExpEntail::getFixedLengthForRegexp(Node n)
646
{
647
3226
  NodeManager* nm = NodeManager::currentNM();
648
3226
  if (n.getKind() == STRING_TO_REGEXP)
649
  {
650
1146
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
651
1091
    ret = Rewriter::rewrite(ret);
652
1091
    if (ret.isConst())
653
    {
654
1036
      return ret;
655
    }
656
  }
657
2135
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
658
  {
659
710
    return nm->mkConst(Rational(1));
660
  }
661
1425
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
662
  {
663
970
    Node ret;
664
1084
    for (const Node& nc : n)
665
    {
666
1515
      Node flc = getFixedLengthForRegexp(nc);
667
916
      if (flc.isNull() || (!ret.isNull() && ret != flc))
668
      {
669
317
        return Node::null();
670
      }
671
599
      else if (ret.isNull())
672
      {
673
        // first time
674
411
        ret = flc;
675
      }
676
    }
677
168
    return ret;
678
  }
679
940
  else if (n.getKind() == REGEXP_CONCAT)
680
  {
681
810
    NodeBuilder nb(PLUS);
682
771
    for (const Node& nc : n)
683
    {
684
1130
      Node flc = getFixedLengthForRegexp(nc);
685
764
      if (flc.isNull())
686
      {
687
398
        return flc;
688
      }
689
366
      nb << flc;
690
    }
691
14
    Node ret = nb.constructNode();
692
7
    ret = Rewriter::rewrite(ret);
693
7
    return ret;
694
  }
695
590
  return Node::null();
696
}
697
698
796
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
699
{
700
796
  Assert(Rewriter::rewrite(r1) == r1);
701
796
  Assert(Rewriter::rewrite(r2) == r2);
702
796
  if (r1 == r2)
703
  {
704
18
    return true;
705
  }
706
707
  // This method only works on a fragment of regular expressions
708
778
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
709
  {
710
610
    return false;
711
  }
712
168
  NodeManager* nm = NodeManager::currentNM();
713
336
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
714
336
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
715
716
336
  std::vector<Node> v1, v2;
717
168
  utils::getRegexpComponents(r1, v1);
718
168
  utils::getRegexpComponents(r2, v2);
719
720
  // In the following, we iterate over `r2` (the "includee") and try to
721
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
722
  // positions in `r1` that we could currently be at.
723
336
  std::unordered_set<size_t> newIdxs = {0};
724
336
  std::unordered_set<size_t> idxs;
725
988
  for (const Node& n2 : v2)
726
  {
727
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
728
    // removed and for (re.* re.allchar), we additionally include the option of
729
    // skipping it. Indices must be smaller than the size of `v1`.
730
878
    idxs.clear();
731
2830
    for (size_t idx : newIdxs)
732
    {
733
1952
      if (idx < v1.size())
734
      {
735
1904
        idxs.insert(idx);
736
1904
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
737
        {
738
1090
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
739
1090
          idxs.insert(idx + 1);
740
        }
741
      }
742
    }
743
878
    newIdxs.clear();
744
745
878
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
746
    {
747
634
      Assert(n2 == sigma
748
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
749
2856
      for (size_t idx : idxs)
750
      {
751
2222
        if (v1[idx] == sigma || v1[idx] == n2)
752
        {
753
          // Given a character or an re.allchar in `r2`, we can either
754
          // match it with a corresponding character in `r1` or an
755
          // re.allchar
756
684
          newIdxs.insert(idx + 1);
757
        }
758
      }
759
    }
760
761
3872
    for (size_t idx : idxs)
762
    {
763
2994
      if (v1[idx] == sigmaStar)
764
      {
765
        // (re.* re.allchar) can match an arbitrary amount of `r2`
766
1200
        newIdxs.insert(idx);
767
      }
768
1794
      else if (utils::isUnboundedWildcard(v1, idx))
769
      {
770
        // If a series of re.allchar is followed by (re.* re.allchar), we
771
        // can decide not to "waste" the re.allchar because the order of
772
        // the two wildcards is not observable (i.e. it does not change
773
        // the sequences matched by the regular expression)
774
454
        newIdxs.insert(idx);
775
      }
776
    }
777
778
878
    if (newIdxs.empty())
779
    {
780
      // If there are no candidates, we can't match the remainder of r2
781
58
      return false;
782
    }
783
  }
784
785
  // We have processed all of `r2`. We are now looking if there was also a
786
  // path to the end in `r1`. This makes sure that we don't have leftover
787
  // bits in `r1` that don't match anything in `r2`.
788
110
  bool result = false;
789
310
  for (size_t idx : newIdxs)
790
  {
791
258
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
792
    {
793
58
      result = true;
794
58
      break;
795
    }
796
  }
797
798
110
  return result;
799
}
800
801
}  // namespace strings
802
}  // namespace theory
803
31137
}  // namespace cvc5