GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 350 397 88.2 %
Date: 2021-09-17 Branches: 727 1493 48.7 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
#include "util/rational.h"
22
#include "util/string.h"
23
24
using namespace std;
25
using namespace cvc5::kind;
26
27
namespace cvc5 {
28
namespace theory {
29
namespace strings {
30
31
17882
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
32
                                       std::vector<Node>& children,
33
                                       int dir)
34
{
35
35764
  Trace("regexp-ext-rewrite-debug")
36
17882
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
37
35764
  Trace("regexp-ext-rewrite-debug")
38
17882
      << "  mchildren : " << mchildren << std::endl;
39
17882
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
40
17882
  NodeManager* nm = NodeManager::currentNM();
41
17882
  unsigned tmin = dir < 0 ? 0 : dir;
42
17882
  unsigned tmax = dir < 0 ? 1 : dir;
43
  // try to remove off front and back
44
50541
  for (unsigned t = 0; t < 2; t++)
45
  {
46
35102
    if (tmin <= t && t <= tmax)
47
    {
48
23065
      bool do_next = true;
49
69103
      while (!children.empty() && !mchildren.empty() && do_next)
50
      {
51
25462
        do_next = false;
52
48481
        Node xc = mchildren[mchildren.size() - 1];
53
48481
        Node rc = children[children.size() - 1];
54
50924
        Trace("regexp-ext-rewrite-debug")
55
25462
            << "* " << xc << " in " << rc << std::endl;
56
25462
        Assert(rc.getKind() != REGEXP_CONCAT);
57
25462
        Assert(xc.getKind() != STRING_CONCAT);
58
25462
        if (rc.getKind() == STRING_TO_REGEXP)
59
        {
60
9287
          if (xc == rc[0])
61
          {
62
1444
            children.pop_back();
63
1444
            mchildren.pop_back();
64
1444
            do_next = true;
65
1444
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
66
          }
67
7843
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
68
          {
69
322
            Trace("regexp-ext-rewrite-debug")
70
161
                << "- ignore empty RE" << std::endl;
71
            // ignore and continue
72
161
            children.pop_back();
73
161
            do_next = true;
74
          }
75
7682
          else if (xc.isConst() && rc[0].isConst())
76
          {
77
            // split the constant
78
            size_t index;
79
3648
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
80
5686
            Trace("regexp-ext-rewrite-debug")
81
5686
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
82
2843
                << " -> " << s << " " << index << " " << t << std::endl;
83
2843
            if (s.isNull())
84
            {
85
4076
              Trace("regexp-ext-rewrite-debug")
86
2038
                  << "...return false" << std::endl;
87
2038
              return nm->mkConst(false);
88
            }
89
            else
90
            {
91
1610
              Trace("regexp-ext-rewrite-debug")
92
805
                  << "- strip equal const" << std::endl;
93
805
              children.pop_back();
94
805
              mchildren.pop_back();
95
805
              if (index == 0)
96
              {
97
747
                mchildren.push_back(s);
98
              }
99
              else
100
              {
101
58
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
102
              }
103
            }
104
805
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
105
805
            do_next = true;
106
          }
107
        }
108
16175
        else if (xc.isConst())
109
        {
110
          // check for constants
111
11097
          cvc5::String s = xc.getConst<String>();
112
5751
          if (Word::isEmpty(xc))
113
          {
114
80
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
115
            // ignore and continue
116
80
            mchildren.pop_back();
117
80
            do_next = true;
118
          }
119
5671
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
120
          {
121
2149
            if (!isConstRegExp(rc))
122
            {
123
              // if a non-standard re.range term, abort
124
257
              return Node::null();
125
            }
126
4041
            std::vector<unsigned> ssVec;
127
2149
            ssVec.push_back(t == 0 ? s.back() : s.front());
128
4041
            cvc5::String ss(ssVec);
129
2149
            if (testConstStringInRegExp(ss, 0, rc))
130
            {
131
              // strip off one character
132
1892
              mchildren.pop_back();
133
1892
              if (s.size() > 1)
134
              {
135
1296
                if (t == 0)
136
                {
137
806
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
138
                }
139
                else
140
                {
141
490
                  mchildren.push_back(nm->mkConst(s.substr(1)));
142
                }
143
              }
144
1892
              children.pop_back();
145
1892
              do_next = true;
146
            }
147
            else
148
            {
149
514
              Trace("regexp-ext-rewrite-debug")
150
257
                  << "...return false" << std::endl;
151
257
              return nm->mkConst(false);
152
            }
153
          }
154
3522
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
155
          {
156
            // see if any/each child does not work
157
1460
            bool result_valid = true;
158
2772
            Node result;
159
2772
            Node emp_s = nm->mkConst(String(""));
160
4618
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
161
            {
162
6316
              std::vector<Node> mchildren_s;
163
6316
              std::vector<Node> children_s;
164
3158
              mchildren_s.push_back(xc);
165
3158
              utils::getConcat(rc[i], children_s);
166
3158
              Trace("regexp-ext-rewrite-debug") << push;
167
6316
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
168
3158
              Trace("regexp-ext-rewrite-debug") << pop;
169
3158
              if (!ret.isNull())
170
              {
171
                // one conjunct cannot be satisfied, return false
172
1554
                if (rc.getKind() == REGEXP_INTER)
173
                {
174
                  Trace("regexp-ext-rewrite-debug")
175
                      << "...return " << ret << std::endl;
176
                  return ret;
177
                }
178
              }
179
              else
180
              {
181
1604
                if (children_s.empty())
182
                {
183
                  // if we were able to fully consume, store the result
184
1059
                  Assert(mchildren_s.size() <= 1);
185
1059
                  if (mchildren_s.empty())
186
                  {
187
514
                    mchildren_s.push_back(emp_s);
188
                  }
189
1059
                  if (result.isNull())
190
                  {
191
970
                    result = mchildren_s[0];
192
                  }
193
89
                  else if (result != mchildren_s[0])
194
                  {
195
89
                    result_valid = false;
196
                  }
197
                }
198
                else
199
                {
200
545
                  result_valid = false;
201
                }
202
              }
203
            }
204
1460
            if (result_valid)
205
            {
206
928
              if (result.isNull())
207
              {
208
                // all disjuncts cannot be satisfied, return false
209
148
                Assert(rc.getKind() == REGEXP_UNION);
210
296
                Trace("regexp-ext-rewrite-debug")
211
148
                    << "...return false" << std::endl;
212
148
                return nm->mkConst(false);
213
              }
214
              else
215
              {
216
1560
                Trace("regexp-ext-rewrite-debug")
217
780
                    << "- same result, try again, children now " << children
218
780
                    << std::endl;
219
                // all branches led to the same result
220
780
                children.pop_back();
221
780
                mchildren.pop_back();
222
780
                if (result != emp_s)
223
                {
224
420
                  mchildren.push_back(result);
225
                }
226
780
                do_next = true;
227
              }
228
            }
229
          }
230
2062
          else if (rc.getKind() == REGEXP_STAR)
231
          {
232
            // check if there is no way that this star can be unrolled even once
233
3984
            std::vector<Node> mchildren_s;
234
1992
            mchildren_s.insert(
235
3984
                mchildren_s.end(), mchildren.begin(), mchildren.end());
236
1992
            if (t == 1)
237
            {
238
969
              std::reverse(mchildren_s.begin(), mchildren_s.end());
239
            }
240
3984
            std::vector<Node> children_s;
241
1992
            utils::getConcat(rc[0], children_s);
242
3984
            Trace("regexp-ext-rewrite-debug")
243
1992
                << "- recursive call on body of star" << std::endl;
244
1992
            Trace("regexp-ext-rewrite-debug") << push;
245
3984
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
246
1992
            Trace("regexp-ext-rewrite-debug") << pop;
247
1992
            if (!ret.isNull())
248
            {
249
514
              Trace("regexp-ext-rewrite-debug")
250
257
                  << "- CRE : regexp star infeasable " << xc << " " << rc
251
257
                  << std::endl;
252
257
              children.pop_back();
253
257
              if (!children.empty())
254
              {
255
198
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
256
198
                do_next = true;
257
              }
258
            }
259
            else
260
            {
261
1735
              if (children_s.empty())
262
              {
263
                // Check if beyond this, we hit a conflict. In this case, we
264
                // must repeat.  Notice that we do not treat the case where
265
                // there are no more strings to consume as a failure, since
266
                // we may be within a recursive call, see issue #5510.
267
1466
                bool can_skip = true;
268
1466
                if (children.size() > 1)
269
                {
270
2470
                  std::vector<Node> mchildren_ss;
271
1235
                  mchildren_ss.insert(
272
2470
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
273
2470
                  std::vector<Node> children_ss;
274
1235
                  children_ss.insert(
275
2470
                      children_ss.end(), children.begin(), children.end() - 1);
276
1235
                  if (t == 1)
277
                  {
278
365
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
279
365
                    std::reverse(children_ss.begin(), children_ss.end());
280
                  }
281
2470
                  Trace("regexp-ext-rewrite-debug")
282
1235
                      << "- recursive call required repeat star" << std::endl;
283
1235
                  Trace("regexp-ext-rewrite-debug") << push;
284
2470
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
285
1235
                  Trace("regexp-ext-rewrite-debug") << pop;
286
1235
                  if (!rets.isNull())
287
                  {
288
482
                    can_skip = false;
289
                  }
290
                }
291
1466
                if (!can_skip)
292
                {
293
964
                  TypeNode stype = nm->stringType();
294
964
                  Node prev = utils::mkConcat(mchildren, stype);
295
964
                  Trace("regexp-ext-rewrite-debug")
296
482
                      << "- can't skip" << std::endl;
297
                  // take the result of fully consuming once
298
482
                  if (t == 1)
299
                  {
300
237
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
301
                  }
302
482
                  mchildren.clear();
303
482
                  mchildren.insert(
304
964
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
305
964
                  Node curr = utils::mkConcat(mchildren, stype);
306
482
                  do_next = (prev != curr);
307
964
                  Trace("regexp-ext-rewrite-debug")
308
482
                      << "- do_next = " << do_next << std::endl;
309
                }
310
                else
311
                {
312
1968
                  Trace("regexp-ext-rewrite-debug")
313
984
                      << "- can skip " << rc << " from " << xc << std::endl;
314
                }
315
              }
316
            }
317
          }
318
        }
319
23019
        if (!do_next)
320
        {
321
34354
          Trace("regexp-ext-rewrite")
322
17177
              << "- cannot consume : " << xc << " " << rc << std::endl;
323
        }
324
      }
325
    }
326
32659
    if (dir != 0)
327
    {
328
22017
      std::reverse(children.begin(), children.end());
329
22017
      std::reverse(mchildren.begin(), mchildren.end());
330
    }
331
  }
332
15439
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
333
15439
  return Node::null();
334
}
335
336
7470
bool RegExpEntail::isConstRegExp(TNode t)
337
{
338
14940
  std::unordered_set<TNode> visited;
339
14940
  std::vector<TNode> visit;
340
14940
  TNode cur;
341
7470
  visit.push_back(t);
342
16389
  do
343
  {
344
23859
    cur = visit.back();
345
23859
    visit.pop_back();
346
23859
    if (visited.find(cur) == visited.end())
347
    {
348
20506
      visited.insert(cur);
349
20506
      Kind ck = cur.getKind();
350
20506
      if (ck == STRING_TO_REGEXP)
351
      {
352
6395
        if (!cur[0].isConst())
353
        {
354
679
          return false;
355
        }
356
      }
357
14111
      else if (ck == REGEXP_RANGE)
358
      {
359
8346
        for (const Node& cn : cur)
360
        {
361
5564
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
362
          {
363
            return false;
364
          }
365
        }
366
      }
367
11329
      else if (ck == ITE)
368
      {
369
        return false;
370
      }
371
11329
      else if (cur.isVar())
372
      {
373
25
        return false;
374
      }
375
      else
376
      {
377
28293
        for (const Node& cn : cur)
378
        {
379
16989
          visit.push_back(cn);
380
        }
381
      }
382
    }
383
23155
  } while (!visit.empty());
384
6766
  return true;
385
}
386
387
885215
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
388
                                           unsigned index_start,
389
                                           TNode r)
390
{
391
885215
  Assert(index_start <= s.size());
392
1770430
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
393
885215
                        << index_start << std::endl;
394
885215
  Assert(!r.isVar());
395
885215
  Kind k = r.getKind();
396
885215
  switch (k)
397
  {
398
23874
    case STRING_TO_REGEXP:
399
    {
400
47748
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
401
23874
      if (r[0].isConst())
402
      {
403
23874
        return (s2 == r[0].getConst<String>());
404
      }
405
      Assert(false) << "RegExp contains variables";
406
      return false;
407
    }
408
4291
    case REGEXP_CONCAT:
409
    {
410
4291
      if (s.size() != index_start)
411
      {
412
6434
        std::vector<int> vec_k(r.getNumChildren(), -1);
413
3217
        int start = 0;
414
3217
        int left = (int)s.size() - index_start;
415
3217
        int i = 0;
416
28919
        while (i < (int)r.getNumChildren())
417
        {
418
16068
          bool flag = true;
419
16068
          if (i == (int)r.getNumChildren() - 1)
420
          {
421
3150
            if (testConstStringInRegExp(s, index_start + start, r[i]))
422
            {
423
1201
              return true;
424
            }
425
          }
426
12918
          else if (i == -1)
427
          {
428
2016
            return false;
429
          }
430
          else
431
          {
432
29314
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
433
            {
434
43236
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
435
24824
              if (testConstStringInRegExp(t, 0, r[i]))
436
              {
437
6412
                start += vec_k[i];
438
6412
                left -= vec_k[i];
439
6412
                flag = false;
440
6412
                ++i;
441
6412
                vec_k[i] = -1;
442
6412
                break;
443
              }
444
            }
445
          }
446
447
12851
          if (flag)
448
          {
449
6439
            --i;
450
6439
            if (i >= 0)
451
            {
452
4423
              start -= vec_k[i];
453
4423
              left += vec_k[i];
454
            }
455
          }
456
        }
457
        return false;
458
      }
459
      else
460
      {
461
1416
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
462
        {
463
1416
          if (!testConstStringInRegExp(s, index_start, r[i]))
464
          {
465
1074
            return false;
466
          }
467
        }
468
        return true;
469
      }
470
    }
471
274041
    case REGEXP_UNION:
472
    {
473
794718
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
474
      {
475
540294
        if (testConstStringInRegExp(s, index_start, r[i]))
476
        {
477
19617
          return true;
478
        }
479
      }
480
254424
      return false;
481
    }
482
1073
    case REGEXP_INTER:
483
    {
484
1433
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
485
      {
486
1268
        if (!testConstStringInRegExp(s, index_start, r[i]))
487
        {
488
908
          return false;
489
        }
490
      }
491
165
      return true;
492
    }
493
29679
    case REGEXP_STAR:
494
    {
495
29679
      if (s.size() != index_start)
496
      {
497
303213
        for (unsigned i = s.size() - index_start; i > 0; --i)
498
        {
499
562218
          cvc5::String t = s.substr(index_start, i);
500
286053
          if (testConstStringInRegExp(t, 0, r[0]))
501
          {
502
49164
            if (index_start + i == s.size()
503
49164
                || testConstStringInRegExp(s, index_start + i, r))
504
            {
505
9888
              return true;
506
            }
507
          }
508
        }
509
17160
        return false;
510
      }
511
      else
512
      {
513
2631
        return true;
514
      }
515
    }
516
10
    case REGEXP_EMPTY:
517
    {
518
10
      return false;
519
    }
520
11305
    case REGEXP_SIGMA:
521
    {
522
11305
      if (s.size() == index_start + 1)
523
      {
524
5000
        return true;
525
      }
526
      else
527
      {
528
6305
        return false;
529
      }
530
    }
531
540607
    case REGEXP_RANGE:
532
    {
533
540607
      if (s.size() == index_start + 1)
534
      {
535
36300
        unsigned a = r[0].getConst<String>().front();
536
36300
        unsigned b = r[1].getConst<String>().front();
537
36300
        unsigned c = s.back();
538
36300
        return (a <= c && c <= b);
539
      }
540
      else
541
      {
542
504307
        return false;
543
      }
544
    }
545
    case REGEXP_LOOP:
546
    {
547
      NodeManager* nm = NodeManager::currentNM();
548
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
549
      if (s.size() == index_start)
550
      {
551
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
552
      }
553
      else if (l == 0 && r[1] == r[2])
554
      {
555
        return false;
556
      }
557
      else
558
      {
559
        Assert(r.getNumChildren() == 3)
560
            << "String rewriter error: LOOP has 2 children";
561
        if (l == 0)
562
        {
563
          // R{0,u}
564
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
565
          for (unsigned len = s.size() - index_start; len >= 1; len--)
566
          {
567
            cvc5::String t = s.substr(index_start, len);
568
            if (testConstStringInRegExp(t, 0, r[0]))
569
            {
570
              if (len + index_start == s.size())
571
              {
572
                return true;
573
              }
574
              else
575
              {
576
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
577
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
578
                if (testConstStringInRegExp(s, index_start + len, r2))
579
                {
580
                  return true;
581
                }
582
              }
583
            }
584
          }
585
          return false;
586
        }
587
        else
588
        {
589
          // R{l,l}
590
          Assert(r[1] == r[2])
591
              << "String rewriter error: LOOP nums are not equal";
592
          if (l > s.size() - index_start)
593
          {
594
            if (testConstStringInRegExp(s, s.size(), r[0]))
595
            {
596
              l = s.size() - index_start;
597
            }
598
            else
599
            {
600
              return false;
601
            }
602
          }
603
          for (unsigned len = 1; len <= s.size() - index_start; len++)
604
          {
605
            cvc5::String t = s.substr(index_start, len);
606
            if (testConstStringInRegExp(t, 0, r[0]))
607
            {
608
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
609
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
610
              if (testConstStringInRegExp(s, index_start + len, r2))
611
              {
612
                return true;
613
              }
614
            }
615
          }
616
          return false;
617
        }
618
      }
619
    }
620
335
    case REGEXP_COMPLEMENT:
621
    {
622
335
      return !testConstStringInRegExp(s, index_start, r[0]);
623
      break;
624
    }
625
    default:
626
    {
627
      Assert(!utils::isRegExpKind(k));
628
      return false;
629
    }
630
  }
631
}
632
633
256
bool RegExpEntail::hasEpsilonNode(TNode node)
634
{
635
776
  for (const Node& nc : node)
636
  {
637
558
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
638
    {
639
38
      return true;
640
    }
641
  }
642
218
  return false;
643
}
644
645
3602
Node RegExpEntail::getFixedLengthForRegexp(Node n)
646
{
647
3602
  NodeManager* nm = NodeManager::currentNM();
648
3602
  if (n.getKind() == STRING_TO_REGEXP)
649
  {
650
1206
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
651
1179
    ret = Rewriter::rewrite(ret);
652
1179
    if (ret.isConst())
653
    {
654
1152
      return ret;
655
    }
656
  }
657
2423
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
658
  {
659
858
    return nm->mkConst(Rational(1));
660
  }
661
1565
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
662
  {
663
1062
    Node ret;
664
1208
    for (const Node& nc : n)
665
    {
666
1699
      Node flc = getFixedLengthForRegexp(nc);
667
1022
      if (flc.isNull() || (!ret.isNull() && ret != flc))
668
      {
669
345
        return Node::null();
670
      }
671
677
      else if (ret.isNull())
672
      {
673
        // first time
674
469
        ret = flc;
675
      }
676
    }
677
186
    return ret;
678
  }
679
1034
  else if (n.getKind() == REGEXP_CONCAT)
680
  {
681
918
    NodeBuilder nb(PLUS);
682
879
    for (const Node& nc : n)
683
    {
684
1292
      Node flc = getFixedLengthForRegexp(nc);
685
872
      if (flc.isNull())
686
      {
687
452
        return flc;
688
      }
689
420
      nb << flc;
690
    }
691
14
    Node ret = nb.constructNode();
692
7
    ret = Rewriter::rewrite(ret);
693
7
    return ret;
694
  }
695
602
  return Node::null();
696
}
697
698
708
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
699
{
700
708
  Assert(Rewriter::rewrite(r1) == r1);
701
708
  Assert(Rewriter::rewrite(r2) == r2);
702
708
  if (r1 == r2)
703
  {
704
18
    return true;
705
  }
706
707
  // This method only works on a fragment of regular expressions
708
690
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
709
  {
710
528
    return false;
711
  }
712
162
  NodeManager* nm = NodeManager::currentNM();
713
324
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
714
324
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
715
716
324
  std::vector<Node> v1, v2;
717
162
  utils::getRegexpComponents(r1, v1);
718
162
  utils::getRegexpComponents(r2, v2);
719
720
  // In the following, we iterate over `r2` (the "includee") and try to
721
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
722
  // positions in `r1` that we could currently be at.
723
324
  std::unordered_set<size_t> newIdxs = {0};
724
324
  std::unordered_set<size_t> idxs;
725
950
  for (const Node& n2 : v2)
726
  {
727
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
728
    // removed and for (re.* re.allchar), we additionally include the option of
729
    // skipping it. Indices must be smaller than the size of `v1`.
730
844
    idxs.clear();
731
2756
    for (size_t idx : newIdxs)
732
    {
733
1912
      if (idx < v1.size())
734
      {
735
1864
        idxs.insert(idx);
736
1864
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
737
        {
738
1078
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
739
1078
          idxs.insert(idx + 1);
740
        }
741
      }
742
    }
743
844
    newIdxs.clear();
744
745
844
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
746
    {
747
608
      Assert(n2 == sigma
748
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
749
2792
      for (size_t idx : idxs)
750
      {
751
2184
        if (v1[idx] == sigma || v1[idx] == n2)
752
        {
753
          // Given a character or an re.allchar in `r2`, we can either
754
          // match it with a corresponding character in `r1` or an
755
          // re.allchar
756
662
          newIdxs.insert(idx + 1);
757
        }
758
      }
759
    }
760
761
3786
    for (size_t idx : idxs)
762
    {
763
2942
      if (v1[idx] == sigmaStar)
764
      {
765
        // (re.* re.allchar) can match an arbitrary amount of `r2`
766
1188
        newIdxs.insert(idx);
767
      }
768
1754
      else if (utils::isUnboundedWildcard(v1, idx))
769
      {
770
        // If a series of re.allchar is followed by (re.* re.allchar), we
771
        // can decide not to "waste" the re.allchar because the order of
772
        // the two wildcards is not observable (i.e. it does not change
773
        // the sequences matched by the regular expression)
774
444
        newIdxs.insert(idx);
775
      }
776
    }
777
778
844
    if (newIdxs.empty())
779
    {
780
      // If there are no candidates, we can't match the remainder of r2
781
56
      return false;
782
    }
783
  }
784
785
  // We have processed all of `r2`. We are now looking if there was also a
786
  // path to the end in `r1`. This makes sure that we don't have leftover
787
  // bits in `r1` that don't match anything in `r2`.
788
106
  bool result = false;
789
300
  for (size_t idx : newIdxs)
790
  {
791
252
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
792
    {
793
58
      result = true;
794
58
      break;
795
    }
796
  }
797
798
106
  return result;
799
}
800
801
}  // namespace strings
802
}  // namespace theory
803
29577
}  // namespace cvc5