GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_entail.cpp Lines: 347 397 87.4 %
Date: 2021-08-06 Branches: 721 1495 48.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving regular expressions.
14
 */
15
16
#include "theory/strings/regexp_entail.h"
17
18
#include "theory/rewriter.h"
19
#include "theory/strings/theory_strings_utils.h"
20
#include "theory/strings/word.h"
21
#include "util/rational.h"
22
#include "util/string.h"
23
24
using namespace std;
25
using namespace cvc5::kind;
26
27
namespace cvc5 {
28
namespace theory {
29
namespace strings {
30
31
15167
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren,
32
                                       std::vector<Node>& children,
33
                                       int dir)
34
{
35
30334
  Trace("regexp-ext-rewrite-debug")
36
15167
      << "Simple reg exp consume, dir=" << dir << ":" << std::endl;
37
30334
  Trace("regexp-ext-rewrite-debug")
38
15167
      << "  mchildren : " << mchildren << std::endl;
39
15167
  Trace("regexp-ext-rewrite-debug") << "  children : " << children << std::endl;
40
15167
  NodeManager* nm = NodeManager::currentNM();
41
15167
  unsigned tmin = dir < 0 ? 0 : dir;
42
15167
  unsigned tmax = dir < 0 ? 1 : dir;
43
  // try to remove off front and back
44
42804
  for (unsigned t = 0; t < 2; t++)
45
  {
46
29764
    if (tmin <= t && t <= tmax)
47
    {
48
19379
      bool do_next = true;
49
57511
      while (!children.empty() && !mchildren.empty() && do_next)
50
      {
51
21193
        do_next = false;
52
40259
        Node xc = mchildren[mchildren.size() - 1];
53
40259
        Node rc = children[children.size() - 1];
54
42386
        Trace("regexp-ext-rewrite-debug")
55
21193
            << "* " << xc << " in " << rc << std::endl;
56
21193
        Assert(rc.getKind() != REGEXP_CONCAT);
57
21193
        Assert(xc.getKind() != STRING_CONCAT);
58
21193
        if (rc.getKind() == STRING_TO_REGEXP)
59
        {
60
8643
          if (xc == rc[0])
61
          {
62
1256
            children.pop_back();
63
1256
            mchildren.pop_back();
64
1256
            do_next = true;
65
1256
            Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl;
66
          }
67
7387
          else if (rc[0].isConst() && Word::isEmpty(rc[0]))
68
          {
69
224
            Trace("regexp-ext-rewrite-debug")
70
112
                << "- ignore empty RE" << std::endl;
71
            // ignore and continue
72
112
            children.pop_back();
73
112
            do_next = true;
74
          }
75
7275
          else if (xc.isConst() && rc[0].isConst())
76
          {
77
            // split the constant
78
            size_t index;
79
3148
            Node s = Word::splitConstant(xc, rc[0], index, t == 0);
80
4936
            Trace("regexp-ext-rewrite-debug")
81
4936
                << "- CRE: Regexp const split : " << xc << " " << rc[0]
82
2468
                << " -> " << s << " " << index << " " << t << std::endl;
83
2468
            if (s.isNull())
84
            {
85
3576
              Trace("regexp-ext-rewrite-debug")
86
1788
                  << "...return false" << std::endl;
87
1788
              return nm->mkConst(false);
88
            }
89
            else
90
            {
91
1360
              Trace("regexp-ext-rewrite-debug")
92
680
                  << "- strip equal const" << std::endl;
93
680
              children.pop_back();
94
680
              mchildren.pop_back();
95
680
              if (index == 0)
96
              {
97
630
                mchildren.push_back(s);
98
              }
99
              else
100
              {
101
50
                children.push_back(nm->mkNode(STRING_TO_REGEXP, s));
102
              }
103
            }
104
680
            Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl;
105
680
            do_next = true;
106
          }
107
        }
108
12550
        else if (xc.isConst())
109
        {
110
          // check for constants
111
8233
          cvc5::String s = xc.getConst<String>();
112
4286
          if (Word::isEmpty(xc))
113
          {
114
80
            Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl;
115
            // ignore and continue
116
80
            mchildren.pop_back();
117
80
            do_next = true;
118
          }
119
4206
          else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA)
120
          {
121
1318
            if (!isConstRegExp(rc))
122
            {
123
              // if a non-standard re.range term, abort
124
225
              return Node::null();
125
            }
126
2411
            std::vector<unsigned> ssVec;
127
1318
            ssVec.push_back(t == 0 ? s.back() : s.front());
128
2411
            cvc5::String ss(ssVec);
129
1318
            if (testConstStringInRegExp(ss, 0, rc))
130
            {
131
              // strip off one character
132
1093
              mchildren.pop_back();
133
1093
              if (s.size() > 1)
134
              {
135
923
                if (t == 0)
136
                {
137
747
                  mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1)));
138
                }
139
                else
140
                {
141
176
                  mchildren.push_back(nm->mkConst(s.substr(1)));
142
                }
143
              }
144
1093
              children.pop_back();
145
1093
              do_next = true;
146
            }
147
            else
148
            {
149
450
              Trace("regexp-ext-rewrite-debug")
150
225
                  << "...return false" << std::endl;
151
225
              return nm->mkConst(false);
152
            }
153
          }
154
2888
          else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION)
155
          {
156
            // see if any/each child does not work
157
1196
            bool result_valid = true;
158
2278
            Node result;
159
2278
            Node emp_s = nm->mkConst(String(""));
160
3726
            for (unsigned i = 0; i < rc.getNumChildren(); i++)
161
            {
162
5060
              std::vector<Node> mchildren_s;
163
5060
              std::vector<Node> children_s;
164
2530
              mchildren_s.push_back(xc);
165
2530
              utils::getConcat(rc[i], children_s);
166
2530
              Trace("regexp-ext-rewrite-debug") << push;
167
5060
              Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
168
2530
              Trace("regexp-ext-rewrite-debug") << pop;
169
2530
              if (!ret.isNull())
170
              {
171
                // one conjunct cannot be satisfied, return false
172
1332
                if (rc.getKind() == REGEXP_INTER)
173
                {
174
                  Trace("regexp-ext-rewrite-debug")
175
                      << "...return " << ret << std::endl;
176
                  return ret;
177
                }
178
              }
179
              else
180
              {
181
1198
                if (children_s.empty())
182
                {
183
                  // if we were able to fully consume, store the result
184
830
                  Assert(mchildren_s.size() <= 1);
185
830
                  if (mchildren_s.empty())
186
                  {
187
378
                    mchildren_s.push_back(emp_s);
188
                  }
189
830
                  if (result.isNull())
190
                  {
191
786
                    result = mchildren_s[0];
192
                  }
193
44
                  else if (result != mchildren_s[0])
194
                  {
195
44
                    result_valid = false;
196
                  }
197
                }
198
                else
199
                {
200
368
                  result_valid = false;
201
                }
202
              }
203
            }
204
1196
            if (result_valid)
205
            {
206
802
              if (result.isNull())
207
              {
208
                // all disjuncts cannot be satisfied, return false
209
114
                Assert(rc.getKind() == REGEXP_UNION);
210
228
                Trace("regexp-ext-rewrite-debug")
211
114
                    << "...return false" << std::endl;
212
114
                return nm->mkConst(false);
213
              }
214
              else
215
              {
216
1376
                Trace("regexp-ext-rewrite-debug")
217
688
                    << "- same result, try again, children now " << children
218
688
                    << std::endl;
219
                // all branches led to the same result
220
688
                children.pop_back();
221
688
                mchildren.pop_back();
222
688
                if (result != emp_s)
223
                {
224
376
                  mchildren.push_back(result);
225
                }
226
688
                do_next = true;
227
              }
228
            }
229
          }
230
1692
          else if (rc.getKind() == REGEXP_STAR)
231
          {
232
            // check if there is no way that this star can be unrolled even once
233
3244
            std::vector<Node> mchildren_s;
234
1622
            mchildren_s.insert(
235
3244
                mchildren_s.end(), mchildren.begin(), mchildren.end());
236
1622
            if (t == 1)
237
            {
238
683
              std::reverse(mchildren_s.begin(), mchildren_s.end());
239
            }
240
3244
            std::vector<Node> children_s;
241
1622
            utils::getConcat(rc[0], children_s);
242
3244
            Trace("regexp-ext-rewrite-debug")
243
1622
                << "- recursive call on body of star" << std::endl;
244
1622
            Trace("regexp-ext-rewrite-debug") << push;
245
3244
            Node ret = simpleRegexpConsume(mchildren_s, children_s, t);
246
1622
            Trace("regexp-ext-rewrite-debug") << pop;
247
1622
            if (!ret.isNull())
248
            {
249
396
              Trace("regexp-ext-rewrite-debug")
250
198
                  << "- CRE : regexp star infeasable " << xc << " " << rc
251
198
                  << std::endl;
252
198
              children.pop_back();
253
198
              if (!children.empty())
254
              {
255
158
                Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl;
256
158
                do_next = true;
257
              }
258
            }
259
            else
260
            {
261
1424
              if (children_s.empty())
262
              {
263
                // Check if beyond this, we hit a conflict. In this case, we
264
                // must repeat.  Notice that we do not treat the case where
265
                // there are no more strings to consume as a failure, since
266
                // we may be within a recursive call, see issue #5510.
267
1195
                bool can_skip = true;
268
1195
                if (children.size() > 1)
269
                {
270
2286
                  std::vector<Node> mchildren_ss;
271
1143
                  mchildren_ss.insert(
272
2286
                      mchildren_ss.end(), mchildren.begin(), mchildren.end());
273
2286
                  std::vector<Node> children_ss;
274
1143
                  children_ss.insert(
275
2286
                      children_ss.end(), children.begin(), children.end() - 1);
276
1143
                  if (t == 1)
277
                  {
278
323
                    std::reverse(mchildren_ss.begin(), mchildren_ss.end());
279
323
                    std::reverse(children_ss.begin(), children_ss.end());
280
                  }
281
2286
                  Trace("regexp-ext-rewrite-debug")
282
1143
                      << "- recursive call required repeat star" << std::endl;
283
1143
                  Trace("regexp-ext-rewrite-debug") << push;
284
2286
                  Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t);
285
1143
                  Trace("regexp-ext-rewrite-debug") << pop;
286
1143
                  if (!rets.isNull())
287
                  {
288
460
                    can_skip = false;
289
                  }
290
                }
291
1195
                if (!can_skip)
292
                {
293
920
                  TypeNode stype = nm->stringType();
294
920
                  Node prev = utils::mkConcat(mchildren, stype);
295
920
                  Trace("regexp-ext-rewrite-debug")
296
460
                      << "- can't skip" << std::endl;
297
                  // take the result of fully consuming once
298
460
                  if (t == 1)
299
                  {
300
217
                    std::reverse(mchildren_s.begin(), mchildren_s.end());
301
                  }
302
460
                  mchildren.clear();
303
460
                  mchildren.insert(
304
920
                      mchildren.end(), mchildren_s.begin(), mchildren_s.end());
305
920
                  Node curr = utils::mkConcat(mchildren, stype);
306
460
                  do_next = (prev != curr);
307
920
                  Trace("regexp-ext-rewrite-debug")
308
460
                      << "- do_next = " << do_next << std::endl;
309
                }
310
                else
311
                {
312
1470
                  Trace("regexp-ext-rewrite-debug")
313
735
                      << "- can skip " << rc << " from " << xc << std::endl;
314
                }
315
              }
316
            }
317
          }
318
        }
319
19066
        if (!do_next)
320
        {
321
29078
          Trace("regexp-ext-rewrite")
322
14539
              << "- cannot consume : " << xc << " " << rc << std::endl;
323
        }
324
      }
325
    }
326
27637
    if (dir != 0)
327
    {
328
18427
      std::reverse(children.begin(), children.end());
329
18427
      std::reverse(mchildren.begin(), mchildren.end());
330
    }
331
  }
332
13040
  Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl;
333
13040
  return Node::null();
334
}
335
336
6010
bool RegExpEntail::isConstRegExp(TNode t)
337
{
338
12020
  std::unordered_set<TNode> visited;
339
12020
  std::vector<TNode> visit;
340
12020
  TNode cur;
341
6010
  visit.push_back(t);
342
14476
  do
343
  {
344
20486
    cur = visit.back();
345
20486
    visit.pop_back();
346
20486
    if (visited.find(cur) == visited.end())
347
    {
348
17765
      visited.insert(cur);
349
17765
      Kind ck = cur.getKind();
350
17765
      if (ck == STRING_TO_REGEXP)
351
      {
352
6004
        if (!cur[0].isConst())
353
        {
354
679
          return false;
355
        }
356
      }
357
11761
      else if (ck == REGEXP_RANGE)
358
      {
359
4194
        for (const Node& cn : cur)
360
        {
361
2796
          if (!cn.isConst() || cn.getConst<String>().size() != 1)
362
          {
363
            return false;
364
          }
365
        }
366
      }
367
10363
      else if (ck == ITE)
368
      {
369
        return false;
370
      }
371
10363
      else if (cur.isVar())
372
      {
373
25
        return false;
374
      }
375
      else
376
      {
377
25414
        for (const Node& cn : cur)
378
        {
379
15076
          visit.push_back(cn);
380
        }
381
      }
382
    }
383
19782
  } while (!visit.empty());
384
5306
  return true;
385
}
386
387
878541
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s,
388
                                           unsigned index_start,
389
                                           TNode r)
390
{
391
878541
  Assert(index_start <= s.size());
392
1757082
  Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at "
393
878541
                        << index_start << std::endl;
394
878541
  Assert(!r.isVar());
395
878541
  Kind k = r.getKind();
396
878541
  switch (k)
397
  {
398
22303
    case STRING_TO_REGEXP:
399
    {
400
44606
      cvc5::String s2 = s.substr(index_start, s.size() - index_start);
401
22303
      if (r[0].isConst())
402
      {
403
22303
        return (s2 == r[0].getConst<String>());
404
      }
405
      Assert(false) << "RegExp contains variables";
406
      return false;
407
    }
408
3763
    case REGEXP_CONCAT:
409
    {
410
3763
      if (s.size() != index_start)
411
      {
412
5808
        std::vector<int> vec_k(r.getNumChildren(), -1);
413
2904
        int start = 0;
414
2904
        int left = (int)s.size() - index_start;
415
2904
        int i = 0;
416
26330
        while (i < (int)r.getNumChildren())
417
        {
418
14617
          bool flag = true;
419
14617
          if (i == (int)r.getNumChildren() - 1)
420
          {
421
2899
            if (testConstStringInRegExp(s, index_start + start, r[i]))
422
            {
423
1029
              return true;
424
            }
425
          }
426
11718
          else if (i == -1)
427
          {
428
1875
            return false;
429
          }
430
          else
431
          {
432
26912
            for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i])
433
            {
434
39909
              cvc5::String t = s.substr(index_start + start, vec_k[i]);
435
22840
              if (testConstStringInRegExp(t, 0, r[i]))
436
              {
437
5771
                start += vec_k[i];
438
5771
                left -= vec_k[i];
439
5771
                flag = false;
440
5771
                ++i;
441
5771
                vec_k[i] = -1;
442
5771
                break;
443
              }
444
            }
445
          }
446
447
11713
          if (flag)
448
          {
449
5942
            --i;
450
5942
            if (i >= 0)
451
            {
452
4067
              start -= vec_k[i];
453
4067
              left += vec_k[i];
454
            }
455
          }
456
        }
457
        return false;
458
      }
459
      else
460
      {
461
1181
        for (unsigned i = 0; i < r.getNumChildren(); ++i)
462
        {
463
1181
          if (!testConstStringInRegExp(s, index_start, r[i]))
464
          {
465
859
            return false;
466
          }
467
        }
468
        return true;
469
      }
470
    }
471
273757
    case REGEXP_UNION:
472
    {
473
794060
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
474
      {
475
539773
        if (testConstStringInRegExp(s, index_start, r[i]))
476
        {
477
19470
          return true;
478
        }
479
      }
480
254287
      return false;
481
    }
482
1009
    case REGEXP_INTER:
483
    {
484
1369
      for (unsigned i = 0; i < r.getNumChildren(); ++i)
485
      {
486
1204
        if (!testConstStringInRegExp(s, index_start, r[i]))
487
        {
488
844
          return false;
489
        }
490
      }
491
165
      return true;
492
    }
493
28652
    case REGEXP_STAR:
494
    {
495
28652
      if (s.size() != index_start)
496
      {
497
301334
        for (unsigned i = s.size() - index_start; i > 0; --i)
498
        {
499
559329
          cvc5::String t = s.substr(index_start, i);
500
284230
          if (testConstStringInRegExp(t, 0, r[0]))
501
          {
502
47614
            if (index_start + i == s.size()
503
47614
                || testConstStringInRegExp(s, index_start + i, r))
504
            {
505
9131
              return true;
506
            }
507
          }
508
        }
509
17104
        return false;
510
      }
511
      else
512
      {
513
2417
        return true;
514
      }
515
    }
516
10
    case REGEXP_EMPTY:
517
    {
518
10
      return false;
519
    }
520
10262
    case REGEXP_SIGMA:
521
    {
522
10262
      if (s.size() == index_start + 1)
523
      {
524
4565
        return true;
525
      }
526
      else
527
      {
528
5697
        return false;
529
      }
530
    }
531
538444
    case REGEXP_RANGE:
532
    {
533
538444
      if (s.size() == index_start + 1)
534
      {
535
34889
        unsigned a = r[0].getConst<String>().front();
536
34889
        unsigned b = r[1].getConst<String>().front();
537
34889
        unsigned c = s.back();
538
34889
        return (a <= c && c <= b);
539
      }
540
      else
541
      {
542
503555
        return false;
543
      }
544
    }
545
    case REGEXP_LOOP:
546
    {
547
      NodeManager* nm = NodeManager::currentNM();
548
      uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt();
549
      if (s.size() == index_start)
550
      {
551
        return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]);
552
      }
553
      else if (l == 0 && r[1] == r[2])
554
      {
555
        return false;
556
      }
557
      else
558
      {
559
        Assert(r.getNumChildren() == 3)
560
            << "String rewriter error: LOOP has 2 children";
561
        if (l == 0)
562
        {
563
          // R{0,u}
564
          uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt();
565
          for (unsigned len = s.size() - index_start; len >= 1; len--)
566
          {
567
            cvc5::String t = s.substr(index_start, len);
568
            if (testConstStringInRegExp(t, 0, r[0]))
569
            {
570
              if (len + index_start == s.size())
571
              {
572
                return true;
573
              }
574
              else
575
              {
576
                Node num2 = nm->mkConst(cvc5::Rational(u - 1));
577
                Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2);
578
                if (testConstStringInRegExp(s, index_start + len, r2))
579
                {
580
                  return true;
581
                }
582
              }
583
            }
584
          }
585
          return false;
586
        }
587
        else
588
        {
589
          // R{l,l}
590
          Assert(r[1] == r[2])
591
              << "String rewriter error: LOOP nums are not equal";
592
          if (l > s.size() - index_start)
593
          {
594
            if (testConstStringInRegExp(s, s.size(), r[0]))
595
            {
596
              l = s.size() - index_start;
597
            }
598
            else
599
            {
600
              return false;
601
            }
602
          }
603
          for (unsigned len = 1; len <= s.size() - index_start; len++)
604
          {
605
            cvc5::String t = s.substr(index_start, len);
606
            if (testConstStringInRegExp(t, 0, r[0]))
607
            {
608
              Node num2 = nm->mkConst(cvc5::Rational(l - 1));
609
              Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2);
610
              if (testConstStringInRegExp(s, index_start + len, r2))
611
              {
612
                return true;
613
              }
614
            }
615
          }
616
          return false;
617
        }
618
      }
619
    }
620
341
    case REGEXP_COMPLEMENT:
621
    {
622
341
      return !testConstStringInRegExp(s, index_start, r[0]);
623
      break;
624
    }
625
    default:
626
    {
627
      Assert(!utils::isRegExpKind(k));
628
      return false;
629
    }
630
  }
631
}
632
633
256
bool RegExpEntail::hasEpsilonNode(TNode node)
634
{
635
776
  for (const Node& nc : node)
636
  {
637
558
    if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0]))
638
    {
639
38
      return true;
640
    }
641
  }
642
218
  return false;
643
}
644
645
2903
Node RegExpEntail::getFixedLengthForRegexp(Node n)
646
{
647
2903
  NodeManager* nm = NodeManager::currentNM();
648
2903
  if (n.getKind() == STRING_TO_REGEXP)
649
  {
650
1144
    Node ret = nm->mkNode(STRING_LENGTH, n[0]);
651
1117
    ret = Rewriter::rewrite(ret);
652
1117
    if (ret.isConst())
653
    {
654
1090
      return ret;
655
    }
656
  }
657
1786
  else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE)
658
  {
659
350
    return nm->mkConst(Rational(1));
660
  }
661
1436
  else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER)
662
  {
663
982
    Node ret;
664
1127
    for (const Node& nc : n)
665
    {
666
1588
      Node flc = getFixedLengthForRegexp(nc);
667
952
      if (flc.isNull() || (!ret.isNull() && ret != flc))
668
      {
669
316
        return Node::null();
670
      }
671
636
      else if (ret.isNull())
672
      {
673
        // first time
674
437
        ret = flc;
675
      }
676
    }
677
175
    return ret;
678
  }
679
945
  else if (n.getKind() == REGEXP_CONCAT)
680
  {
681
820
    NodeBuilder nb(PLUS);
682
780
    for (const Node& nc : n)
683
    {
684
1150
      Node flc = getFixedLengthForRegexp(nc);
685
780
      if (flc.isNull())
686
      {
687
410
        return flc;
688
      }
689
370
      nb << flc;
690
    }
691
    Node ret = nb.constructNode();
692
    ret = Rewriter::rewrite(ret);
693
    return ret;
694
  }
695
562
  return Node::null();
696
}
697
698
672
bool RegExpEntail::regExpIncludes(Node r1, Node r2)
699
{
700
672
  Assert(Rewriter::rewrite(r1) == r1);
701
672
  Assert(Rewriter::rewrite(r2) == r2);
702
672
  if (r1 == r2)
703
  {
704
16
    return true;
705
  }
706
707
  // This method only works on a fragment of regular expressions
708
656
  if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2))
709
  {
710
494
    return false;
711
  }
712
162
  NodeManager* nm = NodeManager::currentNM();
713
324
  Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{});
714
324
  Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma);
715
716
324
  std::vector<Node> v1, v2;
717
162
  utils::getRegexpComponents(r1, v1);
718
162
  utils::getRegexpComponents(r2, v2);
719
720
  // In the following, we iterate over `r2` (the "includee") and try to
721
  // match it with `r1`. `idxs`/`newIdxs` keep track of all the possible
722
  // positions in `r1` that we could currently be at.
723
324
  std::unordered_set<size_t> newIdxs = {0};
724
324
  std::unordered_set<size_t> idxs;
725
950
  for (const Node& n2 : v2)
726
  {
727
    // Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are
728
    // removed and for (re.* re.allchar), we additionally include the option of
729
    // skipping it. Indices must be smaller than the size of `v1`.
730
844
    idxs.clear();
731
2756
    for (size_t idx : newIdxs)
732
    {
733
1912
      if (idx < v1.size())
734
      {
735
1864
        idxs.insert(idx);
736
1864
        if (idx + 1 < v1.size() && v1[idx] == sigmaStar)
737
        {
738
1078
          Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar);
739
1078
          idxs.insert(idx + 1);
740
        }
741
      }
742
    }
743
844
    newIdxs.clear();
744
745
844
    if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma)
746
    {
747
608
      Assert(n2 == sigma
748
             || (n2[0].isConst() && n2[0].getConst<String>().size() == 1));
749
2792
      for (size_t idx : idxs)
750
      {
751
2184
        if (v1[idx] == sigma || v1[idx] == n2)
752
        {
753
          // Given a character or an re.allchar in `r2`, we can either
754
          // match it with a corresponding character in `r1` or an
755
          // re.allchar
756
662
          newIdxs.insert(idx + 1);
757
        }
758
      }
759
    }
760
761
3786
    for (size_t idx : idxs)
762
    {
763
2942
      if (v1[idx] == sigmaStar)
764
      {
765
        // (re.* re.allchar) can match an arbitrary amount of `r2`
766
1188
        newIdxs.insert(idx);
767
      }
768
1754
      else if (utils::isUnboundedWildcard(v1, idx))
769
      {
770
        // If a series of re.allchar is followed by (re.* re.allchar), we
771
        // can decide not to "waste" the re.allchar because the order of
772
        // the two wildcards is not observable (i.e. it does not change
773
        // the sequences matched by the regular expression)
774
444
        newIdxs.insert(idx);
775
      }
776
    }
777
778
844
    if (newIdxs.empty())
779
    {
780
      // If there are no candidates, we can't match the remainder of r2
781
56
      return false;
782
    }
783
  }
784
785
  // We have processed all of `r2`. We are now looking if there was also a
786
  // path to the end in `r1`. This makes sure that we don't have leftover
787
  // bits in `r1` that don't match anything in `r2`.
788
106
  bool result = false;
789
300
  for (size_t idx : newIdxs)
790
  {
791
252
    if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar))
792
    {
793
58
      result = true;
794
58
      break;
795
    }
796
  }
797
798
106
  return result;
799
}
800
801
}  // namespace strings
802
}  // namespace theory
803
29322
}  // namespace cvc5