GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_elim.cpp Lines: 273 299 91.3 %
Date: 2021-09-29 Branches: 601 1372 43.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Mathias Preiner, Andres Noetzli
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of techniques for eliminating regular expressions.
14
 */
15
16
#include "theory/strings/regexp_elim.h"
17
18
#include "options/strings_options.h"
19
#include "proof/proof_node_manager.h"
20
#include "theory/rewriter.h"
21
#include "theory/strings/regexp_entail.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "util/rational.h"
24
#include "util/string.h"
25
26
using namespace cvc5::kind;
27
28
namespace cvc5 {
29
namespace theory {
30
namespace strings {
31
32
6271
RegExpElimination::RegExpElimination(bool isAgg,
33
                                     ProofNodeManager* pnm,
34
6271
                                     context::Context* c)
35
    : d_isAggressive(isAgg),
36
      d_pnm(pnm),
37
      d_epg(pnm == nullptr
38
                ? nullptr
39
6271
                : new EagerProofGenerator(pnm, c, "RegExpElimination::epg"))
40
{
41
6271
}
42
43
68
Node RegExpElimination::eliminate(Node atom, bool isAgg)
44
{
45
68
  Assert(atom.getKind() == STRING_IN_REGEXP);
46
68
  if (atom[1].getKind() == REGEXP_CONCAT)
47
  {
48
45
    return eliminateConcat(atom, isAgg);
49
  }
50
23
  else if (atom[1].getKind() == REGEXP_STAR)
51
  {
52
23
    return eliminateStar(atom, isAgg);
53
  }
54
  return Node::null();
55
}
56
57
68
TrustNode RegExpElimination::eliminateTrusted(Node atom)
58
{
59
136
  Node eatom = eliminate(atom, d_isAggressive);
60
68
  if (!eatom.isNull())
61
  {
62
    // Currently aggressive doesnt work due to fresh bound variables
63
50
    if (isProofEnabled() && !d_isAggressive)
64
    {
65
      Node eq = atom.eqNode(eatom);
66
      Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive);
67
      std::shared_ptr<ProofNode> pn =
68
          d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq);
69
      d_epg->setProofFor(eq, pn);
70
      return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
71
    }
72
50
    return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
73
  }
74
18
  return TrustNode::null();
75
}
76
77
45
Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
78
{
79
45
  NodeManager* nm = NodeManager::currentNM();
80
90
  Node x = atom[0];
81
90
  Node lenx = nm->mkNode(STRING_LENGTH, x);
82
90
  Node re = atom[1];
83
90
  Node zero = nm->mkConst(Rational(0));
84
90
  std::vector<Node> children;
85
45
  utils::getConcat(re, children);
86
87
  // If it can be reduced to memberships in fixed length regular expressions.
88
  // This includes concatenations where at most one child is of the form
89
  // (re.* re.allchar), which we abbreviate _* below, and all other children
90
  // have a fixed length.
91
  // The intuition why this is a "non-aggressive" rewrite is that membership
92
  // into fixed length regular expressions are easy to handle.
93
  // the index of _* in re
94
45
  unsigned pivotIndex = 0;
95
45
  bool hasPivotIndex = false;
96
45
  bool hasFixedLength = true;
97
90
  std::vector<Node> childLengths;
98
90
  std::vector<Node> childLengthsPostPivot;
99
264
  for (unsigned i = 0, size = children.size(); i < size; i++)
100
  {
101
438
    Node c = children[i];
102
438
    Node fl = RegExpEntail::getFixedLengthForRegexp(c);
103
219
    if (fl.isNull())
104
    {
105
98
      if (!hasPivotIndex && c.getKind() == REGEXP_STAR
106
89
          && c[0].getKind() == REGEXP_SIGMA)
107
      {
108
7
        hasPivotIndex = true;
109
7
        pivotIndex = i;
110
        // zero is used in sum below and is used for concat-fixed-len
111
7
        fl = zero;
112
      }
113
      else
114
      {
115
27
        hasFixedLength = false;
116
      }
117
    }
118
219
    if (!fl.isNull())
119
    {
120
192
      childLengths.push_back(fl);
121
192
      if (hasPivotIndex)
122
      {
123
91
        childLengthsPostPivot.push_back(fl);
124
      }
125
    }
126
  }
127
45
  Node lenSum = childLengths.size() > 1
128
                    ? nm->mkNode(PLUS, childLengths)
129
90
                    : (childLengths.empty() ? zero : childLengths[0]);
130
  // if we have a fixed length
131
45
  if (hasFixedLength)
132
  {
133
23
    Assert(re.getNumChildren() == children.size());
134
46
    std::vector<Node> conc;
135
23
    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, lenSum));
136
46
    Node currEnd = zero;
137
178
    for (unsigned i = 0, size = childLengths.size(); i < size; i++)
138
    {
139
155
      if (hasPivotIndex && i == pivotIndex)
140
      {
141
4
        Node ppSum = childLengthsPostPivot.size() == 1
142
                         ? childLengthsPostPivot[0]
143
8
                         : nm->mkNode(PLUS, childLengthsPostPivot);
144
8
        currEnd = nm->mkNode(MINUS, lenx, ppSum);
145
      }
146
      else
147
      {
148
302
        Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]);
149
        // We do not need to include memberships of the form
150
        //   (str.substr x n 1) in re.allchar
151
        // since we know that by construction, n < len( x ).
152
151
        if (re[i].getKind() != REGEXP_SIGMA)
153
        {
154
160
          Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]);
155
80
          conc.push_back(currMem);
156
        }
157
151
        currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]);
158
151
        currEnd = Rewriter::rewrite(currEnd);
159
      }
160
    }
161
46
    Node res = nm->mkNode(AND, conc);
162
    // For example:
163
    //   x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
164
    //   len( x ) = 3 ^
165
    //   substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
166
    //   substr(x,1,2) in "AB"
167
    // An example with a pivot index:
168
    //   x in re.++( "AB" ++ _* ++ "C" ) -->
169
    //   len( x ) >= 3 ^
170
    //   substr( x, 0, 2 ) in "AB" ^
171
    //   substr( x, len( x ) - 1, 1 ) in "C"
172
23
    return returnElim(atom, res, "concat-fixed-len");
173
  }
174
175
  // memberships of the form x in re.++ * s1 * ... * sn *, where * are
176
  // any number of repetitions (exact or indefinite) of re.allchar.
177
22
  Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
178
22
  bool onlySigmasAndConsts = true;
179
44
  std::vector<Node> sep_children;
180
44
  std::vector<unsigned> gap_minsize;
181
44
  std::vector<bool> gap_exact;
182
  // the first gap is initially strict zero
183
22
  gap_minsize.push_back(0);
184
22
  gap_exact.push_back(true);
185
51
  for (const Node& c : children)
186
  {
187
48
    Trace("re-elim-debug") << "  " << c << std::endl;
188
48
    onlySigmasAndConsts = false;
189
48
    if (c.getKind() == STRING_TO_REGEXP)
190
    {
191
21
      onlySigmasAndConsts = true;
192
21
      sep_children.push_back(c[0]);
193
      // the next gap is initially strict zero
194
21
      gap_minsize.push_back(0);
195
21
      gap_exact.push_back(true);
196
    }
197
27
    else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA)
198
    {
199
      // found a gap of any size
200
7
      onlySigmasAndConsts = true;
201
7
      gap_exact[gap_exact.size() - 1] = false;
202
    }
203
20
    else if (c.getKind() == REGEXP_SIGMA)
204
    {
205
      // add one to the minimum size of the gap
206
1
      onlySigmasAndConsts = true;
207
1
      gap_minsize[gap_minsize.size() - 1]++;
208
    }
209
48
    if (!onlySigmasAndConsts)
210
    {
211
19
      Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
212
19
      break;
213
    }
214
  }
215
  // we should always rewrite concatenations that are purely re.allchar
216
  // and re.*( re.allchar ).
217
22
  Assert(!onlySigmasAndConsts || !sep_children.empty());
218
22
  if (onlySigmasAndConsts && !sep_children.empty())
219
  {
220
3
    bool canProcess = true;
221
3
    std::vector<Node> conj;
222
    // The following constructs a set of constraints that encodes that a
223
    // set of string terms are found, in order, in string x.
224
    // prev_end stores the current (symbolic) index in x that we are
225
    // searching.
226
3
    Node prev_end = zero;
227
    // the symbolic index we start searching, for each child in sep_children.
228
3
    std::vector<Node> prev_ends;
229
3
    unsigned gap_minsize_end = gap_minsize.back();
230
3
    bool gap_exact_end = gap_exact.back();
231
3
    std::vector<Node> non_greedy_find_vars;
232
12
    for (unsigned i = 0, size = sep_children.size(); i < size; i++)
233
    {
234
9
      if (gap_minsize[i] > 0)
235
      {
236
        // the gap to this child is at least gap_minsize[i]
237
1
        prev_end =
238
2
            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
239
      }
240
9
      prev_ends.push_back(prev_end);
241
18
      Node sc = sep_children[i];
242
18
      Node lensc = nm->mkNode(STRING_LENGTH, sc);
243
9
      if (gap_exact[i])
244
      {
245
        // if the gap is exact, it is a substring constraint
246
6
        Node curr = prev_end;
247
6
        Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc);
248
3
        conj.push_back(ss.eqNode(sc));
249
3
        prev_end = nm->mkNode(PLUS, curr, lensc);
250
      }
251
      else
252
      {
253
        // otherwise, we can use indexof to represent some next occurrence
254
6
        if (gap_exact[i + 1] && i + 1 != size)
255
        {
256
          if (!isAgg)
257
          {
258
            canProcess = false;
259
            break;
260
          }
261
          // if the gap after this one is strict, we need a non-greedy find
262
          // thus, we add a symbolic constant
263
          Node k = nm->mkBoundVar(nm->integerType());
264
          non_greedy_find_vars.push_back(k);
265
          prev_end = nm->mkNode(PLUS, prev_end, k);
266
        }
267
12
        Node curr = nm->mkNode(STRING_INDEXOF, x, sc, prev_end);
268
12
        Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate();
269
6
        conj.push_back(idofFind);
270
6
        prev_end = nm->mkNode(PLUS, curr, lensc);
271
      }
272
    }
273
274
3
    if (canProcess)
275
    {
276
      // since sep_children is non-empty, conj is non-empty
277
3
      Assert(!conj.empty());
278
      // Process the last gap, if necessary.
279
      // Notice that if the last gap is not exact and its minsize is zero,
280
      // then the last indexof/substr constraint entails the following
281
      // constraint, so it is not necessary to add.
282
      // Below, we may write "A" for (str.to.re "A") and _ for re.allchar:
283
6
      Node cEnd = nm->mkConst(Rational(gap_minsize_end));
284
3
      if (gap_exact_end)
285
      {
286
2
        Assert(!sep_children.empty());
287
        // if it is strict, it corresponds to a substr case.
288
        // For example:
289
        //     x in (re.++ "A" (re.* _) "B" _ _) --->
290
        //        ... ^ "B" = substr( x, len( x ) - 3, 1 )  ^ ...
291
4
        Node sc = sep_children.back();
292
4
        Node lenSc = nm->mkNode(STRING_LENGTH, sc);
293
4
        Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
294
4
        Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
295
        // We also must ensure that we fit. This constraint is necessary in
296
        // addition to the constraint above. Take this example:
297
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
298
        //       substr( x, 0, 1 ) = "A" ^             // find "A"
299
        //       indexof( x, "B", 2 ) != -1 ^          // find "B" >=1 after "A"
300
        //       substr( x, len(x)-2, 1 ) = "B" ^      // "B" is at end - 2.
301
        //       indexof( x, "B", 2 ) <= len( x ) - 2
302
        // The last constaint ensures that the second and third constraints
303
        // may refer to the same "B". If it were not for the last constraint, it
304
        // would have been the case than "ABB" would be a model for x, where
305
        // the second constraint refers to the third position, and the third
306
        // constraint refers to the second position.
307
        //
308
        // With respect to the above example, the following is an optimization.
309
        // For that example, we instead produce:
310
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
311
        //       substr( x, 0, 1 ) = "A" ^          // find "A"
312
        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
313
        //       2 <= len( x ) - 2
314
        // The intuition is that above, there are two constraints that insist
315
        // that "B" is found, whereas we only need one. The last constraint
316
        // above says that the "B" we find at end-2 can be found >=1 after
317
        // the "A".
318
2
        conj.pop_back();
319
4
        Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
320
2
                              prev_ends.back(),
321
10
                              loc);
322
323
2
        conj.push_back(scc);
324
2
        conj.push_back(fit);
325
      }
326
1
      else if (gap_minsize_end > 0)
327
      {
328
        // if it is non-strict, we are in a "greedy find" situtation where
329
        // we just need to ensure that the next occurrence fits.
330
        // For example:
331
        //     x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) --->
332
        //        ... ^ indexof( x, "B", 1 ) + 2 <= len( x )
333
        Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx);
334
        conj.push_back(fit);
335
      }
336
6
      Node res = nm->mkAnd(conj);
337
      // process the non-greedy find variables
338
3
      if (!non_greedy_find_vars.empty())
339
      {
340
        std::vector<Node> children2;
341
        for (const Node& v : non_greedy_find_vars)
342
        {
343
          Node bound = nm->mkNode(
344
              AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx));
345
          children2.push_back(bound);
346
        }
347
        children2.push_back(res);
348
        Node body = nm->mkNode(AND, children2);
349
        Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
350
        res = utils::mkForallInternal(bvl, body.negate()).negate();
351
      }
352
      // must also give a minimum length requirement
353
3
      res = nm->mkNode(AND, res, nm->mkNode(GEQ, lenx, lenSum));
354
      // Examples of this elimination:
355
      //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
356
      //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
357
      //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
358
      //     indexof(x,"A",0)!=-1 ^
359
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
360
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^
361
      //     len(x) >= 7
362
363
      // An example of a non-greedy find:
364
      //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
365
      //     (exists k. 0 <= k < len( x ) ^
366
      //               indexof( x, "A", k ) != -1 ^
367
      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^
368
      //     len(x) >= 3
369
3
      return returnElim(atom, res, "concat-with-gaps");
370
    }
371
  }
372
373
19
  if (!isAgg)
374
  {
375
5
    return Node::null();
376
  }
377
  // only aggressive rewrites below here
378
379
  // if the first or last child is constant string, we can split the membership
380
  // into a conjunction of two memberships.
381
28
  Node sStartIndex = zero;
382
28
  Node sLength = lenx;
383
28
  std::vector<Node> sConstraints;
384
28
  std::vector<Node> rexpElimChildren;
385
14
  unsigned nchildren = children.size();
386
14
  Assert(nchildren > 1);
387
42
  for (unsigned r = 0; r < 2; r++)
388
  {
389
28
    unsigned index = r == 0 ? 0 : nchildren - 1;
390
56
    Node c = children[index];
391
28
    if (c.getKind() == STRING_TO_REGEXP)
392
    {
393
13
      Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP);
394
26
      Node s = c[0];
395
26
      Node lens = nm->mkNode(STRING_LENGTH, s);
396
26
      Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens);
397
26
      Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens);
398
13
      sConstraints.push_back(ss.eqNode(s));
399
13
      if (r == 0)
400
      {
401
11
        sStartIndex = lens;
402
      }
403
2
      else if (r == 1 && sConstraints.size() == 2)
404
      {
405
        // first and last children cannot overlap
406
4
        Node bound = nm->mkNode(GEQ, sss, sStartIndex);
407
2
        sConstraints.push_back(bound);
408
      }
409
13
      sLength = nm->mkNode(MINUS, sLength, lens);
410
    }
411
28
    if (r == 1 && !sConstraints.empty())
412
    {
413
      // add the middle children
414
13
      for (unsigned i = 1; i < (nchildren - 1); i++)
415
      {
416
2
        rexpElimChildren.push_back(children[i]);
417
      }
418
    }
419
28
    if (c.getKind() != STRING_TO_REGEXP)
420
    {
421
15
      rexpElimChildren.push_back(c);
422
    }
423
  }
424
14
  if (!sConstraints.empty())
425
  {
426
22
    Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength);
427
11
    Assert(!rexpElimChildren.empty());
428
22
    Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType());
429
11
    sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim));
430
22
    Node ret = nm->mkNode(AND, sConstraints);
431
    // e.g.
432
    // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
433
11
    return returnElim(atom, ret, "concat-splice");
434
  }
435
3
  Assert(nchildren > 1);
436
6
  for (unsigned i = 0; i < nchildren; i++)
437
  {
438
6
    if (children[i].getKind() == STRING_TO_REGEXP)
439
    {
440
6
      Node s = children[i][0];
441
6
      Node lens = nm->mkNode(STRING_LENGTH, s);
442
      // there exists an index in this string such that the substring is this
443
6
      Node k;
444
6
      std::vector<Node> echildren;
445
3
      if (i == 0)
446
      {
447
        k = zero;
448
      }
449
3
      else if (i + 1 == nchildren)
450
      {
451
        k = nm->mkNode(MINUS, lenx, lens);
452
      }
453
      else
454
      {
455
3
        k = nm->mkBoundVar(nm->integerType());
456
        Node bound =
457
            nm->mkNode(AND,
458
6
                       nm->mkNode(LEQ, zero, k),
459
12
                       nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens)));
460
3
        echildren.push_back(bound);
461
      }
462
6
      Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s);
463
3
      echildren.push_back(substrEq);
464
3
      if (i > 0)
465
      {
466
6
        std::vector<Node> rprefix;
467
3
        rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
468
6
        Node rpn = utils::mkConcat(rprefix, nm->regExpType());
469
        Node substrPrefix = nm->mkNode(
470
6
            STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn);
471
3
        echildren.push_back(substrPrefix);
472
      }
473
3
      if (i + 1 < nchildren)
474
      {
475
6
        std::vector<Node> rsuffix;
476
3
        rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
477
6
        Node rps = utils::mkConcat(rsuffix, nm->regExpType());
478
6
        Node ks = nm->mkNode(PLUS, k, lens);
479
        Node substrSuffix = nm->mkNode(
480
            STRING_IN_REGEXP,
481
6
            nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)),
482
12
            rps);
483
3
        echildren.push_back(substrSuffix);
484
      }
485
6
      Node body = nm->mkNode(AND, echildren);
486
3
      if (k.getKind() == BOUND_VARIABLE)
487
      {
488
6
        Node bvl = nm->mkNode(BOUND_VAR_LIST, k);
489
3
        body = utils::mkForallInternal(bvl, body.negate()).negate();
490
      }
491
      // e.g. x in re.++( R1, "AB", R2 ) --->
492
      //  exists k.
493
      //    0 <= k <= (len(x)-2) ^
494
      //    substr( x, k, 2 ) = "AB" ^
495
      //    substr( x, 0, k ) in R1 ^
496
      //    substr( x, k+2, len(x)-(k+2) ) in R2
497
3
      return returnElim(atom, body, "concat-find");
498
    }
499
  }
500
  return Node::null();
501
}
502
503
23
Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
504
{
505
23
  if (!isAgg)
506
  {
507
8
    return Node::null();
508
  }
509
  // only aggressive rewrites below here
510
511
15
  NodeManager* nm = NodeManager::currentNM();
512
30
  Node x = atom[0];
513
30
  Node lenx = nm->mkNode(STRING_LENGTH, x);
514
30
  Node re = atom[1];
515
30
  Node zero = nm->mkConst(Rational(0));
516
  // for regular expression star,
517
  // if the period is a fixed constant, we can turn it into a bounded
518
  // quantifier
519
30
  std::vector<Node> disj;
520
15
  if (re[0].getKind() == REGEXP_UNION)
521
  {
522
3
    for (const Node& r : re[0])
523
    {
524
2
      disj.push_back(r);
525
    }
526
  }
527
  else
528
  {
529
14
    disj.push_back(re[0]);
530
  }
531
15
  bool lenOnePeriod = true;
532
30
  std::vector<Node> char_constraints;
533
30
  Node index = nm->mkBoundVar(nm->integerType());
534
  Node substr_ch =
535
30
      nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1)));
536
15
  substr_ch = Rewriter::rewrite(substr_ch);
537
  // handle the case where it is purely characters
538
24
  for (const Node& r : disj)
539
  {
540
16
    Assert(r.getKind() != REGEXP_UNION);
541
16
    Assert(r.getKind() != REGEXP_SIGMA);
542
16
    lenOnePeriod = false;
543
    // lenOnePeriod is true if this regular expression is a single character
544
    // regular expression
545
16
    if (r.getKind() == STRING_TO_REGEXP)
546
    {
547
24
      Node s = r[0];
548
12
      if (s.isConst() && s.getConst<String>().size() == 1)
549
      {
550
6
        lenOnePeriod = true;
551
      }
552
    }
553
4
    else if (r.getKind() == REGEXP_RANGE)
554
    {
555
3
      lenOnePeriod = true;
556
    }
557
16
    if (!lenOnePeriod)
558
    {
559
7
      break;
560
    }
561
    else
562
    {
563
18
      Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r);
564
9
      regexp_ch = Rewriter::rewrite(regexp_ch);
565
9
      Assert(regexp_ch.getKind() != STRING_IN_REGEXP);
566
9
      char_constraints.push_back(regexp_ch);
567
    }
568
  }
569
15
  if (lenOnePeriod)
570
  {
571
8
    Assert(!char_constraints.empty());
572
    Node bound = nm->mkNode(
573
16
        AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx));
574
16
    Node conc = char_constraints.size() == 1 ? char_constraints[0]
575
24
                                             : nm->mkNode(OR, char_constraints);
576
16
    Node body = nm->mkNode(OR, bound.negate(), conc);
577
16
    Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
578
16
    Node res = utils::mkForallInternal(bvl, body);
579
    // e.g.
580
    //   x in (re.* (re.union "A" "B" )) --->
581
    //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
582
8
    return returnElim(atom, res, "star-char");
583
  }
584
  // otherwise, for stars of constant length these are periodic
585
7
  if (disj.size() == 1)
586
  {
587
10
    Node r = disj[0];
588
6
    if (r.getKind() == STRING_TO_REGEXP)
589
    {
590
10
      Node s = r[0];
591
6
      if (s.isConst())
592
      {
593
4
        Node lens = nm->mkNode(STRING_LENGTH, s);
594
2
        lens = Rewriter::rewrite(lens);
595
2
        Assert(lens.isConst());
596
2
        Assert(lens.getConst<Rational>().sgn() > 0);
597
4
        std::vector<Node> conj;
598
        // lens is a positive constant, so it is safe to use total div/mod here.
599
        Node bound = nm->mkNode(
600
            AND,
601
4
            nm->mkNode(LEQ, zero, index),
602
8
            nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens)));
603
        Node conc =
604
4
            nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens)
605
4
                .eqNode(s);
606
4
        Node body = nm->mkNode(OR, bound.negate(), conc);
607
4
        Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
608
4
        Node res = utils::mkForallInternal(bvl, body);
609
6
        res = nm->mkNode(
610
4
            AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res);
611
        // e.g.
612
        //    x in ("abc")* --->
613
        //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
614
        //    len(x) mod 3 = 0
615
2
        return returnElim(atom, res, "star-constant");
616
      }
617
    }
618
  }
619
5
  return Node::null();
620
}
621
622
50
Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
623
{
624
100
  Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
625
50
                   << "." << std::endl;
626
50
  return atomElim;
627
}
628
50
bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; }
629
630
}  // namespace strings
631
}  // namespace theory
632
22746
}  // namespace cvc5