GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_elim.cpp Lines: 278 299 93.0 %
Date: 2021-08-06 Branches: 620 1374 45.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Mathias Preiner, Andres Noetzli
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of techniques for eliminating regular expressions.
14
 */
15
16
#include "theory/strings/regexp_elim.h"
17
18
#include "options/strings_options.h"
19
#include "proof/proof_node_manager.h"
20
#include "theory/rewriter.h"
21
#include "theory/strings/regexp_entail.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "util/rational.h"
24
#include "util/string.h"
25
26
using namespace cvc5::kind;
27
28
namespace cvc5 {
29
namespace theory {
30
namespace strings {
31
32
9853
RegExpElimination::RegExpElimination(bool isAgg,
33
                                     ProofNodeManager* pnm,
34
9853
                                     context::Context* c)
35
    : d_isAggressive(isAgg),
36
      d_pnm(pnm),
37
      d_epg(pnm == nullptr
38
                ? nullptr
39
9853
                : new EagerProofGenerator(pnm, c, "RegExpElimination::epg"))
40
{
41
9853
}
42
43
138
Node RegExpElimination::eliminate(Node atom, bool isAgg)
44
{
45
138
  Assert(atom.getKind() == STRING_IN_REGEXP);
46
138
  if (atom[1].getKind() == REGEXP_CONCAT)
47
  {
48
96
    return eliminateConcat(atom, isAgg);
49
  }
50
42
  else if (atom[1].getKind() == REGEXP_STAR)
51
  {
52
42
    return eliminateStar(atom, isAgg);
53
  }
54
  return Node::null();
55
}
56
57
134
TrustNode RegExpElimination::eliminateTrusted(Node atom)
58
{
59
268
  Node eatom = eliminate(atom, d_isAggressive);
60
134
  if (!eatom.isNull())
61
  {
62
    // Currently aggressive doesnt work due to fresh bound variables
63
92
    if (isProofEnabled() && !d_isAggressive)
64
    {
65
8
      Node eq = atom.eqNode(eatom);
66
8
      Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive);
67
      std::shared_ptr<ProofNode> pn =
68
8
          d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq);
69
4
      d_epg->setProofFor(eq, pn);
70
4
      return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
71
    }
72
88
    return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
73
  }
74
42
  return TrustNode::null();
75
}
76
77
96
Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
78
{
79
96
  NodeManager* nm = NodeManager::currentNM();
80
192
  Node x = atom[0];
81
192
  Node lenx = nm->mkNode(STRING_LENGTH, x);
82
192
  Node re = atom[1];
83
192
  Node zero = nm->mkConst(Rational(0));
84
192
  std::vector<Node> children;
85
96
  utils::getConcat(re, children);
86
87
  // If it can be reduced to memberships in fixed length regular expressions.
88
  // This includes concatenations where at most one child is of the form
89
  // (re.* re.allchar), which we abbreviate _* below, and all other children
90
  // have a fixed length.
91
  // The intuition why this is a "non-aggressive" rewrite is that membership
92
  // into fixed length regular expressions are easy to handle.
93
  // the index of _* in re
94
96
  unsigned pivotIndex = 0;
95
96
  bool hasPivotIndex = false;
96
96
  bool hasFixedLength = true;
97
192
  std::vector<Node> childLengths;
98
192
  std::vector<Node> childLengthsPostPivot;
99
484
  for (unsigned i = 0, size = children.size(); i < size; i++)
100
  {
101
776
    Node c = children[i];
102
776
    Node fl = RegExpEntail::getFixedLengthForRegexp(c);
103
388
    if (fl.isNull())
104
    {
105
292
      if (!hasPivotIndex && c.getKind() == REGEXP_STAR
106
267
          && c[0].getKind() == REGEXP_SIGMA)
107
      {
108
21
        hasPivotIndex = true;
109
21
        pivotIndex = i;
110
        // zero is used in sum below and is used for concat-fixed-len
111
21
        fl = zero;
112
      }
113
      else
114
      {
115
83
        hasFixedLength = false;
116
      }
117
    }
118
388
    if (!fl.isNull())
119
    {
120
305
      childLengths.push_back(fl);
121
305
      if (hasPivotIndex)
122
      {
123
131
        childLengthsPostPivot.push_back(fl);
124
      }
125
    }
126
  }
127
96
  Node lenSum = childLengths.size() > 1
128
                    ? nm->mkNode(PLUS, childLengths)
129
192
                    : (childLengths.empty() ? zero : childLengths[0]);
130
  // if we have a fixed length
131
96
  if (hasFixedLength)
132
  {
133
29
    Assert(re.getNumChildren() == children.size());
134
58
    std::vector<Node> conc;
135
29
    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, lenSum));
136
58
    Node currEnd = zero;
137
204
    for (unsigned i = 0, size = childLengths.size(); i < size; i++)
138
    {
139
175
      if (hasPivotIndex && i == pivotIndex)
140
      {
141
6
        Node ppSum = childLengthsPostPivot.size() == 1
142
                         ? childLengthsPostPivot[0]
143
12
                         : nm->mkNode(PLUS, childLengthsPostPivot);
144
12
        currEnd = nm->mkNode(MINUS, lenx, ppSum);
145
      }
146
      else
147
      {
148
338
        Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]);
149
        // We do not need to include memberships of the form
150
        //   (str.substr x n 1) in re.allchar
151
        // since we know that by construction, n < len( x ).
152
169
        if (re[i].getKind() != REGEXP_SIGMA)
153
        {
154
184
          Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]);
155
92
          conc.push_back(currMem);
156
        }
157
169
        currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]);
158
169
        currEnd = Rewriter::rewrite(currEnd);
159
      }
160
    }
161
58
    Node res = nm->mkNode(AND, conc);
162
    // For example:
163
    //   x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
164
    //   len( x ) = 3 ^
165
    //   substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
166
    //   substr(x,1,2) in "AB"
167
    // An example with a pivot index:
168
    //   x in re.++( "AB" ++ _* ++ "C" ) -->
169
    //   len( x ) >= 3 ^
170
    //   substr( x, 0, 2 ) in "AB" ^
171
    //   substr( x, len( x ) - 1, 1 ) in "C"
172
29
    return returnElim(atom, res, "concat-fixed-len");
173
  }
174
175
  // memberships of the form x in re.++ * s1 * ... * sn *, where * are
176
  // any number of repetitions (exact or indefinite) of re.allchar.
177
67
  Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
178
67
  bool onlySigmasAndConsts = true;
179
134
  std::vector<Node> sep_children;
180
134
  std::vector<unsigned> gap_minsize;
181
134
  std::vector<bool> gap_exact;
182
  // the first gap is initially strict zero
183
67
  gap_minsize.push_back(0);
184
67
  gap_exact.push_back(true);
185
182
  for (const Node& c : children)
186
  {
187
167
    Trace("re-elim-debug") << "  " << c << std::endl;
188
167
    onlySigmasAndConsts = false;
189
167
    if (c.getKind() == STRING_TO_REGEXP)
190
    {
191
75
      onlySigmasAndConsts = true;
192
75
      sep_children.push_back(c[0]);
193
      // the next gap is initially strict zero
194
75
      gap_minsize.push_back(0);
195
75
      gap_exact.push_back(true);
196
    }
197
92
    else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA)
198
    {
199
      // found a gap of any size
200
35
      onlySigmasAndConsts = true;
201
35
      gap_exact[gap_exact.size() - 1] = false;
202
    }
203
57
    else if (c.getKind() == REGEXP_SIGMA)
204
    {
205
      // add one to the minimum size of the gap
206
5
      onlySigmasAndConsts = true;
207
5
      gap_minsize[gap_minsize.size() - 1]++;
208
    }
209
167
    if (!onlySigmasAndConsts)
210
    {
211
52
      Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
212
52
      break;
213
    }
214
  }
215
  // we should always rewrite concatenations that are purely re.allchar
216
  // and re.*( re.allchar ).
217
67
  Assert(!onlySigmasAndConsts || !sep_children.empty());
218
67
  if (onlySigmasAndConsts && !sep_children.empty())
219
  {
220
15
    bool canProcess = true;
221
15
    std::vector<Node> conj;
222
    // The following constructs a set of constraints that encodes that a
223
    // set of string terms are found, in order, in string x.
224
    // prev_end stores the current (symbolic) index in x that we are
225
    // searching.
226
15
    Node prev_end = zero;
227
    // the symbolic index we start searching, for each child in sep_children.
228
15
    std::vector<Node> prev_ends;
229
15
    unsigned gap_minsize_end = gap_minsize.back();
230
15
    bool gap_exact_end = gap_exact.back();
231
15
    std::vector<Node> non_greedy_find_vars;
232
60
    for (unsigned i = 0, size = sep_children.size(); i < size; i++)
233
    {
234
45
      if (gap_minsize[i] > 0)
235
      {
236
        // the gap to this child is at least gap_minsize[i]
237
5
        prev_end =
238
10
            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
239
      }
240
45
      prev_ends.push_back(prev_end);
241
90
      Node sc = sep_children[i];
242
90
      Node lensc = nm->mkNode(STRING_LENGTH, sc);
243
45
      if (gap_exact[i])
244
      {
245
        // if the gap is exact, it is a substring constraint
246
30
        Node curr = prev_end;
247
30
        Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc);
248
15
        conj.push_back(ss.eqNode(sc));
249
15
        prev_end = nm->mkNode(PLUS, curr, lensc);
250
      }
251
      else
252
      {
253
        // otherwise, we can use indexof to represent some next occurrence
254
30
        if (gap_exact[i + 1] && i + 1 != size)
255
        {
256
          if (!isAgg)
257
          {
258
            canProcess = false;
259
            break;
260
          }
261
          // if the gap after this one is strict, we need a non-greedy find
262
          // thus, we add a symbolic constant
263
          Node k = nm->mkBoundVar(nm->integerType());
264
          non_greedy_find_vars.push_back(k);
265
          prev_end = nm->mkNode(PLUS, prev_end, k);
266
        }
267
60
        Node curr = nm->mkNode(STRING_INDEXOF, x, sc, prev_end);
268
60
        Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate();
269
30
        conj.push_back(idofFind);
270
30
        prev_end = nm->mkNode(PLUS, curr, lensc);
271
      }
272
    }
273
274
15
    if (canProcess)
275
    {
276
      // since sep_children is non-empty, conj is non-empty
277
15
      Assert(!conj.empty());
278
      // Process the last gap, if necessary.
279
      // Notice that if the last gap is not exact and its minsize is zero,
280
      // then the last indexof/substr constraint entails the following
281
      // constraint, so it is not necessary to add.
282
      // Below, we may write "A" for (str.to.re "A") and _ for re.allchar:
283
30
      Node cEnd = nm->mkConst(Rational(gap_minsize_end));
284
15
      if (gap_exact_end)
285
      {
286
10
        Assert(!sep_children.empty());
287
        // if it is strict, it corresponds to a substr case.
288
        // For example:
289
        //     x in (re.++ "A" (re.* _) "B" _ _) --->
290
        //        ... ^ "B" = substr( x, len( x ) - 3, 1 )  ^ ...
291
20
        Node sc = sep_children.back();
292
20
        Node lenSc = nm->mkNode(STRING_LENGTH, sc);
293
20
        Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
294
20
        Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
295
        // We also must ensure that we fit. This constraint is necessary in
296
        // addition to the constraint above. Take this example:
297
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
298
        //       substr( x, 0, 1 ) = "A" ^             // find "A"
299
        //       indexof( x, "B", 2 ) != -1 ^          // find "B" >=1 after "A"
300
        //       substr( x, len(x)-2, 1 ) = "B" ^      // "B" is at end - 2.
301
        //       indexof( x, "B", 2 ) <= len( x ) - 2
302
        // The last constaint ensures that the second and third constraints
303
        // may refer to the same "B". If it were not for the last constraint, it
304
        // would have been the case than "ABB" would be a model for x, where
305
        // the second constraint refers to the third position, and the third
306
        // constraint refers to the second position.
307
        //
308
        // With respect to the above example, the following is an optimization.
309
        // For that example, we instead produce:
310
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
311
        //       substr( x, 0, 1 ) = "A" ^          // find "A"
312
        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
313
        //       2 <= len( x ) - 2
314
        // The intuition is that above, there are two constraints that insist
315
        // that "B" is found, whereas we only need one. The last constraint
316
        // above says that the "B" we find at end-2 can be found >=1 after
317
        // the "A".
318
10
        conj.pop_back();
319
20
        Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
320
10
                              prev_ends.back(),
321
50
                              loc);
322
323
10
        conj.push_back(scc);
324
10
        conj.push_back(fit);
325
      }
326
5
      else if (gap_minsize_end > 0)
327
      {
328
        // if it is non-strict, we are in a "greedy find" situtation where
329
        // we just need to ensure that the next occurrence fits.
330
        // For example:
331
        //     x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) --->
332
        //        ... ^ indexof( x, "B", 1 ) + 2 <= len( x )
333
        Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx);
334
        conj.push_back(fit);
335
      }
336
30
      Node res = nm->mkAnd(conj);
337
      // process the non-greedy find variables
338
15
      if (!non_greedy_find_vars.empty())
339
      {
340
        std::vector<Node> children2;
341
        for (const Node& v : non_greedy_find_vars)
342
        {
343
          Node bound = nm->mkNode(
344
              AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx));
345
          children2.push_back(bound);
346
        }
347
        children2.push_back(res);
348
        Node body = nm->mkNode(AND, children2);
349
        Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
350
        res = utils::mkForallInternal(bvl, body.negate()).negate();
351
      }
352
      // must also give a minimum length requirement
353
15
      res = nm->mkNode(AND, res, nm->mkNode(GEQ, lenx, lenSum));
354
      // Examples of this elimination:
355
      //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
356
      //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
357
      //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
358
      //     indexof(x,"A",0)!=-1 ^
359
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
360
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^
361
      //     len(x) >= 7
362
363
      // An example of a non-greedy find:
364
      //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
365
      //     (exists k. 0 <= k < len( x ) ^
366
      //               indexof( x, "A", k ) != -1 ^
367
      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^
368
      //     len(x) >= 3
369
15
      return returnElim(atom, res, "concat-with-gaps");
370
    }
371
  }
372
373
52
  if (!isAgg)
374
  {
375
20
    return Node::null();
376
  }
377
  // only aggressive rewrites below here
378
379
  // if the first or last child is constant string, we can split the membership
380
  // into a conjunction of two memberships.
381
64
  Node sStartIndex = zero;
382
64
  Node sLength = lenx;
383
64
  std::vector<Node> sConstraints;
384
64
  std::vector<Node> rexpElimChildren;
385
32
  unsigned nchildren = children.size();
386
32
  Assert(nchildren > 1);
387
96
  for (unsigned r = 0; r < 2; r++)
388
  {
389
64
    unsigned index = r == 0 ? 0 : nchildren - 1;
390
128
    Node c = children[index];
391
64
    if (c.getKind() == STRING_TO_REGEXP)
392
    {
393
28
      Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP);
394
56
      Node s = c[0];
395
56
      Node lens = nm->mkNode(STRING_LENGTH, s);
396
56
      Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens);
397
56
      Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens);
398
28
      sConstraints.push_back(ss.eqNode(s));
399
28
      if (r == 0)
400
      {
401
26
        sStartIndex = lens;
402
      }
403
2
      else if (r == 1 && sConstraints.size() == 2)
404
      {
405
        // first and last children cannot overlap
406
4
        Node bound = nm->mkNode(GEQ, sss, sStartIndex);
407
2
        sConstraints.push_back(bound);
408
      }
409
28
      sLength = nm->mkNode(MINUS, sLength, lens);
410
    }
411
64
    if (r == 1 && !sConstraints.empty())
412
    {
413
      // add the middle children
414
28
      for (unsigned i = 1; i < (nchildren - 1); i++)
415
      {
416
2
        rexpElimChildren.push_back(children[i]);
417
      }
418
    }
419
64
    if (c.getKind() != STRING_TO_REGEXP)
420
    {
421
36
      rexpElimChildren.push_back(c);
422
    }
423
  }
424
32
  if (!sConstraints.empty())
425
  {
426
52
    Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength);
427
26
    Assert(!rexpElimChildren.empty());
428
52
    Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType());
429
26
    sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim));
430
52
    Node ret = nm->mkNode(AND, sConstraints);
431
    // e.g.
432
    // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
433
26
    return returnElim(atom, ret, "concat-splice");
434
  }
435
6
  Assert(nchildren > 1);
436
12
  for (unsigned i = 0; i < nchildren; i++)
437
  {
438
12
    if (children[i].getKind() == STRING_TO_REGEXP)
439
    {
440
12
      Node s = children[i][0];
441
12
      Node lens = nm->mkNode(STRING_LENGTH, s);
442
      // there exists an index in this string such that the substring is this
443
12
      Node k;
444
12
      std::vector<Node> echildren;
445
6
      if (i == 0)
446
      {
447
        k = zero;
448
      }
449
6
      else if (i + 1 == nchildren)
450
      {
451
        k = nm->mkNode(MINUS, lenx, lens);
452
      }
453
      else
454
      {
455
6
        k = nm->mkBoundVar(nm->integerType());
456
        Node bound =
457
            nm->mkNode(AND,
458
12
                       nm->mkNode(LEQ, zero, k),
459
24
                       nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens)));
460
6
        echildren.push_back(bound);
461
      }
462
12
      Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s);
463
6
      echildren.push_back(substrEq);
464
6
      if (i > 0)
465
      {
466
12
        std::vector<Node> rprefix;
467
6
        rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
468
12
        Node rpn = utils::mkConcat(rprefix, nm->regExpType());
469
        Node substrPrefix = nm->mkNode(
470
12
            STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn);
471
6
        echildren.push_back(substrPrefix);
472
      }
473
6
      if (i + 1 < nchildren)
474
      {
475
12
        std::vector<Node> rsuffix;
476
6
        rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
477
12
        Node rps = utils::mkConcat(rsuffix, nm->regExpType());
478
12
        Node ks = nm->mkNode(PLUS, k, lens);
479
        Node substrSuffix = nm->mkNode(
480
            STRING_IN_REGEXP,
481
12
            nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)),
482
24
            rps);
483
6
        echildren.push_back(substrSuffix);
484
      }
485
12
      Node body = nm->mkNode(AND, echildren);
486
6
      if (k.getKind() == BOUND_VARIABLE)
487
      {
488
12
        Node bvl = nm->mkNode(BOUND_VAR_LIST, k);
489
6
        body = utils::mkForallInternal(bvl, body.negate()).negate();
490
      }
491
      // e.g. x in re.++( R1, "AB", R2 ) --->
492
      //  exists k.
493
      //    0 <= k <= (len(x)-2) ^
494
      //    substr( x, k, 2 ) = "AB" ^
495
      //    substr( x, 0, k ) in R1 ^
496
      //    substr( x, k+2, len(x)-(k+2) ) in R2
497
6
      return returnElim(atom, body, "concat-find");
498
    }
499
  }
500
  return Node::null();
501
}
502
503
42
Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
504
{
505
42
  if (!isAgg)
506
  {
507
14
    return Node::null();
508
  }
509
  // only aggressive rewrites below here
510
511
28
  NodeManager* nm = NodeManager::currentNM();
512
56
  Node x = atom[0];
513
56
  Node lenx = nm->mkNode(STRING_LENGTH, x);
514
56
  Node re = atom[1];
515
56
  Node zero = nm->mkConst(Rational(0));
516
  // for regular expression star,
517
  // if the period is a fixed constant, we can turn it into a bounded
518
  // quantifier
519
56
  std::vector<Node> disj;
520
28
  if (re[0].getKind() == REGEXP_UNION)
521
  {
522
12
    for (const Node& r : re[0])
523
    {
524
8
      disj.push_back(r);
525
    }
526
  }
527
  else
528
  {
529
24
    disj.push_back(re[0]);
530
  }
531
28
  bool lenOnePeriod = true;
532
56
  std::vector<Node> char_constraints;
533
56
  Node index = nm->mkBoundVar(nm->integerType());
534
  Node substr_ch =
535
56
      nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1)));
536
28
  substr_ch = Rewriter::rewrite(substr_ch);
537
  // handle the case where it is purely characters
538
50
  for (const Node& r : disj)
539
  {
540
32
    Assert(r.getKind() != REGEXP_UNION);
541
32
    Assert(r.getKind() != REGEXP_SIGMA);
542
32
    lenOnePeriod = false;
543
    // lenOnePeriod is true if this regular expression is a single character
544
    // regular expression
545
32
    if (r.getKind() == STRING_TO_REGEXP)
546
    {
547
46
      Node s = r[0];
548
23
      if (s.isConst() && s.getConst<String>().size() == 1)
549
      {
550
17
        lenOnePeriod = true;
551
      }
552
    }
553
9
    else if (r.getKind() == REGEXP_RANGE)
554
    {
555
5
      lenOnePeriod = true;
556
    }
557
32
    if (!lenOnePeriod)
558
    {
559
10
      break;
560
    }
561
    else
562
    {
563
44
      Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r);
564
22
      regexp_ch = Rewriter::rewrite(regexp_ch);
565
22
      Assert(regexp_ch.getKind() != STRING_IN_REGEXP);
566
22
      char_constraints.push_back(regexp_ch);
567
    }
568
  }
569
28
  if (lenOnePeriod)
570
  {
571
18
    Assert(!char_constraints.empty());
572
    Node bound = nm->mkNode(
573
36
        AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx));
574
36
    Node conc = char_constraints.size() == 1 ? char_constraints[0]
575
54
                                             : nm->mkNode(OR, char_constraints);
576
36
    Node body = nm->mkNode(OR, bound.negate(), conc);
577
36
    Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
578
36
    Node res = utils::mkForallInternal(bvl, body);
579
    // e.g.
580
    //   x in (re.* (re.union "A" "B" )) --->
581
    //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
582
18
    return returnElim(atom, res, "star-char");
583
  }
584
  // otherwise, for stars of constant length these are periodic
585
10
  if (disj.size() == 1)
586
  {
587
10
    Node r = disj[0];
588
6
    if (r.getKind() == STRING_TO_REGEXP)
589
    {
590
10
      Node s = r[0];
591
6
      if (s.isConst())
592
      {
593
4
        Node lens = nm->mkNode(STRING_LENGTH, s);
594
2
        lens = Rewriter::rewrite(lens);
595
2
        Assert(lens.isConst());
596
2
        Assert(lens.getConst<Rational>().sgn() > 0);
597
4
        std::vector<Node> conj;
598
        // lens is a positive constant, so it is safe to use total div/mod here.
599
        Node bound = nm->mkNode(
600
            AND,
601
4
            nm->mkNode(LEQ, zero, index),
602
8
            nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens)));
603
        Node conc =
604
4
            nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens)
605
4
                .eqNode(s);
606
4
        Node body = nm->mkNode(OR, bound.negate(), conc);
607
4
        Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
608
4
        Node res = utils::mkForallInternal(bvl, body);
609
6
        res = nm->mkNode(
610
4
            AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res);
611
        // e.g.
612
        //    x in ("abc")* --->
613
        //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
614
        //    len(x) mod 3 = 0
615
2
        return returnElim(atom, res, "star-constant");
616
      }
617
    }
618
  }
619
8
  return Node::null();
620
}
621
622
96
Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
623
{
624
192
  Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
625
96
                   << "." << std::endl;
626
96
  return atomElim;
627
}
628
92
bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; }
629
630
}  // namespace strings
631
}  // namespace theory
632
29322
}  // namespace cvc5