GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_elim.cpp Lines: 231 295 78.3 %
Date: 2021-05-22 Branches: 465 1356 34.3 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Mathias Preiner, Andres Noetzli
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of techniques for eliminating regular expressions.
14
 */
15
16
#include "theory/strings/regexp_elim.h"
17
18
#include "expr/proof_node_manager.h"
19
#include "options/strings_options.h"
20
#include "theory/rewriter.h"
21
#include "theory/strings/regexp_entail.h"
22
#include "theory/strings/theory_strings_utils.h"
23
24
using namespace cvc5::kind;
25
26
namespace cvc5 {
27
namespace theory {
28
namespace strings {
29
30
9460
RegExpElimination::RegExpElimination(bool isAgg,
31
                                     ProofNodeManager* pnm,
32
9460
                                     context::Context* c)
33
    : d_isAggressive(isAgg),
34
      d_pnm(pnm),
35
      d_epg(pnm == nullptr
36
                ? nullptr
37
9460
                : new EagerProofGenerator(pnm, c, "RegExpElimination::epg"))
38
{
39
9460
}
40
41
58
Node RegExpElimination::eliminate(Node atom, bool isAgg)
42
{
43
58
  Assert(atom.getKind() == STRING_IN_REGEXP);
44
58
  if (atom[1].getKind() == REGEXP_CONCAT)
45
  {
46
38
    return eliminateConcat(atom, isAgg);
47
  }
48
20
  else if (atom[1].getKind() == REGEXP_STAR)
49
  {
50
20
    return eliminateStar(atom, isAgg);
51
  }
52
  return Node::null();
53
}
54
55
56
TrustNode RegExpElimination::eliminateTrusted(Node atom)
56
{
57
112
  Node eatom = eliminate(atom, d_isAggressive);
58
56
  if (!eatom.isNull())
59
  {
60
    // Currently aggressive doesnt work due to fresh bound variables
61
46
    if (isProofEnabled() && !d_isAggressive)
62
    {
63
4
      Node eq = atom.eqNode(eatom);
64
4
      Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive);
65
      std::shared_ptr<ProofNode> pn =
66
4
          d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq);
67
2
      d_epg->setProofFor(eq, pn);
68
2
      return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
69
    }
70
44
    return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
71
  }
72
10
  return TrustNode::null();
73
}
74
75
38
Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
76
{
77
38
  NodeManager* nm = NodeManager::currentNM();
78
76
  Node x = atom[0];
79
76
  Node lenx = nm->mkNode(STRING_LENGTH, x);
80
76
  Node re = atom[1];
81
76
  Node zero = nm->mkConst(Rational(0));
82
76
  std::vector<Node> children;
83
38
  utils::getConcat(re, children);
84
85
  // If it can be reduced to memberships in fixed length regular expressions.
86
  // This includes concatenations where at most one child is of the form
87
  // (re.* re.allchar), which we abbreviate _* below, and all other children
88
  // have a fixed length.
89
  // The intuition why this is a "non-aggressive" rewrite is that membership
90
  // into fixed length regular expressions are easy to handle.
91
38
  bool hasFixedLength = true;
92
  // the index of _* in re
93
38
  unsigned pivotIndex = 0;
94
38
  bool hasPivotIndex = false;
95
76
  std::vector<Node> childLengths;
96
76
  std::vector<Node> childLengthsPostPivot;
97
228
  for (unsigned i = 0, size = children.size(); i < size; i++)
98
  {
99
394
    Node c = children[i];
100
394
    Node fl = RegExpEntail::getFixedLengthForRegexp(c);
101
204
    if (fl.isNull())
102
    {
103
80
      if (!hasPivotIndex && c.getKind() == REGEXP_STAR
104
80
          && c[0].getKind() == REGEXP_SIGMA)
105
      {
106
16
        hasPivotIndex = true;
107
16
        pivotIndex = i;
108
        // set to zero for the sum below
109
16
        fl = zero;
110
      }
111
      else
112
      {
113
14
        hasFixedLength = false;
114
14
        break;
115
      }
116
    }
117
190
    childLengths.push_back(fl);
118
190
    if (hasPivotIndex)
119
    {
120
106
      childLengthsPostPivot.push_back(fl);
121
    }
122
  }
123
38
  if (hasFixedLength)
124
  {
125
24
    Assert(re.getNumChildren() == children.size());
126
48
    Node sum = nm->mkNode(PLUS, childLengths);
127
48
    std::vector<Node> conc;
128
24
    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum));
129
48
    Node currEnd = zero;
130
184
    for (unsigned i = 0, size = childLengths.size(); i < size; i++)
131
    {
132
160
      if (hasPivotIndex && i == pivotIndex)
133
      {
134
6
        Node ppSum = childLengthsPostPivot.size() == 1
135
                         ? childLengthsPostPivot[0]
136
12
                         : nm->mkNode(PLUS, childLengthsPostPivot);
137
12
        currEnd = nm->mkNode(MINUS, lenx, ppSum);
138
      }
139
      else
140
      {
141
308
        Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]);
142
        // We do not need to include memberships of the form
143
        //   (str.substr x n 1) in re.allchar
144
        // since we know that by construction, n < len( x ).
145
154
        if (re[i].getKind() != REGEXP_SIGMA)
146
        {
147
164
          Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]);
148
82
          conc.push_back(currMem);
149
        }
150
154
        currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]);
151
154
        currEnd = Rewriter::rewrite(currEnd);
152
      }
153
    }
154
48
    Node res = nm->mkNode(AND, conc);
155
    // For example:
156
    //   x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
157
    //   len( x ) = 3 ^
158
    //   substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
159
    //   substr(x,1,2) in "AB"
160
    // An example with a pivot index:
161
    //   x in re.++( "AB" ++ _* ++ "C" ) -->
162
    //   len( x ) >= 3 ^
163
    //   substr( x, 0, 2 ) in "AB" ^
164
    //   substr( x, len( x ) - 1, 1 ) in "C"
165
24
    return returnElim(atom, res, "concat-fixed-len");
166
  }
167
168
  // memberships of the form x in re.++ * s1 * ... * sn *, where * are
169
  // any number of repetitions (exact or indefinite) of re.allchar.
170
14
  Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
171
14
  bool onlySigmasAndConsts = true;
172
28
  std::vector<Node> sep_children;
173
28
  std::vector<unsigned> gap_minsize;
174
28
  std::vector<bool> gap_exact;
175
  // the first gap is initially strict zero
176
14
  gap_minsize.push_back(0);
177
14
  gap_exact.push_back(true);
178
84
  for (const Node& c : children)
179
  {
180
74
    Trace("re-elim-debug") << "  " << c << std::endl;
181
74
    onlySigmasAndConsts = false;
182
74
    if (c.getKind() == STRING_TO_REGEXP)
183
    {
184
40
      onlySigmasAndConsts = true;
185
40
      sep_children.push_back(c[0]);
186
      // the next gap is initially strict zero
187
40
      gap_minsize.push_back(0);
188
40
      gap_exact.push_back(true);
189
    }
190
34
    else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA)
191
    {
192
      // found a gap of any size
193
30
      onlySigmasAndConsts = true;
194
30
      gap_exact[gap_exact.size() - 1] = false;
195
    }
196
4
    else if (c.getKind() == REGEXP_SIGMA)
197
    {
198
      // add one to the minimum size of the gap
199
      onlySigmasAndConsts = true;
200
      gap_minsize[gap_minsize.size() - 1]++;
201
    }
202
74
    if (!onlySigmasAndConsts)
203
    {
204
4
      Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
205
4
      break;
206
    }
207
  }
208
  // we should always rewrite concatenations that are purely re.allchar
209
  // and re.*( re.allchar ).
210
14
  Assert(!onlySigmasAndConsts || !sep_children.empty());
211
14
  if (onlySigmasAndConsts && !sep_children.empty())
212
  {
213
10
    bool canProcess = true;
214
10
    std::vector<Node> conj;
215
    // The following constructs a set of constraints that encodes that a
216
    // set of string terms are found, in order, in string x.
217
    // prev_end stores the current (symbolic) index in x that we are
218
    // searching.
219
10
    Node prev_end = zero;
220
    // the symbolic index we start searching, for each child in sep_children.
221
10
    std::vector<Node> prev_ends;
222
10
    unsigned gap_minsize_end = gap_minsize.back();
223
10
    bool gap_exact_end = gap_exact.back();
224
10
    std::vector<Node> non_greedy_find_vars;
225
50
    for (unsigned i = 0, size = sep_children.size(); i < size; i++)
226
    {
227
40
      if (gap_minsize[i] > 0)
228
      {
229
        // the gap to this child is at least gap_minsize[i]
230
        prev_end =
231
            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
232
      }
233
40
      prev_ends.push_back(prev_end);
234
80
      Node sc = sep_children[i];
235
80
      Node lensc = nm->mkNode(STRING_LENGTH, sc);
236
40
      if (gap_exact[i])
237
      {
238
        // if the gap is exact, it is a substring constraint
239
20
        Node curr = prev_end;
240
20
        Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc);
241
10
        conj.push_back(ss.eqNode(sc));
242
10
        prev_end = nm->mkNode(PLUS, curr, lensc);
243
      }
244
      else
245
      {
246
        // otherwise, we can use indexof to represent some next occurrence
247
30
        if (gap_exact[i + 1] && i + 1 != size)
248
        {
249
          if (!isAgg)
250
          {
251
            canProcess = false;
252
            break;
253
          }
254
          // if the gap after this one is strict, we need a non-greedy find
255
          // thus, we add a symbolic constant
256
          Node k = nm->mkBoundVar(nm->integerType());
257
          non_greedy_find_vars.push_back(k);
258
          prev_end = nm->mkNode(PLUS, prev_end, k);
259
        }
260
60
        Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end);
261
60
        Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate();
262
30
        conj.push_back(idofFind);
263
30
        prev_end = nm->mkNode(PLUS, curr, lensc);
264
      }
265
    }
266
267
10
    if (canProcess)
268
    {
269
      // since sep_children is non-empty, conj is non-empty
270
10
      Assert(!conj.empty());
271
      // Process the last gap, if necessary.
272
      // Notice that if the last gap is not exact and its minsize is zero,
273
      // then the last indexof/substr constraint entails the following
274
      // constraint, so it is not necessary to add.
275
      // Below, we may write "A" for (str.to.re "A") and _ for re.allchar:
276
20
      Node cEnd = nm->mkConst(Rational(gap_minsize_end));
277
10
      if (gap_exact_end)
278
      {
279
10
        Assert(!sep_children.empty());
280
        // if it is strict, it corresponds to a substr case.
281
        // For example:
282
        //     x in (re.++ "A" (re.* _) "B" _ _) --->
283
        //        ... ^ "B" = substr( x, len( x ) - 3, 1 )  ^ ...
284
20
        Node sc = sep_children.back();
285
20
        Node lenSc = nm->mkNode(STRING_LENGTH, sc);
286
20
        Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
287
20
        Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
288
        // We also must ensure that we fit. This constraint is necessary in
289
        // addition to the constraint above. Take this example:
290
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
291
        //       substr( x, 0, 1 ) = "A" ^             // find "A"
292
        //       indexof( x, "B", 2 ) != -1 ^          // find "B" >=1 after "A"
293
        //       substr( x, len(x)-2, 1 ) = "B" ^      // "B" is at end - 2.
294
        //       indexof( x, "B", 2 ) <= len( x ) - 2
295
        // The last constaint ensures that the second and third constraints
296
        // may refer to the same "B". If it were not for the last constraint, it
297
        // would have been the case than "ABB" would be a model for x, where
298
        // the second constraint refers to the third position, and the third
299
        // constraint refers to the second position.
300
        //
301
        // With respect to the above example, the following is an optimization.
302
        // For that example, we instead produce:
303
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
304
        //       substr( x, 0, 1 ) = "A" ^          // find "A"
305
        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
306
        //       2 <= len( x ) - 2
307
        // The intuition is that above, there are two constraints that insist
308
        // that "B" is found, whereas we only need one. The last constraint
309
        // above says that the "B" we find at end-2 can be found >=1 after
310
        // the "A".
311
10
        conj.pop_back();
312
20
        Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
313
10
                              prev_ends.back(),
314
50
                              loc);
315
316
10
        conj.push_back(scc);
317
10
        conj.push_back(fit);
318
      }
319
      else if (gap_minsize_end > 0)
320
      {
321
        // if it is non-strict, we are in a "greedy find" situtation where
322
        // we just need to ensure that the next occurrence fits.
323
        // For example:
324
        //     x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) --->
325
        //        ... ^ indexof( x, "B", 1 ) + 2 <= len( x )
326
        Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx);
327
        conj.push_back(fit);
328
      }
329
20
      Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj);
330
      // process the non-greedy find variables
331
10
      if (!non_greedy_find_vars.empty())
332
      {
333
        std::vector<Node> children2;
334
        for (const Node& v : non_greedy_find_vars)
335
        {
336
          Node bound = nm->mkNode(
337
              AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx));
338
          children2.push_back(bound);
339
        }
340
        children2.push_back(res);
341
        Node body = nm->mkNode(AND, children2);
342
        Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
343
        res = nm->mkNode(EXISTS, bvl, body);
344
      }
345
      // Examples of this elimination:
346
      //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
347
      //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
348
      //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
349
      //     indexof(x,"A",0)!=-1 ^
350
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
351
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x)
352
353
      // An example of a non-greedy find:
354
      //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
355
      //     exists k. 0 <= k < len( x ) ^
356
      //               indexof( x, "A", k ) != -1 ^
357
      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B"
358
10
      return returnElim(atom, res, "concat-with-gaps");
359
    }
360
  }
361
362
4
  if (!isAgg)
363
  {
364
    return Node::null();
365
  }
366
  // only aggressive rewrites below here
367
368
  // if the first or last child is constant string, we can split the membership
369
  // into a conjunction of two memberships.
370
8
  Node sStartIndex = zero;
371
8
  Node sLength = lenx;
372
8
  std::vector<Node> sConstraints;
373
8
  std::vector<Node> rexpElimChildren;
374
4
  unsigned nchildren = children.size();
375
4
  Assert(nchildren > 1);
376
12
  for (unsigned r = 0; r < 2; r++)
377
  {
378
8
    unsigned index = r == 0 ? 0 : nchildren - 1;
379
16
    Node c = children[index];
380
8
    if (c.getKind() == STRING_TO_REGEXP)
381
    {
382
      Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP);
383
      Node s = c[0];
384
      Node lens = nm->mkNode(STRING_LENGTH, s);
385
      Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens);
386
      Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens);
387
      sConstraints.push_back(ss.eqNode(s));
388
      if (r == 0)
389
      {
390
        sStartIndex = lens;
391
      }
392
      sLength = nm->mkNode(MINUS, sLength, lens);
393
    }
394
8
    if (r == 1 && !sConstraints.empty())
395
    {
396
      // add the middle children
397
      for (unsigned i = 1; i < (nchildren - 1); i++)
398
      {
399
        rexpElimChildren.push_back(children[i]);
400
      }
401
    }
402
8
    if (c.getKind() != STRING_TO_REGEXP)
403
    {
404
8
      rexpElimChildren.push_back(c);
405
    }
406
  }
407
4
  if (!sConstraints.empty())
408
  {
409
    Assert(rexpElimChildren.size() + sConstraints.size() == nchildren);
410
    Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength);
411
    Assert(!rexpElimChildren.empty());
412
    Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType());
413
    sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim));
414
    Node ret = nm->mkNode(AND, sConstraints);
415
    // e.g.
416
    // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
417
    return returnElim(atom, ret, "concat-splice");
418
  }
419
4
  Assert(nchildren > 1);
420
8
  for (unsigned i = 0; i < nchildren; i++)
421
  {
422
8
    if (children[i].getKind() == STRING_TO_REGEXP)
423
    {
424
8
      Node s = children[i][0];
425
8
      Node lens = nm->mkNode(STRING_LENGTH, s);
426
      // there exists an index in this string such that the substring is this
427
8
      Node k;
428
8
      std::vector<Node> echildren;
429
4
      if (i == 0)
430
      {
431
        k = zero;
432
      }
433
4
      else if (i + 1 == nchildren)
434
      {
435
        k = nm->mkNode(MINUS, lenx, lens);
436
      }
437
      else
438
      {
439
4
        k = nm->mkBoundVar(nm->integerType());
440
        Node bound =
441
            nm->mkNode(AND,
442
8
                       nm->mkNode(LEQ, zero, k),
443
16
                       nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens)));
444
4
        echildren.push_back(bound);
445
      }
446
8
      Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s);
447
4
      echildren.push_back(substrEq);
448
4
      if (i > 0)
449
      {
450
8
        std::vector<Node> rprefix;
451
4
        rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
452
8
        Node rpn = utils::mkConcat(rprefix, nm->regExpType());
453
        Node substrPrefix = nm->mkNode(
454
8
            STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn);
455
4
        echildren.push_back(substrPrefix);
456
      }
457
4
      if (i + 1 < nchildren)
458
      {
459
8
        std::vector<Node> rsuffix;
460
4
        rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
461
8
        Node rps = utils::mkConcat(rsuffix, nm->regExpType());
462
8
        Node ks = nm->mkNode(PLUS, k, lens);
463
        Node substrSuffix = nm->mkNode(
464
            STRING_IN_REGEXP,
465
8
            nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)),
466
16
            rps);
467
4
        echildren.push_back(substrSuffix);
468
      }
469
8
      Node body = nm->mkNode(AND, echildren);
470
4
      if (k.getKind() == BOUND_VARIABLE)
471
      {
472
8
        Node bvl = nm->mkNode(BOUND_VAR_LIST, k);
473
4
        body = nm->mkNode(EXISTS, bvl, body);
474
      }
475
      // e.g. x in re.++( R1, "AB", R2 ) --->
476
      //  exists k.
477
      //    0 <= k <= (len(x)-2) ^
478
      //    substr( x, k, 2 ) = "AB" ^
479
      //    substr( x, 0, k ) in R1 ^
480
      //    substr( x, k+2, len(x)-(k+2) ) in R2
481
4
      return returnElim(atom, body, "concat-find");
482
    }
483
  }
484
  return Node::null();
485
}
486
487
20
Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
488
{
489
20
  if (!isAgg)
490
  {
491
6
    return Node::null();
492
  }
493
  // only aggressive rewrites below here
494
495
14
  NodeManager* nm = NodeManager::currentNM();
496
28
  Node x = atom[0];
497
28
  Node lenx = nm->mkNode(STRING_LENGTH, x);
498
28
  Node re = atom[1];
499
28
  Node zero = nm->mkConst(Rational(0));
500
  // for regular expression star,
501
  // if the period is a fixed constant, we can turn it into a bounded
502
  // quantifier
503
28
  std::vector<Node> disj;
504
14
  if (re[0].getKind() == REGEXP_UNION)
505
  {
506
12
    for (const Node& r : re[0])
507
    {
508
8
      disj.push_back(r);
509
    }
510
  }
511
  else
512
  {
513
10
    disj.push_back(re[0]);
514
  }
515
14
  bool lenOnePeriod = true;
516
28
  std::vector<Node> char_constraints;
517
28
  Node index = nm->mkBoundVar(nm->integerType());
518
  Node substr_ch =
519
28
      nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1)));
520
14
  substr_ch = Rewriter::rewrite(substr_ch);
521
  // handle the case where it is purely characters
522
28
  for (const Node& r : disj)
523
  {
524
18
    Assert(r.getKind() != REGEXP_UNION);
525
18
    Assert(r.getKind() != REGEXP_SIGMA);
526
18
    lenOnePeriod = false;
527
    // lenOnePeriod is true if this regular expression is a single character
528
    // regular expression
529
18
    if (r.getKind() == STRING_TO_REGEXP)
530
    {
531
22
      Node s = r[0];
532
11
      if (s.isConst() && s.getConst<String>().size() == 1)
533
      {
534
11
        lenOnePeriod = true;
535
      }
536
    }
537
7
    else if (r.getKind() == REGEXP_RANGE)
538
    {
539
3
      lenOnePeriod = true;
540
    }
541
18
    if (!lenOnePeriod)
542
    {
543
4
      break;
544
    }
545
    else
546
    {
547
28
      Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r);
548
14
      regexp_ch = Rewriter::rewrite(regexp_ch);
549
14
      Assert(regexp_ch.getKind() != STRING_IN_REGEXP);
550
14
      char_constraints.push_back(regexp_ch);
551
    }
552
  }
553
14
  if (lenOnePeriod)
554
  {
555
10
    Assert(!char_constraints.empty());
556
    Node bound = nm->mkNode(
557
20
        AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx));
558
20
    Node conc = char_constraints.size() == 1 ? char_constraints[0]
559
30
                                             : nm->mkNode(OR, char_constraints);
560
20
    Node body = nm->mkNode(OR, bound.negate(), conc);
561
20
    Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
562
20
    Node res = nm->mkNode(FORALL, bvl, body);
563
    // e.g.
564
    //   x in (re.* (re.union "A" "B" )) --->
565
    //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
566
10
    return returnElim(atom, res, "star-char");
567
  }
568
  // otherwise, for stars of constant length these are periodic
569
4
  if (disj.size() == 1)
570
  {
571
    Node r = disj[0];
572
    if (r.getKind() == STRING_TO_REGEXP)
573
    {
574
      Node s = r[0];
575
      if (s.isConst())
576
      {
577
        Node lens = nm->mkNode(STRING_LENGTH, s);
578
        lens = Rewriter::rewrite(lens);
579
        Assert(lens.isConst());
580
        Assert(lens.getConst<Rational>().sgn() > 0);
581
        std::vector<Node> conj;
582
        // lens is a positive constant, so it is safe to use total div/mod here.
583
        Node bound = nm->mkNode(
584
            AND,
585
            nm->mkNode(LEQ, zero, index),
586
            nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens)));
587
        Node conc =
588
            nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens)
589
                .eqNode(s);
590
        Node body = nm->mkNode(OR, bound.negate(), conc);
591
        Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
592
        Node res = nm->mkNode(FORALL, bvl, body);
593
        res = nm->mkNode(
594
            AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res);
595
        // e.g.
596
        //    x in ("abc")* --->
597
        //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
598
        //    len(x) mod 3 = 0
599
        return returnElim(atom, res, "star-constant");
600
      }
601
    }
602
  }
603
4
  return Node::null();
604
}
605
606
48
Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
607
{
608
96
  Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
609
48
                   << "." << std::endl;
610
48
  return atomElim;
611
}
612
46
bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; }
613
614
}  // namespace strings
615
}  // namespace theory
616
28194
}  // namespace cvc5