GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_elim.cpp Lines: 283 306 92.5 %
Date: 2021-11-07 Branches: 631 1404 44.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Mathias Preiner, Andres Noetzli
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of techniques for eliminating regular expressions.
14
 */
15
16
#include "theory/strings/regexp_elim.h"
17
18
#include "expr/bound_var_manager.h"
19
#include "options/strings_options.h"
20
#include "proof/proof_node_manager.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/regexp_entail.h"
23
#include "theory/strings/theory_strings_utils.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
/**
34
 * Attributes used for constructing unique bound variables. The following
35
 * attributes are used to construct (deterministic) bound variables for
36
 * eliminations within eliminateConcat and eliminateStar respectively.
37
 */
38
struct ReElimConcatIndexAttributeId
39
{
40
};
41
typedef expr::Attribute<ReElimConcatIndexAttributeId, Node>
42
    ReElimConcatIndexAttribute;
43
struct ReElimStarIndexAttributeId
44
{
45
};
46
typedef expr::Attribute<ReElimStarIndexAttributeId, Node>
47
    ReElimStarIndexAttribute;
48
49
15273
RegExpElimination::RegExpElimination(bool isAgg,
50
                                     ProofNodeManager* pnm,
51
15273
                                     context::Context* c)
52
    : d_isAggressive(isAgg),
53
      d_pnm(pnm),
54
      d_epg(pnm == nullptr
55
                ? nullptr
56
15273
                : new EagerProofGenerator(pnm, c, "RegExpElimination::epg"))
57
{
58
15273
}
59
60
156
Node RegExpElimination::eliminate(Node atom, bool isAgg)
61
{
62
156
  Assert(atom.getKind() == STRING_IN_REGEXP);
63
156
  if (atom[1].getKind() == REGEXP_CONCAT)
64
  {
65
110
    return eliminateConcat(atom, isAgg);
66
  }
67
46
  else if (atom[1].getKind() == REGEXP_STAR)
68
  {
69
46
    return eliminateStar(atom, isAgg);
70
  }
71
  return Node::null();
72
}
73
74
152
TrustNode RegExpElimination::eliminateTrusted(Node atom)
75
{
76
304
  Node eatom = eliminate(atom, d_isAggressive);
77
152
  if (!eatom.isNull())
78
  {
79
    // Currently aggressive doesnt work due to fresh bound variables
80
110
    if (isProofEnabled() && !d_isAggressive)
81
    {
82
8
      Node eq = atom.eqNode(eatom);
83
8
      Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive);
84
      std::shared_ptr<ProofNode> pn =
85
8
          d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq);
86
4
      d_epg->setProofFor(eq, pn);
87
4
      return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get());
88
    }
89
106
    return TrustNode::mkTrustRewrite(atom, eatom, nullptr);
90
  }
91
42
  return TrustNode::null();
92
}
93
94
110
Node RegExpElimination::eliminateConcat(Node atom, bool isAgg)
95
{
96
110
  NodeManager* nm = NodeManager::currentNM();
97
110
  BoundVarManager* bvm = nm->getBoundVarManager();
98
220
  Node x = atom[0];
99
220
  Node lenx = nm->mkNode(STRING_LENGTH, x);
100
220
  Node re = atom[1];
101
220
  Node zero = nm->mkConst(Rational(0));
102
220
  std::vector<Node> children;
103
110
  utils::getConcat(re, children);
104
105
  // If it can be reduced to memberships in fixed length regular expressions.
106
  // This includes concatenations where at most one child is of the form
107
  // (re.* re.allchar), which we abbreviate _* below, and all other children
108
  // have a fixed length.
109
  // The intuition why this is a "non-aggressive" rewrite is that membership
110
  // into fixed length regular expressions are easy to handle.
111
  // the index of _* in re
112
110
  unsigned pivotIndex = 0;
113
110
  bool hasPivotIndex = false;
114
110
  bool hasFixedLength = true;
115
220
  std::vector<Node> childLengths;
116
220
  std::vector<Node> childLengthsPostPivot;
117
526
  for (unsigned i = 0, size = children.size(); i < size; i++)
118
  {
119
832
    Node c = children[i];
120
832
    Node fl = RegExpEntail::getFixedLengthForRegexp(c);
121
416
    if (fl.isNull())
122
    {
123
370
      if (!hasPivotIndex && c.getKind() == REGEXP_STAR
124
321
          && c[0].getKind() == REGEXP_SIGMA)
125
      {
126
21
        hasPivotIndex = true;
127
21
        pivotIndex = i;
128
        // zero is used in sum below and is used for concat-fixed-len
129
21
        fl = zero;
130
      }
131
      else
132
      {
133
109
        hasFixedLength = false;
134
      }
135
    }
136
416
    if (!fl.isNull())
137
    {
138
307
      childLengths.push_back(fl);
139
307
      if (hasPivotIndex)
140
      {
141
131
        childLengthsPostPivot.push_back(fl);
142
      }
143
    }
144
  }
145
110
  Node lenSum = childLengths.size() > 1
146
                    ? nm->mkNode(PLUS, childLengths)
147
220
                    : (childLengths.empty() ? zero : childLengths[0]);
148
  // if we have a fixed length
149
110
  if (hasFixedLength)
150
  {
151
29
    Assert(re.getNumChildren() == children.size());
152
58
    std::vector<Node> conc;
153
29
    conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, lenSum));
154
58
    Node currEnd = zero;
155
204
    for (unsigned i = 0, size = childLengths.size(); i < size; i++)
156
    {
157
175
      if (hasPivotIndex && i == pivotIndex)
158
      {
159
6
        Node ppSum = childLengthsPostPivot.size() == 1
160
                         ? childLengthsPostPivot[0]
161
12
                         : nm->mkNode(PLUS, childLengthsPostPivot);
162
12
        currEnd = nm->mkNode(MINUS, lenx, ppSum);
163
      }
164
      else
165
      {
166
338
        Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]);
167
        // We do not need to include memberships of the form
168
        //   (str.substr x n 1) in re.allchar
169
        // since we know that by construction, n < len( x ).
170
169
        if (re[i].getKind() != REGEXP_SIGMA)
171
        {
172
184
          Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]);
173
92
          conc.push_back(currMem);
174
        }
175
169
        currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]);
176
169
        currEnd = Rewriter::rewrite(currEnd);
177
      }
178
    }
179
58
    Node res = nm->mkNode(AND, conc);
180
    // For example:
181
    //   x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") -->
182
    //   len( x ) = 3 ^
183
    //   substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^
184
    //   substr(x,1,2) in "AB"
185
    // An example with a pivot index:
186
    //   x in re.++( "AB" ++ _* ++ "C" ) -->
187
    //   len( x ) >= 3 ^
188
    //   substr( x, 0, 2 ) in "AB" ^
189
    //   substr( x, len( x ) - 1, 1 ) in "C"
190
29
    return returnElim(atom, res, "concat-fixed-len");
191
  }
192
193
  // memberships of the form x in re.++ * s1 * ... * sn *, where * are
194
  // any number of repetitions (exact or indefinite) of re.allchar.
195
81
  Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl;
196
81
  bool onlySigmasAndConsts = true;
197
162
  std::vector<Node> sep_children;
198
162
  std::vector<unsigned> gap_minsize;
199
162
  std::vector<bool> gap_exact;
200
  // the first gap is initially strict zero
201
81
  gap_minsize.push_back(0);
202
81
  gap_exact.push_back(true);
203
204
  for (const Node& c : children)
204
  {
205
189
    Trace("re-elim-debug") << "  " << c << std::endl;
206
189
    onlySigmasAndConsts = false;
207
189
    if (c.getKind() == STRING_TO_REGEXP)
208
    {
209
83
      onlySigmasAndConsts = true;
210
83
      sep_children.push_back(c[0]);
211
      // the next gap is initially strict zero
212
83
      gap_minsize.push_back(0);
213
83
      gap_exact.push_back(true);
214
    }
215
106
    else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA)
216
    {
217
      // found a gap of any size
218
35
      onlySigmasAndConsts = true;
219
35
      gap_exact[gap_exact.size() - 1] = false;
220
    }
221
71
    else if (c.getKind() == REGEXP_SIGMA)
222
    {
223
      // add one to the minimum size of the gap
224
5
      onlySigmasAndConsts = true;
225
5
      gap_minsize[gap_minsize.size() - 1]++;
226
    }
227
189
    if (!onlySigmasAndConsts)
228
    {
229
66
      Trace("re-elim-debug") << "...cannot handle " << c << std::endl;
230
66
      break;
231
    }
232
  }
233
  // we should always rewrite concatenations that are purely re.allchar
234
  // and re.*( re.allchar ).
235
81
  Assert(!onlySigmasAndConsts || !sep_children.empty());
236
81
  if (onlySigmasAndConsts && !sep_children.empty())
237
  {
238
15
    bool canProcess = true;
239
15
    std::vector<Node> conj;
240
    // The following constructs a set of constraints that encodes that a
241
    // set of string terms are found, in order, in string x.
242
    // prev_end stores the current (symbolic) index in x that we are
243
    // searching.
244
15
    Node prev_end = zero;
245
    // the symbolic index we start searching, for each child in sep_children.
246
15
    std::vector<Node> prev_ends;
247
15
    unsigned gap_minsize_end = gap_minsize.back();
248
15
    bool gap_exact_end = gap_exact.back();
249
15
    std::vector<Node> non_greedy_find_vars;
250
60
    for (unsigned i = 0, size = sep_children.size(); i < size; i++)
251
    {
252
45
      if (gap_minsize[i] > 0)
253
      {
254
        // the gap to this child is at least gap_minsize[i]
255
5
        prev_end =
256
10
            nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i])));
257
      }
258
45
      prev_ends.push_back(prev_end);
259
90
      Node sc = sep_children[i];
260
90
      Node lensc = nm->mkNode(STRING_LENGTH, sc);
261
45
      if (gap_exact[i])
262
      {
263
        // if the gap is exact, it is a substring constraint
264
30
        Node curr = prev_end;
265
30
        Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc);
266
15
        conj.push_back(ss.eqNode(sc));
267
15
        prev_end = nm->mkNode(PLUS, curr, lensc);
268
      }
269
      else
270
      {
271
        // otherwise, we can use indexof to represent some next occurrence
272
30
        if (gap_exact[i + 1] && i + 1 != size)
273
        {
274
          if (!isAgg)
275
          {
276
            canProcess = false;
277
            break;
278
          }
279
          // if the gap after this one is strict, we need a non-greedy find
280
          // thus, we add a symbolic constant
281
          Node cacheVal =
282
              BoundVarManager::getCacheValue(atom, nm->mkConst(Rational(i)));
283
          TypeNode intType = nm->integerType();
284
          Node k =
285
              bvm->mkBoundVar<ReElimConcatIndexAttribute>(cacheVal, intType);
286
          non_greedy_find_vars.push_back(k);
287
          prev_end = nm->mkNode(PLUS, prev_end, k);
288
        }
289
60
        Node curr = nm->mkNode(STRING_INDEXOF, x, sc, prev_end);
290
60
        Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate();
291
30
        conj.push_back(idofFind);
292
30
        prev_end = nm->mkNode(PLUS, curr, lensc);
293
      }
294
    }
295
296
15
    if (canProcess)
297
    {
298
      // since sep_children is non-empty, conj is non-empty
299
15
      Assert(!conj.empty());
300
      // Process the last gap, if necessary.
301
      // Notice that if the last gap is not exact and its minsize is zero,
302
      // then the last indexof/substr constraint entails the following
303
      // constraint, so it is not necessary to add.
304
      // Below, we may write "A" for (str.to.re "A") and _ for re.allchar:
305
30
      Node cEnd = nm->mkConst(Rational(gap_minsize_end));
306
15
      if (gap_exact_end)
307
      {
308
10
        Assert(!sep_children.empty());
309
        // if it is strict, it corresponds to a substr case.
310
        // For example:
311
        //     x in (re.++ "A" (re.* _) "B" _ _) --->
312
        //        ... ^ "B" = substr( x, len( x ) - 3, 1 )  ^ ...
313
20
        Node sc = sep_children.back();
314
20
        Node lenSc = nm->mkNode(STRING_LENGTH, sc);
315
20
        Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd));
316
20
        Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc));
317
        // We also must ensure that we fit. This constraint is necessary in
318
        // addition to the constraint above. Take this example:
319
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
320
        //       substr( x, 0, 1 ) = "A" ^             // find "A"
321
        //       indexof( x, "B", 2 ) != -1 ^          // find "B" >=1 after "A"
322
        //       substr( x, len(x)-2, 1 ) = "B" ^      // "B" is at end - 2.
323
        //       indexof( x, "B", 2 ) <= len( x ) - 2
324
        // The last constaint ensures that the second and third constraints
325
        // may refer to the same "B". If it were not for the last constraint, it
326
        // would have been the case than "ABB" would be a model for x, where
327
        // the second constraint refers to the third position, and the third
328
        // constraint refers to the second position.
329
        //
330
        // With respect to the above example, the following is an optimization.
331
        // For that example, we instead produce:
332
        //     x in (re.++ "A" _ (re.* _) "B" _) --->
333
        //       substr( x, 0, 1 ) = "A" ^          // find "A"
334
        //       substr( x, len(x)-2, 1 ) = "B" ^   // "B" is at end - 2
335
        //       2 <= len( x ) - 2
336
        // The intuition is that above, there are two constraints that insist
337
        // that "B" is found, whereas we only need one. The last constraint
338
        // above says that the "B" we find at end-2 can be found >=1 after
339
        // the "A".
340
10
        conj.pop_back();
341
20
        Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ,
342
10
                              prev_ends.back(),
343
50
                              loc);
344
345
10
        conj.push_back(scc);
346
10
        conj.push_back(fit);
347
      }
348
5
      else if (gap_minsize_end > 0)
349
      {
350
        // if it is non-strict, we are in a "greedy find" situtation where
351
        // we just need to ensure that the next occurrence fits.
352
        // For example:
353
        //     x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) --->
354
        //        ... ^ indexof( x, "B", 1 ) + 2 <= len( x )
355
        Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx);
356
        conj.push_back(fit);
357
      }
358
30
      Node res = nm->mkAnd(conj);
359
      // process the non-greedy find variables
360
15
      if (!non_greedy_find_vars.empty())
361
      {
362
        std::vector<Node> children2;
363
        for (const Node& v : non_greedy_find_vars)
364
        {
365
          Node bound = nm->mkNode(
366
              AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx));
367
          children2.push_back(bound);
368
        }
369
        children2.push_back(res);
370
        Node body = nm->mkNode(AND, children2);
371
        Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars);
372
        res = utils::mkForallInternal(bvl, body.negate()).negate();
373
      }
374
      // must also give a minimum length requirement
375
15
      res = nm->mkNode(AND, res, nm->mkNode(GEQ, lenx, lenSum));
376
      // Examples of this elimination:
377
      //   x in (re.++ "A" (re.* _) "B" (re.* _)) --->
378
      //     substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1
379
      //   x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) --->
380
      //     indexof(x,"A",0)!=-1 ^
381
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^
382
      //     indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) ^
383
      //     len(x) >= 7
384
385
      // An example of a non-greedy find:
386
      //   x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) --->
387
      //     (exists k. 0 <= k < len( x ) ^
388
      //               indexof( x, "A", k ) != -1 ^
389
      //               substr( x, indexof( x, "A", k )+2, 1 ) = "B") ^
390
      //     len(x) >= 3
391
15
      return returnElim(atom, res, "concat-with-gaps");
392
    }
393
  }
394
395
66
  if (!isAgg)
396
  {
397
20
    return Node::null();
398
  }
399
  // only aggressive rewrites below here
400
401
  // if the first or last child is constant string, we can split the membership
402
  // into a conjunction of two memberships.
403
92
  Node sStartIndex = zero;
404
92
  Node sLength = lenx;
405
92
  std::vector<Node> sConstraints;
406
92
  std::vector<Node> rexpElimChildren;
407
46
  unsigned nchildren = children.size();
408
46
  Assert(nchildren > 1);
409
138
  for (unsigned r = 0; r < 2; r++)
410
  {
411
92
    unsigned index = r == 0 ? 0 : nchildren - 1;
412
184
    Node c = children[index];
413
92
    if (c.getKind() == STRING_TO_REGEXP)
414
    {
415
42
      Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP);
416
84
      Node s = c[0];
417
84
      Node lens = nm->mkNode(STRING_LENGTH, s);
418
84
      Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens);
419
84
      Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens);
420
42
      sConstraints.push_back(ss.eqNode(s));
421
42
      if (r == 0)
422
      {
423
34
        sStartIndex = lens;
424
      }
425
8
      else if (r == 1 && sConstraints.size() == 2)
426
      {
427
        // first and last children cannot overlap
428
4
        Node bound = nm->mkNode(GEQ, sss, sStartIndex);
429
2
        sConstraints.push_back(bound);
430
      }
431
42
      sLength = nm->mkNode(MINUS, sLength, lens);
432
    }
433
92
    if (r == 1 && !sConstraints.empty())
434
    {
435
      // add the middle children
436
42
      for (unsigned i = 1; i < (nchildren - 1); i++)
437
      {
438
2
        rexpElimChildren.push_back(children[i]);
439
      }
440
    }
441
92
    if (c.getKind() != STRING_TO_REGEXP)
442
    {
443
50
      rexpElimChildren.push_back(c);
444
    }
445
  }
446
46
  if (!sConstraints.empty())
447
  {
448
80
    Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength);
449
40
    Assert(!rexpElimChildren.empty());
450
80
    Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType());
451
40
    sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim));
452
80
    Node ret = nm->mkNode(AND, sConstraints);
453
    // e.g.
454
    // x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R
455
40
    return returnElim(atom, ret, "concat-splice");
456
  }
457
6
  Assert(nchildren > 1);
458
12
  for (unsigned i = 0; i < nchildren; i++)
459
  {
460
12
    if (children[i].getKind() == STRING_TO_REGEXP)
461
    {
462
12
      Node s = children[i][0];
463
12
      Node lens = nm->mkNode(STRING_LENGTH, s);
464
      // there exists an index in this string such that the substring is this
465
12
      Node k;
466
12
      std::vector<Node> echildren;
467
6
      if (i == 0)
468
      {
469
        k = zero;
470
      }
471
6
      else if (i + 1 == nchildren)
472
      {
473
        k = nm->mkNode(MINUS, lenx, lens);
474
      }
475
      else
476
      {
477
        Node cacheVal =
478
12
            BoundVarManager::getCacheValue(atom, nm->mkConst(Rational(i)));
479
12
        TypeNode intType = nm->integerType();
480
6
        k = bvm->mkBoundVar<ReElimConcatIndexAttribute>(cacheVal, intType);
481
        Node bound =
482
            nm->mkNode(AND,
483
12
                       nm->mkNode(LEQ, zero, k),
484
24
                       nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens)));
485
6
        echildren.push_back(bound);
486
      }
487
12
      Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s);
488
6
      echildren.push_back(substrEq);
489
6
      if (i > 0)
490
      {
491
12
        std::vector<Node> rprefix;
492
6
        rprefix.insert(rprefix.end(), children.begin(), children.begin() + i);
493
12
        Node rpn = utils::mkConcat(rprefix, nm->regExpType());
494
        Node substrPrefix = nm->mkNode(
495
12
            STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn);
496
6
        echildren.push_back(substrPrefix);
497
      }
498
6
      if (i + 1 < nchildren)
499
      {
500
12
        std::vector<Node> rsuffix;
501
6
        rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end());
502
12
        Node rps = utils::mkConcat(rsuffix, nm->regExpType());
503
12
        Node ks = nm->mkNode(PLUS, k, lens);
504
        Node substrSuffix = nm->mkNode(
505
            STRING_IN_REGEXP,
506
12
            nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)),
507
24
            rps);
508
6
        echildren.push_back(substrSuffix);
509
      }
510
12
      Node body = nm->mkNode(AND, echildren);
511
6
      if (k.getKind() == BOUND_VARIABLE)
512
      {
513
12
        Node bvl = nm->mkNode(BOUND_VAR_LIST, k);
514
6
        body = utils::mkForallInternal(bvl, body.negate()).negate();
515
      }
516
      // e.g. x in re.++( R1, "AB", R2 ) --->
517
      //  exists k.
518
      //    0 <= k <= (len(x)-2) ^
519
      //    substr( x, k, 2 ) = "AB" ^
520
      //    substr( x, 0, k ) in R1 ^
521
      //    substr( x, k+2, len(x)-(k+2) ) in R2
522
6
      return returnElim(atom, body, "concat-find");
523
    }
524
  }
525
  return Node::null();
526
}
527
528
46
Node RegExpElimination::eliminateStar(Node atom, bool isAgg)
529
{
530
46
  if (!isAgg)
531
  {
532
14
    return Node::null();
533
  }
534
  // only aggressive rewrites below here
535
536
32
  NodeManager* nm = NodeManager::currentNM();
537
32
  BoundVarManager* bvm = nm->getBoundVarManager();
538
64
  Node x = atom[0];
539
64
  Node lenx = nm->mkNode(STRING_LENGTH, x);
540
64
  Node re = atom[1];
541
64
  Node zero = nm->mkConst(Rational(0));
542
  // for regular expression star,
543
  // if the period is a fixed constant, we can turn it into a bounded
544
  // quantifier
545
64
  std::vector<Node> disj;
546
32
  if (re[0].getKind() == REGEXP_UNION)
547
  {
548
12
    for (const Node& r : re[0])
549
    {
550
8
      disj.push_back(r);
551
    }
552
  }
553
  else
554
  {
555
28
    disj.push_back(re[0]);
556
  }
557
32
  bool lenOnePeriod = true;
558
64
  std::vector<Node> char_constraints;
559
64
  TypeNode intType = nm->integerType();
560
64
  Node index = bvm->mkBoundVar<ReElimStarIndexAttribute>(atom, intType);
561
  Node substr_ch =
562
64
      nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1)));
563
32
  substr_ch = Rewriter::rewrite(substr_ch);
564
  // handle the case where it is purely characters
565
58
  for (const Node& r : disj)
566
  {
567
36
    Assert(r.getKind() != REGEXP_UNION);
568
36
    Assert(r.getKind() != REGEXP_SIGMA);
569
36
    lenOnePeriod = false;
570
    // lenOnePeriod is true if this regular expression is a single character
571
    // regular expression
572
36
    if (r.getKind() == STRING_TO_REGEXP)
573
    {
574
54
      Node s = r[0];
575
27
      if (s.isConst() && s.getConst<String>().size() == 1)
576
      {
577
21
        lenOnePeriod = true;
578
      }
579
    }
580
9
    else if (r.getKind() == REGEXP_RANGE)
581
    {
582
5
      lenOnePeriod = true;
583
    }
584
36
    if (!lenOnePeriod)
585
    {
586
10
      break;
587
    }
588
    else
589
    {
590
52
      Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r);
591
26
      regexp_ch = Rewriter::rewrite(regexp_ch);
592
26
      Assert(regexp_ch.getKind() != STRING_IN_REGEXP);
593
26
      char_constraints.push_back(regexp_ch);
594
    }
595
  }
596
32
  if (lenOnePeriod)
597
  {
598
22
    Assert(!char_constraints.empty());
599
    Node bound = nm->mkNode(
600
44
        AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx));
601
44
    Node conc = char_constraints.size() == 1 ? char_constraints[0]
602
66
                                             : nm->mkNode(OR, char_constraints);
603
44
    Node body = nm->mkNode(OR, bound.negate(), conc);
604
44
    Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
605
44
    Node res = utils::mkForallInternal(bvl, body);
606
    // e.g.
607
    //   x in (re.* (re.union "A" "B" )) --->
608
    //   forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B")
609
22
    return returnElim(atom, res, "star-char");
610
  }
611
  // otherwise, for stars of constant length these are periodic
612
10
  if (disj.size() == 1)
613
  {
614
10
    Node r = disj[0];
615
6
    if (r.getKind() == STRING_TO_REGEXP)
616
    {
617
10
      Node s = r[0];
618
6
      if (s.isConst())
619
      {
620
4
        Node lens = nm->mkNode(STRING_LENGTH, s);
621
2
        lens = Rewriter::rewrite(lens);
622
2
        Assert(lens.isConst());
623
2
        Assert(lens.getConst<Rational>().sgn() > 0);
624
4
        std::vector<Node> conj;
625
        // lens is a positive constant, so it is safe to use total div/mod here.
626
        Node bound = nm->mkNode(
627
            AND,
628
4
            nm->mkNode(LEQ, zero, index),
629
8
            nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens)));
630
        Node conc =
631
4
            nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens)
632
4
                .eqNode(s);
633
4
        Node body = nm->mkNode(OR, bound.negate(), conc);
634
4
        Node bvl = nm->mkNode(BOUND_VAR_LIST, index);
635
4
        Node res = utils::mkForallInternal(bvl, body);
636
6
        res = nm->mkNode(
637
4
            AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res);
638
        // e.g.
639
        //    x in ("abc")* --->
640
        //    forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^
641
        //    len(x) mod 3 = 0
642
2
        return returnElim(atom, res, "star-constant");
643
      }
644
    }
645
  }
646
8
  return Node::null();
647
}
648
649
114
Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id)
650
{
651
228
  Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id
652
114
                   << "." << std::endl;
653
114
  return atomElim;
654
}
655
110
bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; }
656
657
}  // namespace strings
658
}  // namespace theory
659
31137
}  // namespace cvc5