GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1604 1658 96.7 %
Date: 2021-09-29 Branches: 5062 9698 52.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/statistics_registry.h"
31
#include "util/string.h"
32
33
using namespace std;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
6535
SequencesRewriter::SequencesRewriter(Rewriter* r,
41
6535
                                     HistogramStat<Rewrite>* statistics)
42
    : d_statistics(statistics),
43
      d_arithEntail(r),
44
6535
      d_stringsEntail(r, d_arithEntail, *this)
45
{
46
6535
}
47
48
4
ArithEntail& SequencesRewriter::getArithEntail() { return d_arithEntail; }
49
50
6
StringsEntail& SequencesRewriter::getStringsEntail() { return d_stringsEntail; }
51
52
93637
Node SequencesRewriter::rewriteEquality(Node node)
53
{
54
93637
  Assert(node.getKind() == kind::EQUAL);
55
93637
  if (node[0] == node[1])
56
  {
57
4098
    Node ret = NodeManager::currentNM()->mkConst(true);
58
2049
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
59
  }
60
91588
  else if (node[0].isConst() && node[1].isConst())
61
  {
62
4370
    Node ret = NodeManager::currentNM()->mkConst(false);
63
2185
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
64
  }
65
  // standard ordering
66
89403
  if (node[0] > node[1])
67
  {
68
41786
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
69
20893
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
70
  }
71
68510
  return node;
72
}
73
74
75427
Node SequencesRewriter::rewriteEqualityExt(Node node)
75
{
76
75427
  Assert(node.getKind() == EQUAL);
77
150854
  TypeNode tn = node[0].getType();
78
75427
  if (tn.isInteger())
79
  {
80
25356
    return rewriteArithEqualityExt(node);
81
  }
82
50071
  if (tn.isStringLike())
83
  {
84
50071
    return rewriteStrEqualityExt(node);
85
  }
86
  return node;
87
}
88
89
50342
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
90
{
91
50342
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
92
100684
  TypeNode stype = node[0].getType();
93
94
50342
  NodeManager* nm = NodeManager::currentNM();
95
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
96
150758
  for (unsigned r = 0; r < 2; r++)
97
  {
98
    // must call rewrite contains directly to avoid infinite loop
99
201006
    Node ctn = nm->mkNode(STRING_CONTAINS, node[r], node[1 - r]);
100
100590
    ctn = rewriteContains(ctn);
101
100590
    Assert(!ctn.isNull());
102
100590
    if (ctn.isConst())
103
    {
104
29221
      if (!ctn.getConst<bool>())
105
      {
106
174
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
107
      }
108
      else
109
      {
110
        // definitely contains but not syntactically equal
111
        // We may be able to simplify, e.g.
112
        //  str.++( x, "a" ) == "a"  ----> x = ""
113
      }
114
    }
115
  }
116
117
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
118
  // This covers cases like str.++( x, x ) == "a" ---> false
119
100336
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
120
100336
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
121
100336
  Node len_eq = len0.eqNode(len1);
122
50168
  len_eq = Rewriter::rewrite(len_eq);
123
50168
  if (len_eq.isConst() && !len_eq.getConst<bool>())
124
  {
125
2
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
126
  }
127
128
100332
  std::vector<Node> c[2];
129
150498
  for (unsigned i = 0; i < 2; i++)
130
  {
131
100332
    utils::getConcat(node[i], c[i]);
132
  }
133
134
  // check if the prefix, suffix mismatches
135
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
136
50166
  unsigned minsize = std::min(c[0].size(), c[1].size());
137
150494
  for (unsigned r = 0; r < 2; r++)
138
  {
139
100483
    for (unsigned i = 0; i < minsize; i++)
140
    {
141
100365
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
142
100365
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
143
100517
      Node s = c[0][index1];
144
100517
      Node t = c[1][index2];
145
100365
      if (s.isConst() && t.isConst())
146
      {
147
241
        size_t lenS = Word::getLength(s);
148
241
        size_t lenT = Word::getLength(t);
149
241
        size_t lenShort = lenS <= lenT ? lenS : lenT;
150
866
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
151
625
                                : Word::strncmp(s, t, lenShort);
152
241
        if (!isSameFix)
153
        {
154
6
          Node ret = NodeManager::currentNM()->mkConst(false);
155
3
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
156
        }
157
      }
158
100362
      if (s != t)
159
      {
160
100210
        break;
161
      }
162
    }
163
  }
164
165
100326
  Node new_ret;
166
  // ------- equality unification
167
50163
  bool changed = false;
168
150489
  for (unsigned i = 0; i < 2; i++)
169
  {
170
100574
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
171
    {
172
124
      c[0].pop_back();
173
124
      c[1].pop_back();
174
124
      changed = true;
175
    }
176
    // splice constants
177
300849
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
178
193565
        && c[1].back().isConst())
179
    {
180
306
      Node cs[2];
181
      size_t csl[2];
182
459
      for (unsigned j = 0; j < 2; j++)
183
      {
184
306
        cs[j] = c[j].back();
185
306
        csl[j] = Word::getLength(cs[j]);
186
      }
187
153
      size_t larger = csl[0] > csl[1] ? 0 : 1;
188
153
      size_t smallerSize = csl[1 - larger];
189
306
      if (cs[1 - larger]
190
306
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
191
                     : Word::prefix(cs[larger], smallerSize)))
192
      {
193
153
        size_t sizeDiff = csl[larger] - smallerSize;
194
153
        c[larger][c[larger].size() - 1] =
195
306
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
196
                   : Word::suffix(cs[larger], sizeDiff);
197
153
        c[1 - larger].pop_back();
198
153
        changed = true;
199
      }
200
    }
201
300978
    for (unsigned j = 0; j < 2; j++)
202
    {
203
200652
      std::reverse(c[j].begin(), c[j].end());
204
    }
205
  }
206
50163
  if (changed)
207
  {
208
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
209
264
    Node s1 = utils::mkConcat(c[0], stype);
210
264
    Node s2 = utils::mkConcat(c[1], stype);
211
255
    if (s1 != node[0] || s2 != node[1])
212
    {
213
246
      new_ret = s1.eqNode(s2);
214
      // We generally don't apply the extended equality rewriter if the
215
      // original node was an equality but we may be able to do additional
216
      // rewriting here, e.g.,
217
      // x++y = "" --> x = "" and y = ""
218
246
      new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
219
246
      return rewriteStrEqualityExt(new_ret);
220
    }
221
  }
222
223
  // ------- homogeneous constants
224
149698
  for (unsigned i = 0; i < 2; i++)
225
  {
226
199591
    Node cn = d_stringsEntail.checkHomogeneousString(node[i]);
227
99810
    if (!cn.isNull() && !Word::isEmpty(cn))
228
    {
229
19667
      Assert(cn.isConst());
230
19667
      Assert(Word::getLength(cn) == 1);
231
232
      // The operands of the concat on each side of the equality without
233
      // constant strings
234
59001
      std::vector<Node> trimmed[2];
235
      // Counts the number of `cn`s on each side
236
19667
      size_t numCns[2] = {0, 0};
237
58994
      for (size_t j = 0; j < 2; j++)
238
      {
239
        // Sort the operands of the concats on both sides of the equality
240
        // (since both sides may only contain one char, the order does not
241
        // matter)
242
39331
        std::sort(c[j].begin(), c[j].end());
243
78736
        for (const Node& cc : c[j])
244
        {
245
39409
          if (cc.isConst())
246
          {
247
            // Count the number of `cn`s in the string constant and make
248
            // sure that all chars are `cn`s
249
39314
            std::vector<Node> veccc = Word::getChars(cc);
250
40090
            for (const Node& cv : veccc)
251
            {
252
20435
              if (cv != cn)
253
              {
254
                // This conflict case should mostly should be taken care of by
255
                // multiset reasoning in the strings rewriter, but we recognize
256
                // this conflict just in case.
257
4
                new_ret = nm->mkConst(false);
258
                return returnRewrite(
259
4
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
260
              }
261
20431
              numCns[j]++;
262
            }
263
          }
264
          else
265
          {
266
19750
            trimmed[j].push_back(cc);
267
          }
268
        }
269
      }
270
271
      // We have to remove the same number of `cn`s from both sides, so the
272
      // side with less `cn`s determines how many we can remove
273
19663
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
274
58989
      for (size_t j = 0; j < 2; j++)
275
      {
276
39326
        size_t diff = numCns[j] - trimmedConst;
277
39326
        if (diff != 0)
278
        {
279
          // Add a constant string to the side with more `cn`s to restore
280
          // the difference in number of `cn`s
281
36776
          std::vector<Node> vec(diff, cn);
282
18388
          trimmed[j].push_back(Word::mkWordFlatten(vec));
283
        }
284
      }
285
286
39301
      Node lhs = utils::mkConcat(trimmed[i], stype);
287
39301
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
288
19663
      if (lhs != node[i] || ss != node[1 - i])
289
      {
290
        // e.g.
291
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
292
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
293
        //
294
        // We generally don't apply the extended equality rewriter if the
295
        // original node was an equality but we may be able to do additional
296
        // rewriting here.
297
25
        new_ret = lhs.eqNode(ss);
298
25
        new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
299
25
        return rewriteStrEqualityExt(new_ret);
300
      }
301
    }
302
  }
303
304
  // ------- rewrites for (= "" _)
305
99776
  Node empty = Word::mkEmptyWord(stype);
306
141410
  for (size_t i = 0; i < 2; i++)
307
  {
308
95669
    if (node[i] == empty)
309
    {
310
51545
      Node ne = node[1 - i];
311
27846
      if (ne.getKind() == STRING_REPLACE)
312
      {
313
        // (= "" (str.replace x y x)) ---> (= x "")
314
495
        if (ne[0] == ne[2])
315
        {
316
32
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
317
16
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
318
        }
319
320
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
321
479
        if (d_stringsEntail.checkNonEmpty(ne[2]))
322
        {
323
          Node ret =
324
              nm->mkNode(AND,
325
110
                         nm->mkNode(EQUAL, ne[0], empty),
326
220
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
327
55
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
328
        }
329
330
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
331
424
        if (d_stringsEntail.checkLengthOne(ne[1], true) && ne[2] == empty)
332
        {
333
176
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
334
88
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
335
        }
336
      }
337
27351
      else if (ne.getKind() == STRING_SUBSTR)
338
      {
339
48580
        Node zero = nm->mkConst(Rational(0));
340
341
78852
        if (d_arithEntail.check(ne[1], false)
342
78852
            && d_arithEntail.check(ne[2], true))
343
        {
344
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
345
1464
          if (ne[1] == zero)
346
          {
347
530
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
348
265
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
349
          }
350
351
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
352
          // if n >= 0 and m > 0
353
2398
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
354
1199
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
355
        }
356
357
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
358
24820
        if (d_stringsEntail.checkNonEmpty(ne[0]) && ne[1] == zero)
359
        {
360
5048
          Node ret = nm->mkNode(LEQ, ne[2], zero);
361
2524
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
362
        }
363
      }
364
    }
365
  }
366
367
  // ------- rewrites for (= (str.replace _ _ _) _)
368
137191
  for (size_t i = 0; i < 2; i++)
369
  {
370
91475
    if (node[i].getKind() == STRING_REPLACE)
371
    {
372
2183
      Node repl = node[i];
373
2183
      Node x = node[1 - i];
374
375
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
376
1104
      if (d_stringsEntail.checkNonEmpty(x) && repl[0] == empty)
377
      {
378
        Node ret = nm->mkNode(
379
6
            EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1]));
380
3
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
381
      }
382
383
      // (= x (str.replace y x y)) ---> (= x y)
384
1101
      if (repl[0] == repl[2] && x == repl[1])
385
      {
386
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
387
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
388
      }
389
390
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
391
1099
      if (x == repl[0])
392
      {
393
1124
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
394
570
        if (eq.isConst() && !eq.getConst<bool>())
395
        {
396
32
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_CONTAINS, x, repl[1]));
397
16
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
398
        }
399
      }
400
401
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
402
      // if (str.len y) = (str.len z)
403
1083
      if (repl[2] == x)
404
      {
405
1056
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
406
1056
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
407
530
        if (d_arithEntail.checkEq(lenY, lenZ))
408
        {
409
          Node ret = nm->mkNode(OR,
410
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
411
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
412
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
413
        }
414
      }
415
    }
416
  }
417
418
  // Try to rewrite (= x y) into a conjunction of equalities based on length
419
  // entailment.
420
  //
421
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
422
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
423
  //
424
  // where yi' and yi'' correspond to some yj and
425
  //   (<= (str.len x) (str.++ y1' ... ym'))
426
136967
  for (unsigned i = 0; i < 2; i++)
427
  {
428
91346
    if (node[1 - i].getKind() == STRING_CONCAT)
429
    {
430
607
      new_ret = d_stringsEntail.inferEqsFromContains(node[i], node[1 - i]);
431
607
      if (!new_ret.isNull())
432
      {
433
95
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
434
      }
435
    }
436
  }
437
438
45621
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
439
  {
440
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
441
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
442
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
443
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
444
    //
445
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
446
    //
447
    // This rewrite performs length-based equality splitting: If we can show
448
    // that two prefixes have the same length, we can split an equality into
449
    // two equalities, one over the prefixes and another over the suffixes.
450
63
    std::vector<Node> v0, v1;
451
37
    utils::getConcat(node[0], v0);
452
37
    utils::getConcat(node[1], v1);
453
37
    size_t startRhs = 0;
454
163
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
455
    {
456
263
      const std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
457
263
      Node pfx0 = utils::mkConcat(pfxv0, stype);
458
511
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
459
      {
460
431
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
461
        {
462
679
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
463
679
          Node pfx1 = utils::mkConcat(pfxv1, stype);
464
679
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
465
679
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
466
467
368
          if (d_arithEntail.checkEq(lenPfx0, lenPfx1))
468
          {
469
14
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
470
14
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
471
            Node ret = nm->mkNode(kind::AND,
472
14
                                  pfx0.eqNode(pfx1),
473
14
                                  utils::mkConcat(sfxv0, stype)
474
28
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
475
7
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
476
          }
477
361
          else if (d_arithEntail.check(lenPfx1, lenPfx0, true))
478
          {
479
            // The prefix on the right-hand side is strictly longer than the
480
            // prefix on the left-hand side, so we try to strip the right-hand
481
            // prefix by the length of the left-hand prefix
482
            //
483
            // Example:
484
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
485
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
486
46
            std::vector<Node> rpfxv1;
487
46
            if (d_stringsEntail.stripSymbolicLength(
488
                    pfxv1, rpfxv1, 1, lenPfx0, true))
489
            {
490
              // The rewrite requires the full left-hand prefix length to be
491
              // stripped (otherwise we would have to keep parts of the
492
              // left-hand prefix).
493
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
494
              {
495
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
496
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
497
                Node ret =
498
                    nm->mkNode(kind::AND,
499
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
500
                               utils::mkConcat(sfxv0, stype)
501
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
502
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
503
              }
504
            }
505
506
            // If the prefix of the right-hand side is (strictly) longer than
507
            // the prefix of the left-hand side, we can advance the left-hand
508
            // side (since the length of the right-hand side is only increasing
509
            // in the inner loop)
510
46
            break;
511
          }
512
315
          else if (d_arithEntail.check(lenPfx0, lenPfx1, true))
513
          {
514
            // The prefix on the left-hand side is strictly longer than the
515
            // prefix on the right-hand side, so we try to strip the left-hand
516
            // prefix by the length of the right-hand prefix
517
            //
518
            // Example:
519
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
520
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
521
114
            std::vector<Node> sfxv0 = pfxv0;
522
114
            std::vector<Node> rpfxv0;
523
59
            if (d_stringsEntail.stripSymbolicLength(
524
                    sfxv0, rpfxv0, 1, lenPfx1, true))
525
            {
526
              // The rewrite requires the full right-hand prefix length to be
527
              // stripped (otherwise we would have to keep parts of the
528
              // right-hand prefix).
529
12
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
530
              {
531
4
                sfxv0.insert(sfxv0.end(), v0.begin() + i, v0.end());
532
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
533
                Node ret =
534
                    nm->mkNode(kind::AND,
535
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
536
8
                               utils::mkConcat(sfxv0, stype)
537
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
538
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
539
              }
540
            }
541
542
            // If the prefix of the left-hand side is (strictly) longer than
543
            // the prefix of the right-hand side, then we don't need to check
544
            // that right-hand prefix for future left-hand prefixes anymore
545
            // (since they are increasing in length)
546
55
            startRhs = j + 1;
547
          }
548
        }
549
      }
550
    }
551
  }
552
45610
  return node;
553
}
554
555
25356
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
556
{
557
25356
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
558
559
  // cases where we can solve the equality
560
561
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
562
563
25356
  return node;
564
}
565
566
167750
Node SequencesRewriter::rewriteLength(Node node)
567
{
568
167750
  Assert(node.getKind() == STRING_LENGTH);
569
167750
  NodeManager* nm = NodeManager::currentNM();
570
167750
  Kind nk0 = node[0].getKind();
571
167750
  if (node[0].isConst())
572
  {
573
12124
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
574
6062
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
575
  }
576
161688
  else if (nk0 == kind::STRING_CONCAT)
577
  {
578
3527
    Node tmpNode = node[0];
579
3527
    if (tmpNode.getKind() == kind::STRING_CONCAT)
580
    {
581
7054
      std::vector<Node> node_vec;
582
14713
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
583
      {
584
11186
        if (tmpNode[i].isConst())
585
        {
586
3059
          node_vec.push_back(
587
6118
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
588
        }
589
        else
590
        {
591
8127
          node_vec.push_back(NodeManager::currentNM()->mkNode(
592
              kind::STRING_LENGTH, tmpNode[i]));
593
        }
594
      }
595
7054
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
596
3527
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
597
    }
598
  }
599
158161
  else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL)
600
  {
601
1267
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
602
1267
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
603
649
    if (len1 == len2)
604
    {
605
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
606
62
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
607
31
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
608
618
    }
609
  }
610
157512
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
611
157429
           || nk0 == STRING_UPDATE)
612
  {
613
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
614
    // len( update( x, n, y ) ) = len( x )
615
188
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
616
94
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
617
  }
618
157418
  else if (nk0 == SEQ_UNIT)
619
  {
620
152
    Node retNode = nm->mkConst(Rational(1));
621
76
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
622
  }
623
157960
  return node;
624
}
625
626
// TODO (#1180) add rewrite
627
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
628
//  str.substr( x, n1, n2+n3 )
629
48628
Node SequencesRewriter::rewriteConcat(Node node)
630
{
631
48628
  Assert(node.getKind() == kind::STRING_CONCAT);
632
97256
  Trace("strings-rewrite-debug")
633
48628
      << "Strings::rewriteConcat start " << node << std::endl;
634
97256
  std::vector<Node> node_vec;
635
97256
  Node preNode = Node::null();
636
224675
  for (Node tmpNode : node)
637
  {
638
176047
    if (tmpNode.getKind() == STRING_CONCAT)
639
    {
640
188
      unsigned j = 0;
641
      // combine the first term with the previous constant if applicable
642
188
      if (!preNode.isNull())
643
      {
644
26
        if (tmpNode[0].isConst())
645
        {
646
16
          std::vector<Node> wvec;
647
8
          wvec.push_back(preNode);
648
8
          wvec.push_back(tmpNode[0]);
649
8
          preNode = Word::mkWordFlatten(wvec);
650
8
          node_vec.push_back(preNode);
651
        }
652
        else
653
        {
654
18
          node_vec.push_back(preNode);
655
18
          node_vec.push_back(tmpNode[0]);
656
        }
657
26
        preNode = Node::null();
658
26
        ++j;
659
      }
660
      // insert the middle terms to node_vec
661
188
      if (j <= tmpNode.getNumChildren() - 1)
662
      {
663
188
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
664
      }
665
      // take the last term as the current
666
188
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
667
    }
668
176047
    if (!tmpNode.isConst())
669
    {
670
86212
      if (!preNode.isNull())
671
      {
672
17412
        if (preNode.isConst() && !Word::isEmpty(preNode))
673
        {
674
16991
          node_vec.push_back(preNode);
675
        }
676
17412
        preNode = Node::null();
677
      }
678
86212
      node_vec.push_back(tmpNode);
679
    }
680
    else
681
    {
682
89835
      if (preNode.isNull())
683
      {
684
41800
        preNode = tmpNode;
685
      }
686
      else
687
      {
688
96070
        std::vector<Node> vec;
689
48035
        vec.push_back(preNode);
690
48035
        vec.push_back(tmpNode);
691
48035
        preNode = Word::mkWordFlatten(vec);
692
      }
693
    }
694
  }
695
48628
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
696
  {
697
23619
    node_vec.push_back(preNode);
698
  }
699
700
  // Sort adjacent operands in str.++ that all result in the same string or the
701
  // empty string.
702
  //
703
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
704
  // (str.++ ... [sort those 3 arguments] ... )
705
48628
  size_t lastIdx = 0;
706
97256
  Node lastX;
707
176194
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
708
  {
709
255132
    Node s = d_stringsEntail.getStringOrEmpty(node_vec[i]);
710
127566
    bool nextX = false;
711
127566
    if (s != lastX)
712
    {
713
123349
      nextX = true;
714
    }
715
716
127566
    if (nextX)
717
    {
718
123349
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
719
123349
      lastX = s;
720
123349
      lastIdx = i;
721
    }
722
  }
723
48628
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
724
725
97256
  TypeNode tn = node.getType();
726
97256
  Node retNode = utils::mkConcat(node_vec, tn);
727
97256
  Trace("strings-rewrite-debug")
728
48628
      << "Strings::rewriteConcat end " << retNode << std::endl;
729
48628
  if (retNode != node)
730
  {
731
15286
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
732
  }
733
33342
  return node;
734
}
735
736
1779
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
737
{
738
1779
  Assert(node.getKind() == kind::REGEXP_CONCAT);
739
1779
  NodeManager* nm = NodeManager::currentNM();
740
3558
  Trace("strings-rewrite-debug")
741
1779
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
742
3558
  Node retNode = node;
743
3558
  std::vector<Node> vec;
744
1779
  bool changed = false;
745
3558
  Node emptyRe;
746
747
  // get the string type that are members of this regular expression
748
3558
  TypeNode rtype = node.getType();
749
3558
  TypeNode stype;
750
1779
  if (rtype.isRegExp())
751
  {
752
    // standard regular expressions are for strings
753
1779
    stype = nm->stringType();
754
  }
755
  else
756
  {
757
    Unimplemented();
758
  }
759
760
7141
  for (const Node& c : node)
761
  {
762
5374
    if (c.getKind() == REGEXP_CONCAT)
763
    {
764
127
      changed = true;
765
422
      for (const Node& cc : c)
766
      {
767
295
        vec.push_back(cc);
768
      }
769
    }
770
12593
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
771
12551
             && Word::isEmpty(c[0]))
772
    {
773
96
      changed = true;
774
96
      emptyRe = c;
775
    }
776
5151
    else if (c.getKind() == REGEXP_EMPTY)
777
    {
778
      // re.++( ..., empty, ... ) ---> empty
779
24
      Node ret = nm->mkNode(REGEXP_EMPTY);
780
12
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
781
    }
782
    else
783
    {
784
5139
      vec.push_back(c);
785
    }
786
  }
787
1767
  if (changed)
788
  {
789
    // flatten
790
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
791
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
792
209
    if (vec.empty())
793
    {
794
2
      Assert(!emptyRe.isNull());
795
2
      retNode = emptyRe;
796
    }
797
    else
798
    {
799
207
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
800
    }
801
209
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
802
  }
803
3116
  Trace("strings-rewrite-debug")
804
1558
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
805
3116
  std::vector<Node> cvec;
806
  // the current accumulation of constant strings
807
3116
  std::vector<Node> preReStr;
808
  // whether the last component was (_)*
809
1558
  bool lastAllStar = false;
810
3116
  String emptyStr = String("");
811
  // this loop checks to see if components can be combined or dropped
812
8013
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
813
  {
814
12910
    Node curr;
815
6455
    if (i < size)
816
    {
817
4897
      curr = vec[i];
818
4897
      Assert(curr.getKind() != REGEXP_CONCAT);
819
    }
820
    // update preReStr
821
6455
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
822
    {
823
1875
      lastAllStar = false;
824
1875
      preReStr.push_back(curr[0]);
825
1875
      curr = Node::null();
826
    }
827
4580
    else if (!preReStr.empty())
828
    {
829
1485
      Assert(!lastAllStar);
830
      // this groups consecutive strings a++b ---> ab
831
2970
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
832
1485
      cvec.push_back(acc);
833
1485
      preReStr.clear();
834
    }
835
3095
    else if (!curr.isNull() && lastAllStar)
836
    {
837
      // if empty, drop it
838
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
839
252
      if (RegExpEntail::isConstRegExp(curr)
840
252
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
841
      {
842
35
        curr = Node::null();
843
      }
844
    }
845
6455
    if (!curr.isNull())
846
    {
847
2987
      lastAllStar = false;
848
2987
      if (curr.getKind() == REGEXP_STAR)
849
      {
850
        // we can group stars (a)* ++ (a)* ---> (a)*
851
1439
        if (!cvec.empty() && cvec.back() == curr)
852
        {
853
2
          curr = Node::null();
854
        }
855
1437
        else if (curr[0].getKind() == REGEXP_SIGMA)
856
        {
857
605
          Assert(!lastAllStar);
858
605
          lastAllStar = true;
859
          // go back and remove empty ones from back of cvec
860
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
861
1705
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
862
2298
                 && RegExpEntail::testConstStringInRegExp(
863
477
                     emptyStr, 0, cvec.back()))
864
          {
865
2
            cvec.pop_back();
866
          }
867
        }
868
      }
869
    }
870
6455
    if (!curr.isNull())
871
    {
872
2985
      cvec.push_back(curr);
873
    }
874
  }
875
1558
  Assert(!cvec.empty());
876
1558
  retNode = utils::mkConcat(cvec, rtype);
877
1558
  if (retNode != node)
878
  {
879
    // handles all cases where consecutive re constants are combined or
880
    // dropped as described in the loop above.
881
120
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
882
  }
883
884
  // flipping adjacent star arguments
885
1438
  changed = false;
886
4208
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
887
  {
888
2770
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
889
    {
890
      // by convention, flip the order (a*)++a ---> a++(a*)
891
43
      std::swap(cvec[i], cvec[i + 1]);
892
43
      changed = true;
893
    }
894
  }
895
1438
  if (changed)
896
  {
897
30
    retNode = utils::mkConcat(cvec, rtype);
898
30
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
899
  }
900
1408
  return node;
901
}
902
903
709
Node SequencesRewriter::rewriteStarRegExp(TNode node)
904
{
905
709
  Assert(node.getKind() == REGEXP_STAR);
906
709
  NodeManager* nm = NodeManager::currentNM();
907
1418
  Node retNode = node;
908
709
  if (node[0].getKind() == REGEXP_STAR)
909
  {
910
    // ((R)*)* ---> R*
911
1
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
912
  }
913
2384
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
914
2344
           && Word::isEmpty(node[0][0]))
915
  {
916
    // ("")* ---> ""
917
18
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
918
  }
919
690
  else if (node[0].getKind() == REGEXP_EMPTY)
920
  {
921
    // (empty)* ---> ""
922
4
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
923
4
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
924
  }
925
686
  else if (node[0].getKind() == REGEXP_UNION)
926
  {
927
    // simplification of unions under star
928
188
    if (RegExpEntail::hasEpsilonNode(node[0]))
929
    {
930
28
      bool changed = false;
931
28
      std::vector<Node> node_vec;
932
90
      for (const Node& nc : node[0])
933
      {
934
180
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
935
179
            && Word::isEmpty(nc[0]))
936
        {
937
          // can be removed
938
28
          changed = true;
939
        }
940
        else
941
        {
942
34
          node_vec.push_back(nc);
943
        }
944
      }
945
28
      if (changed)
946
      {
947
28
        retNode = node_vec.size() == 1 ? node_vec[0]
948
                                       : nm->mkNode(REGEXP_UNION, node_vec);
949
28
        retNode = nm->mkNode(REGEXP_STAR, retNode);
950
        // simplification of union beneath star based on loop above
951
        // for example, ( "" | "a" )* ---> ("a")*
952
28
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
953
      }
954
    }
955
  }
956
658
  return node;
957
}
958
959
835
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
960
{
961
835
  Kind nk = node.getKind();
962
835
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
963
1670
  Trace("strings-rewrite-debug")
964
835
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
965
1670
  std::vector<Node> node_vec;
966
1670
  std::vector<Node> polRegExp[2];
967
2744
  for (const Node& ni : node)
968
  {
969
1911
    if (ni.getKind() == nk)
970
    {
971
36
      for (const Node& nic : ni)
972
      {
973
24
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
974
        {
975
22
          node_vec.push_back(nic);
976
        }
977
      }
978
    }
979
1899
    else if (ni.getKind() == REGEXP_EMPTY)
980
    {
981
59
      if (nk == REGEXP_INTER)
982
      {
983
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
984
      }
985
      // otherwise, can ignore
986
    }
987
1840
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
988
    {
989
5
      if (nk == REGEXP_UNION)
990
      {
991
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
992
      }
993
      // otherwise, can ignore
994
    }
995
1835
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
996
    {
997
1799
      node_vec.push_back(ni);
998
1799
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
999
3598
      Node nia = pindex == 1 ? ni[0] : ni;
1000
1799
      polRegExp[pindex].push_back(nia);
1001
    }
1002
  }
1003
833
  NodeManager* nm = NodeManager::currentNM();
1004
  // use inclusion tests
1005
935
  for (const Node& negMem : polRegExp[1])
1006
  {
1007
167
    for (const Node& posMem : polRegExp[0])
1008
    {
1009
127
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
1010
127
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
1011
      // inclusion test for conflicting case m1 contains m2
1012
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
1013
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
1014
65
      if (RegExpEntail::regExpIncludes(m1, m2))
1015
      {
1016
6
        Node retNode;
1017
3
        if (nk == REGEXP_INTER)
1018
        {
1019
3
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1020
        }
1021
        else
1022
        {
1023
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1024
        }
1025
3
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1026
      }
1027
    }
1028
  }
1029
1660
  Node retNode;
1030
830
  if (node_vec.empty())
1031
  {
1032
    if (nk == REGEXP_INTER)
1033
    {
1034
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1035
    }
1036
    else
1037
    {
1038
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1039
    }
1040
  }
1041
  else
1042
  {
1043
830
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1044
  }
1045
830
  if (retNode != node)
1046
  {
1047
    // flattening and removing children, based on loop above
1048
90
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1049
  }
1050
740
  return node;
1051
}
1052
1053
26
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1054
{
1055
26
  Assert(node.getKind() == REGEXP_LOOP);
1056
52
  Node retNode = node;
1057
52
  Node r = node[0];
1058
26
  if (r.getKind() == REGEXP_STAR)
1059
  {
1060
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1061
  }
1062
26
  NodeManager* nm = NodeManager::currentNM();
1063
52
  cvc5::Rational rMaxInt(String::maxSize());
1064
26
  uint32_t l = utils::getLoopMinOccurrences(node);
1065
52
  std::vector<Node> vec_nodes;
1066
113
  for (unsigned i = 0; i < l; i++)
1067
  {
1068
87
    vec_nodes.push_back(r);
1069
  }
1070
  Node n =
1071
26
      vec_nodes.size() == 0
1072
27
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1073
53
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1074
26
  uint32_t u = utils::getLoopMaxOccurrences(node);
1075
26
  if (u < l)
1076
  {
1077
2
    std::vector<Node> nvec;
1078
1
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1079
  }
1080
25
  else if (u == l)
1081
  {
1082
10
    retNode = n;
1083
  }
1084
  else
1085
  {
1086
30
    std::vector<Node> vec2;
1087
15
    vec2.push_back(n);
1088
30
    TypeNode rtype = nm->regExpType();
1089
71
    for (uint32_t j = l; j < u; j++)
1090
    {
1091
56
      vec_nodes.push_back(r);
1092
56
      n = utils::mkConcat(vec_nodes, rtype);
1093
56
      vec2.push_back(n);
1094
    }
1095
15
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1096
  }
1097
52
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1098
26
                      << std::endl;
1099
26
  if (retNode != node)
1100
  {
1101
26
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1102
  }
1103
  return node;
1104
}
1105
1106
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1107
{
1108
4
  Assert(node.getKind() == REGEXP_REPEAT);
1109
4
  NodeManager* nm = NodeManager::currentNM();
1110
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1111
4
  unsigned r = utils::getRepeatAmount(node);
1112
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1113
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1114
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1115
}
1116
1117
25
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1118
{
1119
25
  Assert(node.getKind() == REGEXP_OPT);
1120
25
  NodeManager* nm = NodeManager::currentNM();
1121
  Node retNode =
1122
      nm->mkNode(REGEXP_UNION,
1123
50
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1124
100
                 node[0]);
1125
50
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1126
}
1127
1128
27
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1129
{
1130
27
  Assert(node.getKind() == REGEXP_PLUS);
1131
27
  NodeManager* nm = NodeManager::currentNM();
1132
  Node retNode =
1133
54
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1134
54
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1135
}
1136
1137
27
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1138
{
1139
27
  Assert(node.getKind() == REGEXP_DIFF);
1140
27
  NodeManager* nm = NodeManager::currentNM();
1141
  Node retNode =
1142
54
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1143
54
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1144
}
1145
1146
330
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1147
{
1148
330
  Assert(node.getKind() == REGEXP_RANGE);
1149
  unsigned ch[2];
1150
990
  for (size_t i = 0; i < 2; ++i)
1151
  {
1152
660
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1153
    {
1154
      // not applied to characters, it is not handled
1155
      return node;
1156
    }
1157
660
    ch[i] = node[i].getConst<String>().front();
1158
  }
1159
1160
330
  NodeManager* nm = NodeManager::currentNM();
1161
330
  if (node[0] == node[1])
1162
  {
1163
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1164
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1165
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1166
  }
1167
1168
330
  if (ch[0] > ch[1])
1169
  {
1170
    // re.range( "B", "A" ) ---> re.none
1171
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1172
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1173
  }
1174
328
  return node;
1175
}
1176
1177
9496
Node SequencesRewriter::rewriteMembership(TNode node)
1178
{
1179
9496
  NodeManager* nm = NodeManager::currentNM();
1180
18992
  Node x = node[0];
1181
18992
  Node r = node[1];
1182
1183
18992
  TypeNode stype = x.getType();
1184
18992
  TypeNode rtype = r.getType();
1185
1186
9496
  if(r.getKind() == kind::REGEXP_EMPTY)
1187
  {
1188
32
    Node retNode = NodeManager::currentNM()->mkConst(false);
1189
16
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1190
  }
1191
9480
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1192
  {
1193
    // test whether x in node[1]
1194
2924
    cvc5::String s = x.getConst<String>();
1195
1462
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1196
2924
    Node retNode = NodeManager::currentNM()->mkConst(test);
1197
1462
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1198
  }
1199
8018
  else if (r.getKind() == kind::REGEXP_SIGMA)
1200
  {
1201
98
    Node one = nm->mkConst(Rational(1));
1202
98
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1203
49
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1204
  }
1205
7969
  else if (r.getKind() == kind::REGEXP_STAR)
1206
  {
1207
2896
    if (x.isConst())
1208
    {
1209
28
      size_t xlen = Word::getLength(x);
1210
28
      if (xlen == 0)
1211
      {
1212
28
        Node retNode = nm->mkConst(true);
1213
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1214
14
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1215
      }
1216
14
      else if (xlen == 1)
1217
      {
1218
14
        if (r[0].getKind() == STRING_TO_REGEXP)
1219
        {
1220
4
          Node retNode = r[0][0].eqNode(x);
1221
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1222
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1223
        }
1224
      }
1225
    }
1226
2868
    else if (x.getKind() == STRING_CONCAT)
1227
    {
1228
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1229
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1230
      //     if the length of all strings in R is one.
1231
1411
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1232
939
      if (!flr.isNull())
1233
      {
1234
543
        Node one = nm->mkConst(Rational(1));
1235
505
        if (flr == one)
1236
        {
1237
934
          NodeBuilder nb(AND);
1238
1895
          for (const Node& xc : x)
1239
          {
1240
1428
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1241
          }
1242
          return returnRewrite(
1243
467
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1244
        }
1245
      }
1246
    }
1247
2413
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1248
    {
1249
146
      Node retNode = NodeManager::currentNM()->mkConst(true);
1250
73
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1251
    }
1252
  }
1253
5073
  else if (r.getKind() == kind::REGEXP_CONCAT)
1254
  {
1255
3582
    bool allSigma = true;
1256
3582
    bool allSigmaStrict = true;
1257
3582
    unsigned allSigmaMinSize = 0;
1258
7092
    Node constStr;
1259
3582
    size_t constIdx = 0;
1260
3582
    size_t nchildren = r.getNumChildren();
1261
6759
    for (size_t i = 0; i < nchildren; i++)
1262
    {
1263
9477
      Node rc = r[i];
1264
6300
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1265
6300
      if (rc.getKind() == kind::REGEXP_SIGMA)
1266
      {
1267
610
        allSigmaMinSize++;
1268
      }
1269
5690
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1270
      {
1271
1039
        allSigmaStrict = false;
1272
      }
1273
4651
      else if (rc.getKind() == STRING_TO_REGEXP)
1274
      {
1275
1879
        if (constStr.isNull())
1276
        {
1277
1528
          constStr = rc[0];
1278
1528
          constIdx = i;
1279
        }
1280
        else
1281
        {
1282
351
          allSigma = false;
1283
351
          break;
1284
        }
1285
      }
1286
      else
1287
      {
1288
2772
        allSigma = false;
1289
2772
        break;
1290
      }
1291
    }
1292
3582
    if (allSigma)
1293
    {
1294
459
      if (constStr.isNull())
1295
      {
1296
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1297
28
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1298
28
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1299
28
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1300
14
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1301
      }
1302
445
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1303
58
               && constIdx != nchildren - 1)
1304
      {
1305
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1306
116
        Node retNode = nm->mkNode(STRING_CONTAINS, x, constStr);
1307
58
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1308
      }
1309
    }
1310
  }
1311
2982
  else if (r.getKind() == kind::REGEXP_INTER
1312
1491
           || r.getKind() == kind::REGEXP_UNION)
1313
  {
1314
748
    std::vector<Node> mvec;
1315
1241
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1316
    {
1317
867
      mvec.push_back(
1318
1734
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1319
    }
1320
    Node retNode = NodeManager::currentNM()->mkNode(
1321
748
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1322
374
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1323
  }
1324
1117
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1325
  {
1326
1216
    Node retNode = x.eqNode(r[0]);
1327
608
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1328
  }
1329
509
  else if (r.getKind() == REGEXP_RANGE)
1330
  {
1331
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1332
    // we do not do this if the arguments are not constant
1333
375
    if (RegExpEntail::isConstRegExp(r))
1334
    {
1335
750
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1336
      Node retNode =
1337
          nm->mkNode(AND,
1338
750
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1339
1500
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1340
375
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1341
    }
1342
  }
1343
134
  else if (r.getKind() == REGEXP_COMPLEMENT)
1344
  {
1345
208
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1346
104
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1347
  }
1348
1349
  // do simple consumes
1350
11760
  Node retNode = node;
1351
5880
  if (r.getKind() == kind::REGEXP_STAR)
1352
  {
1353
6862
    for (unsigned dir = 0; dir <= 1; dir++)
1354
    {
1355
9190
      std::vector<Node> mchildren;
1356
4668
      utils::getConcat(x, mchildren);
1357
4668
      bool success = true;
1358
14220
      while (success)
1359
      {
1360
4796
        success = false;
1361
9572
        std::vector<Node> children;
1362
4796
        utils::getConcat(r[0], children);
1363
9572
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1364
4796
        if (!scn.isNull())
1365
        {
1366
40
          Trace("regexp-ext-rewrite")
1367
20
              << "Regexp star : const conflict : " << node << std::endl;
1368
20
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1369
        }
1370
4776
        else if (children.empty())
1371
        {
1372
          // fully consumed one copy of the STAR
1373
130
          if (mchildren.empty())
1374
          {
1375
            Trace("regexp-ext-rewrite")
1376
                << "Regexp star : full consume : " << node << std::endl;
1377
            Node ret = NodeManager::currentNM()->mkConst(true);
1378
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1379
          }
1380
          else
1381
          {
1382
260
            Node prev = retNode;
1383
390
            retNode = nm->mkNode(
1384
260
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1385
            // Iterate again if the node changed. It may not have changed if
1386
            // nothing was consumed from mchildren (e.g. if the body of the
1387
            // re.* accepts the empty string.
1388
130
            success = (retNode != prev);
1389
          }
1390
        }
1391
      }
1392
4648
      if (retNode != node)
1393
      {
1394
252
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1395
126
                                    << " -> " << retNode << std::endl;
1396
126
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1397
      }
1398
    }
1399
  }
1400
  else
1401
  {
1402
6346
    std::vector<Node> children;
1403
3540
    utils::getConcat(r, children);
1404
6346
    std::vector<Node> mchildren;
1405
3540
    utils::getConcat(x, mchildren);
1406
3540
    unsigned prevSize = children.size() + mchildren.size();
1407
6346
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1408
3540
    if (!scn.isNull())
1409
    {
1410
210
      Trace("regexp-ext-rewrite")
1411
105
          << "Regexp : const conflict : " << node << std::endl;
1412
105
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1413
    }
1414
3435
    else if ((children.size() + mchildren.size()) != prevSize)
1415
    {
1416
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1417
      // above, we strip components to construct an equivalent membership:
1418
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1419
1258
      Node xn = utils::mkConcat(mchildren, stype);
1420
1258
      Node emptyStr = Word::mkEmptyWord(stype);
1421
629
      if (children.empty())
1422
      {
1423
        // If we stripped all components on the right, then the left is
1424
        // equal to the empty string.
1425
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1426
69
        retNode = xn.eqNode(emptyStr);
1427
      }
1428
      else
1429
      {
1430
        // otherwise, construct the updated regular expression
1431
560
        retNode =
1432
1120
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1433
      }
1434
1258
      Trace("regexp-ext-rewrite")
1435
629
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1436
629
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1437
    }
1438
  }
1439
5000
  return node;
1440
}
1441
1442
419083
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1443
{
1444
838166
  Trace("sequences-postrewrite")
1445
419083
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1446
838166
  Node retNode = node;
1447
419083
  Kind nk = node.getKind();
1448
419083
  if (nk == kind::STRING_CONCAT)
1449
  {
1450
48628
    retNode = rewriteConcat(node);
1451
  }
1452
370455
  else if (nk == kind::EQUAL)
1453
  {
1454
93637
    retNode = rewriteEquality(node);
1455
  }
1456
276818
  else if (nk == kind::STRING_LENGTH)
1457
  {
1458
167750
    retNode = rewriteLength(node);
1459
  }
1460
109068
  else if (nk == kind::STRING_CHARAT)
1461
  {
1462
71
    retNode = rewriteCharAt(node);
1463
  }
1464
108997
  else if (nk == kind::STRING_SUBSTR)
1465
  {
1466
41991
    retNode = rewriteSubstr(node);
1467
  }
1468
67006
  else if (nk == kind::STRING_UPDATE)
1469
  {
1470
165
    retNode = rewriteUpdate(node);
1471
  }
1472
66841
  else if (nk == kind::STRING_CONTAINS)
1473
  {
1474
21832
    retNode = rewriteContains(node);
1475
  }
1476
45009
  else if (nk == kind::STRING_INDEXOF)
1477
  {
1478
3242
    retNode = rewriteIndexof(node);
1479
  }
1480
41767
  else if (nk == kind::STRING_INDEXOF_RE)
1481
  {
1482
504
    retNode = rewriteIndexofRe(node);
1483
  }
1484
41263
  else if (nk == kind::STRING_REPLACE)
1485
  {
1486
2664
    retNode = rewriteReplace(node);
1487
  }
1488
38599
  else if (nk == kind::STRING_REPLACE_ALL)
1489
  {
1490
202
    retNode = rewriteReplaceAll(node);
1491
  }
1492
38397
  else if (nk == kind::STRING_REPLACE_RE)
1493
  {
1494
283
    retNode = rewriteReplaceRe(node);
1495
  }
1496
38114
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1497
  {
1498
186
    retNode = rewriteReplaceReAll(node);
1499
  }
1500
37928
  else if (nk == STRING_REV)
1501
  {
1502
176
    retNode = rewriteStrReverse(node);
1503
  }
1504
37752
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1505
  {
1506
98
    retNode = rewritePrefixSuffix(node);
1507
  }
1508
37654
  else if (nk == kind::STRING_IN_REGEXP)
1509
  {
1510
9496
    retNode = rewriteMembership(node);
1511
  }
1512
28158
  else if (nk == REGEXP_CONCAT)
1513
  {
1514
1779
    retNode = rewriteConcatRegExp(node);
1515
  }
1516
26379
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1517
  {
1518
835
    retNode = rewriteAndOrRegExp(node);
1519
  }
1520
25544
  else if (nk == REGEXP_DIFF)
1521
  {
1522
27
    retNode = rewriteDifferenceRegExp(node);
1523
  }
1524
25517
  else if (nk == REGEXP_STAR)
1525
  {
1526
709
    retNode = rewriteStarRegExp(node);
1527
  }
1528
24808
  else if (nk == REGEXP_PLUS)
1529
  {
1530
27
    retNode = rewritePlusRegExp(node);
1531
  }
1532
24781
  else if (nk == REGEXP_OPT)
1533
  {
1534
25
    retNode = rewriteOptionRegExp(node);
1535
  }
1536
24756
  else if (nk == REGEXP_RANGE)
1537
  {
1538
330
    retNode = rewriteRangeRegExp(node);
1539
  }
1540
24426
  else if (nk == REGEXP_LOOP)
1541
  {
1542
26
    retNode = rewriteLoopRegExp(node);
1543
  }
1544
24400
  else if (nk == REGEXP_REPEAT)
1545
  {
1546
4
    retNode = rewriteRepeatRegExp(node);
1547
  }
1548
24396
  else if (nk == SEQ_UNIT)
1549
  {
1550
240
    retNode = rewriteSeqUnit(node);
1551
  }
1552
24156
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1553
  {
1554
140
    retNode = rewriteSeqNth(node);
1555
  }
1556
1557
838166
  Trace("sequences-postrewrite")
1558
419083
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1559
419083
      << std::endl;
1560
419083
  if (node != retNode)
1561
  {
1562
    // also post process the rewrite, which may apply extended rewriting to
1563
    // equalities, if we rewrite to an equality from a non-equality
1564
77543
    retNode = postProcessRewrite(node, retNode);
1565
155086
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1566
77543
                                   << node << " to " << retNode << std::endl;
1567
77543
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1568
  }
1569
341540
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1570
341540
  return RewriteResponse(REWRITE_DONE, retNode);
1571
}
1572
1573
259691
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1574
{
1575
259691
  return RewriteResponse(REWRITE_DONE, node);
1576
}
1577
1578
717
TrustNode SequencesRewriter::expandDefinition(Node node)
1579
{
1580
1434
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1581
717
                           << std::endl;
1582
1583
717
  if (node.getKind() == kind::SEQ_NTH)
1584
  {
1585
2
    NodeManager* nm = NodeManager::currentNM();
1586
4
    Node s = node[0];
1587
4
    Node n = node[1];
1588
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1589
    Node cond = nm->mkNode(AND,
1590
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1591
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1592
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1593
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1594
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1595
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1596
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1597
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1598
  }
1599
715
  return TrustNode::null();
1600
}
1601
1602
140
Node SequencesRewriter::rewriteSeqNth(Node node)
1603
{
1604
140
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1605
280
  Node s = node[0];
1606
280
  Node i = node[1];
1607
140
  if (s.isConst() && i.isConst())
1608
  {
1609
18
    size_t len = Word::getLength(s);
1610
18
    if (i.getConst<Rational>().sgn() != -1)
1611
    {
1612
18
      size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1613
18
      if (pos < len)
1614
      {
1615
8
        std::vector<Node> elements = s.getConst<Sequence>().getVec();
1616
4
        const Node& ret = elements[pos];
1617
4
        return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1618
      }
1619
    }
1620
14
    if (node.getKind() == SEQ_NTH_TOTAL)
1621
    {
1622
      // return arbitrary term
1623
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1624
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1625
    }
1626
    else
1627
    {
1628
12
      return node;
1629
    }
1630
  }
1631
  else
1632
  {
1633
122
    return node;
1634
  }
1635
}
1636
1637
71
Node SequencesRewriter::rewriteCharAt(Node node)
1638
{
1639
71
  Assert(node.getKind() == STRING_CHARAT);
1640
71
  NodeManager* nm = NodeManager::currentNM();
1641
142
  Node one = nm->mkConst(Rational(1));
1642
142
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1643
142
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1644
}
1645
1646
42003
Node SequencesRewriter::rewriteSubstr(Node node)
1647
{
1648
42003
  Assert(node.getKind() == kind::STRING_SUBSTR);
1649
1650
42003
  NodeManager* nm = NodeManager::currentNM();
1651
42003
  if (node[0].isConst())
1652
  {
1653
10228
    if (Word::isEmpty(node[0]))
1654
    {
1655
1688
      Node ret = node[0];
1656
844
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1657
    }
1658
    // rewriting for constant arguments
1659
9384
    if (node[1].isConst() && node[2].isConst())
1660
    {
1661
10188
      Node s = node[0];
1662
10188
      cvc5::Rational rMaxInt(String::maxSize());
1663
      uint32_t start;
1664
5094
      if (node[1].getConst<Rational>() > rMaxInt)
1665
      {
1666
        // start beyond the maximum size of strings
1667
        // thus, it must be beyond the end point of this string
1668
2
        Node ret = Word::mkEmptyWord(node.getType());
1669
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1670
      }
1671
5093
      else if (node[1].getConst<Rational>().sgn() < 0)
1672
      {
1673
        // start before the beginning of the string
1674
1036
        Node ret = Word::mkEmptyWord(node.getType());
1675
518
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1676
      }
1677
      else
1678
      {
1679
4575
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1680
4575
        if (start >= Word::getLength(node[0]))
1681
        {
1682
          // start beyond the end of the string
1683
774
          Node ret = Word::mkEmptyWord(node.getType());
1684
387
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1685
        }
1686
      }
1687
4188
      if (node[2].getConst<Rational>() > rMaxInt)
1688
      {
1689
        // take up to the end of the string
1690
1
        size_t lenS = Word::getLength(s);
1691
2
        Node ret = Word::suffix(s, lenS - start);
1692
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1693
      }
1694
4187
      else if (node[2].getConst<Rational>().sgn() <= 0)
1695
      {
1696
660
        Node ret = Word::mkEmptyWord(node.getType());
1697
330
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1698
      }
1699
      else
1700
      {
1701
        uint32_t len =
1702
3857
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1703
3857
        if (start + len > Word::getLength(node[0]))
1704
        {
1705
          // take up to the end of the string
1706
335
          size_t lenS = Word::getLength(s);
1707
670
          Node ret = Word::suffix(s, lenS - start);
1708
335
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1709
        }
1710
        else
1711
        {
1712
          // compute the substr using the constant string
1713
7044
          Node ret = Word::substr(s, start, len);
1714
3522
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1715
        }
1716
      }
1717
    }
1718
  }
1719
72130
  Node zero = nm->mkConst(cvc5::Rational(0));
1720
1721
  // if entailed non-positive length or negative start point
1722
36065
  if (d_arithEntail.check(zero, node[1], true))
1723
  {
1724
56
    Node ret = Word::mkEmptyWord(node.getType());
1725
28
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1726
  }
1727
36037
  else if (d_arithEntail.check(zero, node[2]))
1728
  {
1729
1050
    Node ret = Word::mkEmptyWord(node.getType());
1730
525
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1731
  }
1732
1733
35512
  if (node[0].getKind() == STRING_SUBSTR)
1734
  {
1735
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1736
    //
1737
    // Note that this rewrite can be generalized to:
1738
    //
1739
    // (str.substr x a b) ---> "" if a >= (str.len x)
1740
    //
1741
    // This can be done when we generalize our entailment methods to
1742
    // accept an optional context. Then we could conjecture that
1743
    // (str.substr x a b) rewrites to "" and do a case analysis:
1744
    //
1745
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1746
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1747
    //
1748
    // For example, for (str.substr (str.substr x a a) a a), we could
1749
    // then deduce that under those assumptions, "a" is an
1750
    // over-approximation of the length of (str.substr x a a), which
1751
    // then allows us to reason that the result of the whole term must
1752
    // be empty.
1753
2725
    if (d_arithEntail.check(node[1], node[0][2]))
1754
    {
1755
50
      Node ret = Word::mkEmptyWord(node.getType());
1756
25
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1757
    }
1758
  }
1759
32787
  else if (node[0].getKind() == STRING_REPLACE)
1760
  {
1761
    // (str.substr (str.replace x y z) 0 n)
1762
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1763
    // if (str.len y) = 1 and (str.len z) = 1
1764
190
    if (node[1] == zero)
1765
    {
1766
312
      if (d_stringsEntail.checkLengthOne(node[0][1], true)
1767
312
          && d_stringsEntail.checkLengthOne(node[0][2], true))
1768
      {
1769
        Node ret = nm->mkNode(
1770
            kind::STRING_REPLACE,
1771
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1772
            node[0][1],
1773
16
            node[0][2]);
1774
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1775
      }
1776
    }
1777
  }
1778
1779
70966
  std::vector<Node> n1;
1780
35483
  utils::getConcat(node[0], n1);
1781
70966
  TypeNode stype = node.getType();
1782
1783
  // definite inclusion
1784
35483
  if (node[1] == zero)
1785
  {
1786
23302
    Node curr = node[2];
1787
23302
    std::vector<Node> childrenr;
1788
12185
    if (d_stringsEntail.stripSymbolicLength(n1, childrenr, 1, curr))
1789
    {
1790
1068
      if (curr != zero && !n1.empty())
1791
      {
1792
1341
        childrenr.push_back(nm->mkNode(
1793
894
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1794
      }
1795
2136
      Node ret = utils::mkConcat(childrenr, stype);
1796
1068
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1797
    }
1798
  }
1799
1800
  // symbolic length analysis
1801
95898
  for (unsigned r = 0; r < 2; r++)
1802
  {
1803
    // the amount of characters we can strip
1804
126901
    Node curr;
1805
65418
    if (r == 0)
1806
    {
1807
34415
      if (node[1] != zero)
1808
      {
1809
        // strip up to start point off the start of the string
1810
23298
        curr = node[1];
1811
      }
1812
    }
1813
31003
    else if (r == 1)
1814
    {
1815
      Node tot_len =
1816
61854
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1817
61854
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1818
31003
      if (node[2] != tot_len)
1819
      {
1820
30011
        if (d_arithEntail.check(node[2], tot_len))
1821
        {
1822
          // end point beyond end point of string, map to tot_len
1823
246
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1824
123
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1825
        }
1826
        else
1827
        {
1828
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1829
29888
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1830
        }
1831
      }
1832
1833
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1834
      Node n1_lt_tot_len =
1835
61731
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1836
30880
      if (d_arithEntail.checkWithAssumption(
1837
              n1_lt_tot_len, zero, node[2], false))
1838
      {
1839
40
        Node ret = Word::mkEmptyWord(node.getType());
1840
20
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1841
      }
1842
1843
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1844
      Node non_zero_len =
1845
61711
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1846
30860
      if (d_arithEntail.checkWithAssumption(
1847
              non_zero_len, node[1], tot_len, false))
1848
      {
1849
4
        Node ret = Word::mkEmptyWord(node.getType());
1850
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1851
      }
1852
1853
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1854
      Node geq_zero_start =
1855
61709
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1856
30858
      if (d_arithEntail.checkWithAssumption(
1857
              geq_zero_start, zero, tot_len, false))
1858
      {
1859
4
        Node ret = Word::mkEmptyWord(node.getType());
1860
        return returnRewrite(
1861
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1862
      }
1863
1864
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1865
30856
      if (node[1] == node[2] && d_stringsEntail.checkLengthOne(node[0]))
1866
      {
1867
10
        Node ret = Word::mkEmptyWord(node.getType());
1868
5
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1869
      }
1870
    }
1871
65266
    if (!curr.isNull())
1872
    {
1873
      // strip off components while quantity is entailed positive
1874
53157
      int dir = r == 0 ? 1 : -1;
1875
102531
      std::vector<Node> childrenr;
1876
53157
      if (d_stringsEntail.stripSymbolicLength(n1, childrenr, dir, curr))
1877
      {
1878
3783
        if (r == 0)
1879
        {
1880
          Node ret = nm->mkNode(
1881
6824
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1882
3412
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1883
        }
1884
        else
1885
        {
1886
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1887
742
                                utils::mkConcat(n1, stype),
1888
                                node[1],
1889
1484
                                node[2]);
1890
371
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1891
        }
1892
      }
1893
    }
1894
  }
1895
  // combine substr
1896
30480
  if (node[0].getKind() == kind::STRING_SUBSTR)
1897
  {
1898
4656
    Node start_inner = node[0][1];
1899
4656
    Node start_outer = node[1];
1900
2514
    if (d_arithEntail.check(start_outer) && d_arithEntail.check(start_inner))
1901
    {
1902
      // both are positive
1903
      // thus, start point is definitely start_inner+start_outer.
1904
      // We can rewrite if it for certain what the length is
1905
1906
      // the length of a string from the inner substr subtracts the start point
1907
      // of the outer substr
1908
      Node len_from_inner =
1909
2992
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1910
2992
      Node len_from_outer = node[2];
1911
2992
      Node new_len;
1912
      // take quantity that is for sure smaller than the other
1913
1682
      if (len_from_inner == len_from_outer)
1914
      {
1915
12
        new_len = len_from_inner;
1916
      }
1917
1670
      else if (d_arithEntail.check(len_from_inner, len_from_outer))
1918
      {
1919
287
        new_len = len_from_outer;
1920
      }
1921
1383
      else if (d_arithEntail.check(len_from_outer, len_from_inner))
1922
      {
1923
73
        new_len = len_from_inner;
1924
      }
1925
1682
      if (!new_len.isNull())
1926
      {
1927
744
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1928
        Node ret =
1929
744
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1930
372
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1931
      }
1932
    }
1933
  }
1934
30108
  return node;
1935
}
1936
1937
165
Node SequencesRewriter::rewriteUpdate(Node node)
1938
{
1939
165
  Assert(node.getKind() == kind::STRING_UPDATE);
1940
330
  Node s = node[0];
1941
165
  if (s.isConst())
1942
  {
1943
153
    if (Word::isEmpty(s))
1944
    {
1945
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1946
    }
1947
    // rewriting for constant arguments
1948
153
    if (node[1].isConst())
1949
    {
1950
127
      cvc5::Rational rMaxInt(String::maxSize());
1951
113
      if (node[1].getConst<Rational>() > rMaxInt)
1952
      {
1953
        // start beyond the maximum size of strings
1954
        // thus, it must be beyond the end point of this string
1955
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1956
      }
1957
113
      else if (node[1].getConst<Rational>().sgn() < 0)
1958
      {
1959
        // start before the beginning of the string
1960
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1961
      }
1962
      uint32_t start =
1963
113
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1964
113
      size_t len = Word::getLength(s);
1965
113
      if (start >= len)
1966
      {
1967
        // start beyond the end of the string
1968
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1969
      }
1970
113
      if (node[2].isConst())
1971
      {
1972
198
        Node ret = Word::update(s, start, node[2]);
1973
99
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1974
      }
1975
    }
1976
  }
1977
1978
66
  return node;
1979
}
1980
1981
122422
Node SequencesRewriter::rewriteContains(Node node)
1982
{
1983
122422
  Assert(node.getKind() == kind::STRING_CONTAINS);
1984
122422
  NodeManager* nm = NodeManager::currentNM();
1985
1986
122422
  if (node[0] == node[1])
1987
  {
1988
982
    Node ret = NodeManager::currentNM()->mkConst(true);
1989
491
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1990
  }
1991
121931
  if (node[0].isConst())
1992
  {
1993
64326
    if (node[1].isConst())
1994
    {
1995
9440
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1996
4720
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1997
    }
1998
    else
1999
    {
2000
108552
      Node t = node[1];
2001
59606
      if (Word::isEmpty(node[0]))
2002
      {
2003
        Node len1 =
2004
56055
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2005
28093
        if (d_arithEntail.check(len1, true))
2006
        {
2007
          // we handle the false case here since the rewrite for equality
2008
          // uses this function, hence we want to conclude false if possible.
2009
          // len(x)>0 => contains( "", x ) ---> false
2010
262
          Node ret = NodeManager::currentNM()->mkConst(false);
2011
131
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
2012
        }
2013
      }
2014
31513
      else if (d_stringsEntail.checkLengthOne(t))
2015
      {
2016
21024
        std::vector<Node> vec = Word::getChars(node[0]);
2017
21024
        Node emp = Word::mkEmptyWord(t.getType());
2018
21024
        NodeBuilder nb(OR);
2019
10512
        nb << emp.eqNode(t);
2020
22340
        for (const Node& c : vec)
2021
        {
2022
11828
          Assert(c.getType() == t.getType());
2023
11828
          nb << c.eqNode(t);
2024
        }
2025
2026
        // str.contains("ABCabc", t) --->
2027
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2028
        // if len(t) <= 1
2029
21024
        Node ret = nb;
2030
10512
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2031
      }
2032
21001
      else if (node[1].getKind() == kind::STRING_CONCAT)
2033
      {
2034
        int firstc, lastc;
2035
600
        if (!d_stringsEntail.canConstantContainConcat(
2036
                node[0], node[1], firstc, lastc))
2037
        {
2038
34
          Node ret = NodeManager::currentNM()->mkConst(false);
2039
17
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2040
        }
2041
      }
2042
    }
2043
  }
2044
106551
  if (node[1].isConst())
2045
  {
2046
53699
    size_t len = Word::getLength(node[1]);
2047
53699
    if (len == 0)
2048
    {
2049
      // contains( x, "" ) ---> true
2050
55820
      Node ret = NodeManager::currentNM()->mkConst(true);
2051
27910
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2052
    }
2053
25789
    else if (len == 1)
2054
    {
2055
      // The following rewrites are specific to a single character second
2056
      // argument of contains, where we can reason that this character is
2057
      // not split over multiple components in the first argument.
2058
19562
      if (node[0].getKind() == STRING_CONCAT)
2059
      {
2060
1332
        std::vector<Node> nc1;
2061
666
        utils::getConcat(node[0], nc1);
2062
1332
        NodeBuilder nb(OR);
2063
2460
        for (const Node& ncc : nc1)
2064
        {
2065
1794
          nb << nm->mkNode(STRING_CONTAINS, ncc, node[1]);
2066
        }
2067
1332
        Node ret = nb.constructNode();
2068
        // str.contains( x ++ y, "A" ) --->
2069
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2070
666
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2071
      }
2072
18896
      else if (node[0].getKind() == STRING_REPLACE)
2073
      {
2074
1080
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2075
592
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2076
        {
2077
208
          Node d1 = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2078
          Node d2 =
2079
              nm->mkNode(AND,
2080
208
                         nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2081
416
                         nm->mkNode(STRING_CONTAINS, node[0][2], node[1]));
2082
208
          Node ret = nm->mkNode(OR, d1, d2);
2083
          // If str.contains( y, "A" ) ---> false, then:
2084
          // str.contains( str.replace( x, y, z ), "A" ) --->
2085
          //   str.contains( x, "A" ) OR
2086
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2087
104
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2088
        }
2089
      }
2090
    }
2091
  }
2092
155742
  std::vector<Node> nc1;
2093
77871
  utils::getConcat(node[0], nc1);
2094
155742
  std::vector<Node> nc2;
2095
77871
  utils::getConcat(node[1], nc2);
2096
2097
  // component-wise containment
2098
155742
  std::vector<Node> nc1rb;
2099
155742
  std::vector<Node> nc1re;
2100
77871
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2101
  {
2102
2786
    Node ret = NodeManager::currentNM()->mkConst(true);
2103
1393
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2104
  }
2105
152956
  TypeNode stype = node[0].getType();
2106
2107
  // strip endpoints
2108
152956
  std::vector<Node> nb;
2109
152956
  std::vector<Node> ne;
2110
76478
  if (d_stringsEntail.stripConstantEndpoints(nc1, nc2, nb, ne))
2111
  {
2112
    Node ret = NodeManager::currentNM()->mkNode(
2113
214
        kind::STRING_CONTAINS, utils::mkConcat(nc1, stype), node[1]);
2114
107
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2115
  }
2116
2117
154395
  for (const Node& n : nc2)
2118
  {
2119
78388
    if (n.getKind() == kind::STRING_REPLACE)
2120
    {
2121
      // (str.contains x (str.replace y z w)) --> false
2122
      // if (str.contains x y) = false and (str.contains x w) = false
2123
      //
2124
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2125
      // replacement does not change y. (str.contains x w) checks that if the
2126
      // replacement changes anything in y, the w makes it impossible for it to
2127
      // occur in x.
2128
2882
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2129
1623
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2130
      {
2131
500
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2132
258
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2133
        {
2134
32
          Node res = nm->mkConst(false);
2135
16
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2136
        }
2137
      }
2138
2139
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2140
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2141
      //
2142
      // TODO: Remove with under-/over-approximation
2143
1607
      if (node[0] == n[0] && node[0] == n[2])
2144
      {
2145
696
        Node ret;
2146
348
        if (nc2.size() > 1)
2147
        {
2148
20
          Node emp = Word::mkEmptyWord(stype);
2149
20
          NodeBuilder nb2(kind::AND);
2150
30
          for (const Node& n2 : nc2)
2151
          {
2152
20
            if (n2 == n)
2153
            {
2154
10
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2155
            }
2156
            else
2157
            {
2158
10
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2159
            }
2160
          }
2161
10
          ret = nb2.constructNode();
2162
        }
2163
        else
2164
        {
2165
338
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2166
        }
2167
348
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2168
      }
2169
    }
2170
  }
2171
2172
  // length entailment
2173
152014
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2174
152014
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2175
76007
  if (d_arithEntail.check(len_n2, len_n1, true))
2176
  {
2177
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2178
130
    Node ret = NodeManager::currentNM()->mkConst(false);
2179
65
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2180
  }
2181
2182
  // multi-set reasoning
2183
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2184
  //   since the number of a's in the second argument is greater than the number
2185
  //   of a's in the first argument
2186
75942
  if (d_stringsEntail.checkMultisetSubset(node[0], node[1]))
2187
  {
2188
30
    Node ret = nm->mkConst(false);
2189
15
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2190
  }
2191
2192
75927
  if (d_arithEntail.check(len_n2, len_n1, false))
2193
  {
2194
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2195
80188
    Node ret = node[0].eqNode(node[1]);
2196
40094
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2197
  }
2198
2199
  // splitting
2200
35833
  if (node[0].getKind() == kind::STRING_CONCAT)
2201
  {
2202
1203
    if (node[1].isConst())
2203
    {
2204
911
      Node t = node[1];
2205
      // Below, we are looking for a constant component of node[0]
2206
      // has no overlap with node[1], which means we can split.
2207
      // Notice that if the first or last components had no
2208
      // overlap, these would have been removed by strip
2209
      // constant endpoints above.
2210
      // Hence, we consider only the inner children.
2211
1124
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2212
      {
2213
        // constant contains
2214
679
        if (node[0][i].isConst())
2215
        {
2216
          // if no overlap, we can split into disjunction
2217
27
          if (Word::noOverlapWith(node[0][i], node[1]))
2218
          {
2219
42
            std::vector<Node> nc0;
2220
21
            utils::getConcat(node[0], nc0);
2221
42
            std::vector<Node> spl[2];
2222
21
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2223
21
            Assert(i < nc0.size() - 1);
2224
21
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2225
            Node ret = NodeManager::currentNM()->mkNode(
2226
                kind::OR,
2227
84
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2228
42
                                                 utils::mkConcat(spl[0], stype),
2229
                                                 node[1]),
2230
84
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2231
42
                                                 utils::mkConcat(spl[1], stype),
2232
105
                                                 node[1]));
2233
21
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2234
          }
2235
        }
2236
      }
2237
    }
2238
  }
2239
34630
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2240
  {
2241
    // (str.contains (str.substr x n (str.len y)) y) --->
2242
    //   (= (str.substr x n (str.len y)) y)
2243
    //
2244
    // TODO: Remove with under-/over-approximation
2245
8630
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2246
    {
2247
20
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2248
10
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2249
    }
2250
  }
2251
26000
  else if (node[0].getKind() == kind::STRING_REPLACE)
2252
  {
2253
501
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2254
    {
2255
272
      if (Word::noOverlapWith(node[1], node[0][1])
2256
272
          && Word::noOverlapWith(node[1], node[0][2]))
2257
      {
2258
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2259
        // if there is no overlap between c1 and c3 and none between c2 and c3
2260
4
        Node ret = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2261
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2262
      }
2263
    }
2264
2265
499
    if (node[0][0] == node[0][2])
2266
    {
2267
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2268
55
      if (node[0][1] == node[1])
2269
      {
2270
10
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2271
5
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2272
      }
2273
2274
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2275
      // if (str.len z) <= 1
2276
50
      if (d_stringsEntail.checkLengthOne(node[1]))
2277
      {
2278
12
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2279
6
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2280
      }
2281
    }
2282
2283
    // (str.contains (str.replace x y z) z) --->
2284
    //   (or (str.contains x y) (str.contains x z))
2285
488
    if (node[0][2] == node[1])
2286
    {
2287
      Node ret =
2288
          nm->mkNode(OR,
2289
100
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2290
200
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][2]));
2291
50
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2292
    }
2293
2294
    // (str.contains (str.replace x y z) w) --->
2295
    //   (str.contains (str.replace x y "") w)
2296
    // if (str.contains z w) ---> false and (str.len w) = 1
2297
438
    if (d_stringsEntail.checkLengthOne(node[1]))
2298
    {
2299
308
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2300
165
      if (!ctn.isNull() && !ctn.getConst<bool>())
2301
      {
2302
44
        Node empty = Word::mkEmptyWord(stype);
2303
        Node ret = nm->mkNode(
2304
            kind::STRING_CONTAINS,
2305
44
            nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty),
2306
88
            node[1]);
2307
22
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2308
      }
2309
    }
2310
  }
2311
2312
35717
  if (node[1].getKind() == kind::STRING_REPLACE)
2313
  {
2314
    // (str.contains x (str.replace y x y)) --->
2315
    //   (str.contains x y)
2316
455
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2317
    {
2318
8
      Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1][0]);
2319
4
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2320
    }
2321
2322
    // (str.contains x (str.replace "" x y)) --->
2323
    //   (= "" (str.replace "" x y))
2324
    //
2325
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2326
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2327
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2328
900
    Node emp = Word::mkEmptyWord(stype);
2329
451
    if (node[0] == node[1][1] && node[1][0] == emp)
2330
    {
2331
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2332
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2333
    }
2334
  }
2335
2336
35711
  return node;
2337
}
2338
2339
3242
Node SequencesRewriter::rewriteIndexof(Node node)
2340
{
2341
3242
  Assert(node.getKind() == kind::STRING_INDEXOF);
2342
3242
  NodeManager* nm = NodeManager::currentNM();
2343
2344
3242
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2345
  {
2346
    // z<0  implies  str.indexof( x, y, z ) --> -1
2347
356
    Node negone = nm->mkConst(Rational(-1));
2348
178
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2349
  }
2350
2351
  // the string type
2352
6128
  TypeNode stype = node[0].getType();
2353
2354
  // evaluation and simple cases
2355
6128
  std::vector<Node> children0;
2356
3064
  utils::getConcat(node[0], children0);
2357
3064
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2358
  {
2359
992
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2360
831
    if (node[2].getConst<Rational>() > rMaxInt)
2361
    {
2362
      // We know that, due to limitations on the size of string constants
2363
      // in our implementation, that accessing a position greater than
2364
      // rMaxInt is guaranteed to be out of bounds.
2365
      Node negone = nm->mkConst(Rational(-1));
2366
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2367
    }
2368
831
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2369
992
    Node s = children0[0];
2370
992
    Node t = node[1];
2371
    uint32_t start =
2372
831
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2373
831
    std::size_t ret = Word::find(s, t, start);
2374
831
    if (ret != std::string::npos)
2375
    {
2376
574
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2377
287
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2378
    }
2379
544
    else if (children0.size() == 1)
2380
    {
2381
766
      Node negone = nm->mkConst(Rational(-1));
2382
383
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2383
    }
2384
  }
2385
2386
2394
  if (node[0] == node[1])
2387
  {
2388
71
    if (node[2].isConst())
2389
    {
2390
18
      if (node[2].getConst<Rational>().sgn() == 0)
2391
      {
2392
        // indexof( x, x, 0 ) --> 0
2393
36
        Node zero = nm->mkConst(Rational(0));
2394
18
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2395
      }
2396
    }
2397
53
    if (d_arithEntail.check(node[2], true))
2398
    {
2399
      // y>0  implies  indexof( x, x, y ) --> -1
2400
      Node negone = nm->mkConst(Rational(-1));
2401
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2402
    }
2403
87
    Node emp = Word::mkEmptyWord(stype);
2404
53
    if (node[0] != emp)
2405
    {
2406
      // indexof( x, x, z ) ---> indexof( "", "", z )
2407
38
      Node ret = nm->mkNode(STRING_INDEXOF, emp, emp, node[2]);
2408
19
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2409
    }
2410
  }
2411
2412
4714
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2413
4714
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2414
4714
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2415
2416
2357
  if (node[1].isConst())
2417
  {
2418
1911
    if (Word::isEmpty(node[1]))
2419
    {
2420
51
      if (d_arithEntail.check(len0, node[2]) && d_arithEntail.check(node[2]))
2421
      {
2422
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2423
7
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2424
      }
2425
    }
2426
  }
2427
2428
2350
  if (d_arithEntail.check(len1, len0m2, true))
2429
  {
2430
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2431
10
    Node negone = nm->mkConst(Rational(-1));
2432
5
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2433
  }
2434
2435
4690
  Node fstr = node[0];
2436
2345
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2437
  {
2438
590
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2439
590
    fstr = Rewriter::rewrite(fstr);
2440
  }
2441
2442
4690
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2443
4690
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2444
2345
                                 << fstr << ", " << node[1] << ")" << std::endl;
2445
2345
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2446
4690
  std::vector<Node> children1;
2447
2345
  utils::getConcat(node[1], children1);
2448
2345
  if (!cmp_conr.isNull())
2449
  {
2450
273
    if (cmp_conr.getConst<bool>())
2451
    {
2452
270
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2453
      {
2454
        // past the first position in node[0] that contains node[1], we can drop
2455
347
        std::vector<Node> nb;
2456
347
        std::vector<Node> ne;
2457
213
        int cc = d_stringsEntail.componentContains(
2458
213
            children0, children1, nb, ne, true, 1);
2459
213
        if (cc != -1 && !ne.empty())
2460
        {
2461
          // For example:
2462
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2463
122
          Node nn = utils::mkConcat(children0, stype);
2464
122
          Node ret = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2465
61
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2466
        }
2467
2468
        // Strip components from the beginning that are guaranteed not to match
2469
152
        if (d_stringsEntail.stripConstantEndpoints(
2470
                children0, children1, nb, ne, 1))
2471
        {
2472
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2473
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2474
          Node ret = nm->mkNode(
2475
              kind::PLUS,
2476
36
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2477
72
              nm->mkNode(kind::STRING_INDEXOF,
2478
36
                         utils::mkConcat(children0, stype),
2479
                         node[1],
2480
72
                         node[2]));
2481
18
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2482
        }
2483
      }
2484
      // To show that the first argument definitely contains the second, the
2485
      // index must be a valid index in the first argument. This ensures that
2486
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2487
      // is beyond the length of t. This is not required for the above rewrites,
2488
      // which only apply when n=0.
2489
191
      if (d_arithEntail.check(node[2]) && d_arithEntail.check(len0, node[2]))
2490
      {
2491
        // strip symbolic length
2492
281
        Node new_len = node[2];
2493
281
        std::vector<Node> nr;
2494
147
        if (d_stringsEntail.stripSymbolicLength(children0, nr, 1, new_len))
2495
        {
2496
          // For example:
2497
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2498
          // implies
2499
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2500
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2501
26
          Node nn = utils::mkConcat(children0, stype);
2502
          Node ret =
2503
              nm->mkNode(PLUS,
2504
26
                         nm->mkNode(MINUS, node[2], new_len),
2505
52
                         nm->mkNode(STRING_INDEXOF, nn, node[1], new_len));
2506
13
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2507
        }
2508
      }
2509
    }
2510
    else
2511
    {
2512
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2513
6
      Node negone = nm->mkConst(Rational(-1));
2514
3
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2515
    }
2516
  }
2517
  else
2518
  {
2519
4118
    Node new_len = node[2];
2520
4118
    std::vector<Node> nr;
2521
2072
    if (d_stringsEntail.stripSymbolicLength(children0, nr, 1, new_len))
2522
    {
2523
      // Normalize the string before the start index.
2524
      //
2525
      // For example:
2526
      // str.indexof(str.++("ABCD", x), y, 3) --->
2527
      // str.indexof(str.++("AAAD", x), y, 3)
2528
126
      Node nodeNr = utils::mkConcat(nr, stype);
2529
126
      Node normNr = lengthPreserveRewrite(nodeNr);
2530
76
      if (normNr != nodeNr)
2531
      {
2532
52
        std::vector<Node> normNrChildren;
2533
26
        utils::getConcat(normNr, normNrChildren);
2534
52
        std::vector<Node> children(normNrChildren);
2535
26
        children.insert(children.end(), children0.begin(), children0.end());
2536
52
        Node nn = utils::mkConcat(children, stype);
2537
52
        Node res = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2538
26
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2539
      }
2540
    }
2541
  }
2542
2543
2224
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2544
  {
2545
3342
    std::vector<Node> cb;
2546
3342
    std::vector<Node> ce;
2547
1676
    if (d_stringsEntail.stripConstantEndpoints(
2548
            children0, children1, cb, ce, -1))
2549
    {
2550
20
      Node ret = utils::mkConcat(children0, stype);
2551
10
      ret = nm->mkNode(STRING_INDEXOF, ret, node[1], node[2]);
2552
      // For example:
2553
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2554
10
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2555
    }
2556
  }
2557
2558
2214
  return node;
2559
}
2560
2561
504
Node SequencesRewriter::rewriteIndexofRe(Node node)
2562
{
2563
504
  Assert(node.getKind() == STRING_INDEXOF_RE);
2564
504
  NodeManager* nm = NodeManager::currentNM();
2565
1008
  Node s = node[0];
2566
1008
  Node r = node[1];
2567
1008
  Node n = node[2];
2568
1008
  Node zero = nm->mkConst(Rational(0));
2569
1008
  Node slen = nm->mkNode(STRING_LENGTH, s);
2570
2571
504
  if (d_arithEntail.check(zero, n, true) || d_arithEntail.check(n, slen, true))
2572
  {
2573
24
    Node ret = nm->mkConst(Rational(-1));
2574
12
    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
2575
  }
2576
2577
492
  if (RegExpEntail::isConstRegExp(r))
2578
  {
2579
372
    if (s.isConst() && n.isConst())
2580
    {
2581
166
      Rational nrat = n.getConst<Rational>();
2582
166
      cvc5::Rational rMaxInt(cvc5::String::maxSize());
2583
83
      if (nrat > rMaxInt)
2584
      {
2585
        // We know that, due to limitations on the size of string constants
2586
        // in our implementation, that accessing a position greater than
2587
        // rMaxInt is guaranteed to be out of bounds.
2588
        Node negone = nm->mkConst(Rational(-1));
2589
        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
2590
      }
2591
2592
83
      uint32_t start = nrat.getNumerator().toUnsignedInt();
2593
166
      Node rem = nm->mkConst(s.getConst<String>().substr(start));
2594
83
      std::pair<size_t, size_t> match = firstMatch(rem, r);
2595
      Node ret = nm->mkConst(
2596
216
          Rational(match.first == string::npos
2597
                       ? -1
2598
216
                       : static_cast<int64_t>(start + match.first)));
2599
83
      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
2600
    }
2601
2602
289
    if (d_arithEntail.check(n, zero) && d_arithEntail.check(slen, n))
2603
    {
2604
455
      String emptyStr("");
2605
229
      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
2606
      {
2607
3
        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
2608
      }
2609
    }
2610
  }
2611
406
  return node;
2612
}
2613
2614
2664
Node SequencesRewriter::rewriteReplace(Node node)
2615
{
2616
2664
  Assert(node.getKind() == kind::STRING_REPLACE);
2617
2664
  NodeManager* nm = NodeManager::currentNM();
2618
2619
2664
  if (node[1].isConst() && Word::isEmpty(node[1]))
2620
  {
2621
698
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2622
349
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2623
  }
2624
  // the string type
2625
4630
  TypeNode stype = node.getType();
2626
2627
4630
  std::vector<Node> children0;
2628
2315
  utils::getConcat(node[0], children0);
2629
2630
2315
  if (node[1].isConst() && children0[0].isConst())
2631
  {
2632
732
    Node s = children0[0];
2633
732
    Node t = node[1];
2634
728
    std::size_t p = Word::find(s, t);
2635
728
    if (p == std::string::npos)
2636
    {
2637
525
      if (children0.size() == 1)
2638
      {
2639
521
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2640
      }
2641
    }
2642
    else
2643
    {
2644
406
      Node s1 = Word::substr(s, 0, p);
2645
406
      Node s3 = Word::substr(s, p + Word::getLength(t));
2646
406
      std::vector<Node> children;
2647
203
      if (!Word::isEmpty(s1))
2648
      {
2649
39
        children.push_back(s1);
2650
      }
2651
203
      children.push_back(node[2]);
2652
203
      if (!Word::isEmpty(s3))
2653
      {
2654
73
        children.push_back(s3);
2655
      }
2656
203
      children.insert(children.end(), children0.begin() + 1, children0.end());
2657
406
      Node ret = utils::mkConcat(children, stype);
2658
203
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2659
    }
2660
  }
2661
2662
  // rewrites that apply to both replace and replaceall
2663
3182
  Node rri = rewriteReplaceInternal(node);
2664
1591
  if (!rri.isNull())
2665
  {
2666
    // printing of the rewrite managed by the call above
2667
97
    return rri;
2668
  }
2669
2670
1494
  if (node[0] == node[2])
2671
  {
2672
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2673
412
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2674
412
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2675
253
    if (d_arithEntail.check(l1, l0))
2676
    {
2677
92
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2678
    }
2679
2680
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2681
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2682
161
    if (d_stringsEntail.checkLengthOne(node[0]))
2683
    {
2684
90
      Node empty = Word::mkEmptyWord(stype);
2685
      Node rn1 = Rewriter::rewrite(
2686
90
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2687
46
      if (rn1 != node[1])
2688
      {
2689
90
        std::vector<Node> emptyNodes;
2690
        bool allEmptyEqs;
2691
46
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2692
2693
46
        if (allEmptyEqs)
2694
        {
2695
62
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2696
32
          if (node[1] != nn1)
2697
          {
2698
4
            Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]);
2699
2
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2700
          }
2701
        }
2702
      }
2703
    }
2704
  }
2705
2706
2800
  std::vector<Node> children1;
2707
1400
  utils::getConcat(node[1], children1);
2708
2709
  // check if contains definitely does (or does not) hold
2710
2800
  Node cmp_con = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1]);
2711
2800
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2712
1400
  if (cmp_conr.isConst())
2713
  {
2714
233
    if (cmp_conr.getConst<bool>())
2715
    {
2716
      // component-wise containment
2717
382
      std::vector<Node> cb;
2718
382
      std::vector<Node> ce;
2719
226
      int cc = d_stringsEntail.componentContains(
2720
226
          children0, children1, cb, ce, true, 1);
2721
226
      if (cc != -1)
2722
      {
2723
178
        if (cc == 0 && children0[0] == children1[0])
2724
        {
2725
          // definitely a prefix, can do the replace
2726
          // for example,
2727
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2728
          //   str.++( y, "b" )
2729
62
          std::vector<Node> cres;
2730
31
          cres.push_back(node[2]);
2731
31
          cres.insert(cres.end(), ce.begin(), ce.end());
2732
62
          Node ret = utils::mkConcat(cres, stype);
2733
31
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2734
        }
2735
147
        else if (!ce.empty())
2736
        {
2737
          // we can pull remainder past first definite containment
2738
          // for example,
2739
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2740
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2741
          // this is independent of whether the second argument may be empty
2742
78
          std::vector<Node> scc;
2743
117
          scc.push_back(NodeManager::currentNM()->mkNode(
2744
              kind::STRING_REPLACE,
2745
78
              utils::mkConcat(children0, stype),
2746
              node[1],
2747
              node[2]));
2748
39
          scc.insert(scc.end(), ce.begin(), ce.end());
2749
78
          Node ret = utils::mkConcat(scc, stype);
2750
39
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2751
        }
2752
      }
2753
    }
2754
    else
2755
    {
2756
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2757
7
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2758
    }
2759
  }
2760
1167
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2761
  {
2762
    // Rewriting the str.contains may return equalities of the form (= x "").
2763
    // In that case, we can substitute the variables appearing in those
2764
    // equalities with the empty string in the third argument of the
2765
    // str.replace. For example:
2766
    //
2767
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2768
    //
2769
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2770
    // but that means that y must be empty in that case. Thus, we can
2771
    // substitute y with "" in the third argument. Note that the third argument
2772
    // does not matter when the str.replace does not apply.
2773
    //
2774
534
    Node empty = Word::mkEmptyWord(stype);
2775
2776
534
    std::vector<Node> emptyNodes;
2777
    bool allEmptyEqs;
2778
280
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2779
2780
280
    if (emptyNodes.size() > 0)
2781
    {
2782
      // Perform the substitutions
2783
294
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2784
      Node nn2 = node[2].substitute(
2785
294
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2786
2787
      // If the contains rewrites to a conjunction of empty-string equalities
2788
      // and we are doing the replacement in an empty string, we can rewrite
2789
      // the string-to-replace with a concatenation of all the terms that must
2790
      // be empty:
2791
      //
2792
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2793
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2794
160
      if (node[0] == empty && allEmptyEqs)
2795
      {
2796
148
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2797
148
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2798
78
        if (nn1 != node[1] || nn2 != node[2])
2799
        {
2800
16
          Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2);
2801
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2802
        }
2803
      }
2804
2805
152
      if (nn2 != node[2])
2806
      {
2807
36
        Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2);
2808
18
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2809
      }
2810
    }
2811
  }
2812
2813
1297
  if (cmp_conr != cmp_con)
2814
  {
2815
475
    if (d_stringsEntail.checkNonEmpty(node[1]))
2816
    {
2817
      // pull endpoints that can be stripped
2818
      // for example,
2819
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2820
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2821
577
      std::vector<Node> cb;
2822
577
      std::vector<Node> ce;
2823
293
      if (d_stringsEntail.stripConstantEndpoints(children0, children1, cb, ce))
2824
      {
2825
18
        std::vector<Node> cc;
2826
9
        cc.insert(cc.end(), cb.begin(), cb.end());
2827
9
        cc.push_back(
2828
36
            NodeManager::currentNM()->mkNode(kind::STRING_REPLACE,
2829
18
                                             utils::mkConcat(children0, stype),
2830
                                             node[1],
2831
                                             node[2]));
2832
9
        cc.insert(cc.end(), ce.begin(), ce.end());
2833
18
        Node ret = utils::mkConcat(cc, stype);
2834
9
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2835
      }
2836
    }
2837
  }
2838
2839
1288
  children1.clear();
2840
1288
  utils::getConcat(node[1], children1);
2841
2576
  Node lastChild1 = children1[children1.size() - 1];
2842
1288
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2843
  {
2844
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2845
    // (str.replace x (str.++ t
2846
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2847
    // if j > len(x)
2848
    //
2849
    // Reasoning: If the string to be replaced is longer than x, then it does
2850
    // not matter how much longer it is, the result is always x. Thus, it is
2851
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2852
2853
78
    children1.pop_back();
2854
    // Length of the non-substr components in the second argument
2855
    Node partLen1 =
2856
148
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2857
148
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2858
2859
148
    Node zero = nm->mkConst(Rational(0));
2860
148
    Node one = nm->mkConst(Rational(1));
2861
148
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2862
148
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2863
    // Check len(t) + j > len(x) + 1
2864
78
    if (d_arithEntail.check(maxLen1, len0_1, true))
2865
    {
2866
8
      children1.push_back(nm->mkNode(
2867
          kind::STRING_SUBSTR,
2868
          lastChild1[0],
2869
          lastChild1[1],
2870
16
          nm->mkNode(
2871
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2872
      Node res = nm->mkNode(kind::STRING_REPLACE,
2873
                            node[0],
2874
16
                            utils::mkConcat(children1, stype),
2875
32
                            node[2]);
2876
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2877
    }
2878
  }
2879
2880
1280
  if (node[0].getKind() == STRING_REPLACE)
2881
  {
2882
174
    Node x = node[0];
2883
174
    Node y = node[1];
2884
174
    Node z = node[2];
2885
90
    if (x[0] == x[2] && x[0] == y)
2886
    {
2887
      // (str.replace (str.replace y w y) y z) -->
2888
      //   (str.replace (str.replace y w z) y z)
2889
      // if (str.len w) >= (str.len z) and w != z
2890
      //
2891
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2892
      // does appear in y.
2893
      //
2894
      // Case (1): In this case, the reasoning is trivial. The
2895
      // inner replace does not do anything, so we can just replace its third
2896
      // argument with any string.
2897
      //
2898
      // Case (2): After the inner replace, we are guaranteed to have a string
2899
      // that contains y at the index of w in the original string y. The outer
2900
      // replace then replaces that y with z, so we can short-circuit that
2901
      // replace by directly replacing w with z in the inner replace. We can
2902
      // only do that if the result of the new inner replace does not contain
2903
      // y, otherwise we end up doing two replaces that are different from the
2904
      // original expression. We enforce that by requiring that the length of w
2905
      // has to be greater or equal to the length of z and that w and z have to
2906
      // be different. This makes sure that an inner replace changes a string
2907
      // to a string that is shorter than y, making it impossible for the outer
2908
      // replace to match.
2909
66
      Node w = x[1];
2910
2911
      // (str.len w) >= (str.len z)
2912
66
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2913
66
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2914
36
      if (d_arithEntail.check(wlen, zlen))
2915
      {
2916
        // w != z
2917
34
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2918
20
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2919
        {
2920
          Node ret = nm->mkNode(kind::STRING_REPLACE,
2921
12
                                nm->mkNode(kind::STRING_REPLACE, y, w, z),
2922
                                y,
2923
24
                                z);
2924
6
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2925
        }
2926
      }
2927
    }
2928
  }
2929
2930
1274
  if (node[1].getKind() == STRING_REPLACE)
2931
  {
2932
90
    if (node[1][0] == node[0])
2933
    {
2934
14
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2935
      {
2936
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2937
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2938
      }
2939
14
      bool dualReplIteSuccess = false;
2940
26
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2941
14
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2942
      {
2943
        // str.contains( x, z ) ---> false
2944
        //   implies
2945
        // str.replace( x, str.replace( x, y, z ), w ) --->
2946
        // ite( str.contains( x, y ), x, w )
2947
        dualReplIteSuccess = true;
2948
      }
2949
      else
2950
      {
2951
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2952
        //   implies
2953
        // str.replace( x, str.replace( x, y, z ), w ) --->
2954
        // ite( str.contains( x, y ), x, w )
2955
14
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2956
14
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2957
        {
2958
4
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2959
4
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2960
          {
2961
2
            dualReplIteSuccess = true;
2962
          }
2963
        }
2964
      }
2965
14
      if (dualReplIteSuccess)
2966
      {
2967
        Node res = nm->mkNode(ITE,
2968
4
                              nm->mkNode(STRING_CONTAINS, node[0], node[1][1]),
2969
                              node[0],
2970
8
                              node[2]);
2971
2
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2972
      }
2973
    }
2974
2975
88
    bool invSuccess = false;
2976
88
    if (node[1][1] == node[0])
2977
    {
2978
26
      if (node[1][0] == node[1][2])
2979
      {
2980
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2981
        invSuccess = true;
2982
      }
2983
26
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2984
      {
2985
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2986
        //   implies
2987
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2988
16
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2989
8
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2990
      }
2991
    }
2992
    else
2993
    {
2994
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2995
      //   implies
2996
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2997
124
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2998
62
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2999
      {
3000
2
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
3001
2
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
3002
      }
3003
    }
3004
88
    if (invSuccess)
3005
    {
3006
      Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]);
3007
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
3008
    }
3009
  }
3010
1272
  if (node[2].getKind() == STRING_REPLACE)
3011
  {
3012
30
    if (node[2][1] == node[0])
3013
    {
3014
      // str.contains( z, w ) ----> false implies
3015
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3016
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
3017
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
3018
      {
3019
        Node res =
3020
            nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]);
3021
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
3022
      }
3023
    }
3024
30
    if (node[2][0] == node[1])
3025
    {
3026
4
      bool success = false;
3027
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
3028
      {
3029
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3030
        success = true;
3031
      }
3032
      else
3033
      {
3034
        // str.contains( x, z ) ----> false implies
3035
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3036
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
3037
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3038
      }
3039
4
      if (success)
3040
      {
3041
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
3042
      }
3043
    }
3044
  }
3045
  // miniscope based on components that do not contribute to contains
3046
  // for example,
3047
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
3048
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
3049
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3050
1272
  if (d_stringsEntail.checkLengthOne(node[1]))
3051
  {
3052
918
    Node lastLhs;
3053
460
    unsigned lastCheckIndex = 0;
3054
463
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3055
    {
3056
66
      unsigned checkIndex = children0.size() - i;
3057
69
      std::vector<Node> checkLhs;
3058
66
      checkLhs.insert(
3059
132
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3060
69
      Node lhs = utils::mkConcat(checkLhs, stype);
3061
69
      Node rhs = children0[checkIndex];
3062
69
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
3063
66
      if (!ctn.isNull() && ctn.getConst<bool>())
3064
      {
3065
3
        lastLhs = lhs;
3066
3
        lastCheckIndex = checkIndex;
3067
      }
3068
      else
3069
      {
3070
63
        break;
3071
      }
3072
    }
3073
460
    if (!lastLhs.isNull())
3074
    {
3075
4
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
3076
8
                             children0.end());
3077
4
      Node rem = utils::mkConcat(remc, stype);
3078
      Node ret =
3079
          nm->mkNode(STRING_CONCAT,
3080
4
                     nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]),
3081
8
                     rem);
3082
      // for example:
3083
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3084
      // Since we know that the first occurrence of "A" cannot be in the
3085
      // second occurrence of x. Notice this is specific to single characters
3086
      // due to complications with finds that span multiple components for
3087
      // non-characters.
3088
2
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3089
    }
3090
  }
3091
3092
  // TODO (#1180) incorporate these?
3093
  // contains( t, s ) =>
3094
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3095
  // contains( t, s ) =>
3096
  //   contains( replace( t, s, r ), r ) ----> true
3097
3098
1270
  return node;
3099
}
3100
3101
202
Node SequencesRewriter::rewriteReplaceAll(Node node)
3102
{
3103
202
  Assert(node.getKind() == STRING_REPLACE_ALL);
3104
3105
404
  TypeNode stype = node.getType();
3106
3107
202
  if (node[0].isConst() && node[1].isConst())
3108
  {
3109
80
    std::vector<Node> children;
3110
80
    Node s = node[0];
3111
80
    Node t = node[1];
3112
40
    if (Word::isEmpty(s) || Word::isEmpty(t))
3113
    {
3114
10
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3115
    }
3116
30
    std::size_t sizeS = Word::getLength(s);
3117
30
    std::size_t sizeT = Word::getLength(t);
3118
30
    std::size_t index = 0;
3119
30
    std::size_t curr = 0;
3120
65
    do
3121
    {
3122
95
      curr = Word::find(s, t, index);
3123
95
      if (curr != std::string::npos)
3124
      {
3125
65
        if (curr > index)
3126
        {
3127
32
          children.push_back(Word::substr(s, index, curr - index));
3128
        }
3129
65
        children.push_back(node[2]);
3130
65
        index = curr + sizeT;
3131
      }
3132
      else
3133
      {
3134
30
        children.push_back(Word::substr(s, index, sizeS - index));
3135
      }
3136
95
    } while (curr != std::string::npos && curr < sizeS);
3137
    // constant evaluation
3138
60
    Node res = utils::mkConcat(children, stype);
3139
30
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3140
  }
3141
3142
  // rewrites that apply to both replace and replaceall
3143
324
  Node rri = rewriteReplaceInternal(node);
3144
162
  if (!rri.isNull())
3145
  {
3146
    // printing of the rewrite managed by the call above
3147
4
    return rri;
3148
  }
3149
3150
158
  return node;
3151
}
3152
3153
1753
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3154
{
3155
1753
  Kind nk = node.getKind();
3156
1753
  Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL);
3157
3158
1753
  if (node[1] == node[2])
3159
  {
3160
48
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3161
  }
3162
3163
1705
  if (node[0] == node[1])
3164
  {
3165
    // only holds for replaceall if non-empty
3166
77
    if (nk == STRING_REPLACE || d_stringsEntail.checkNonEmpty(node[1]))
3167
    {
3168
53
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3169
    }
3170
  }
3171
3172
1652
  return Node::null();
3173
}
3174
3175
283
Node SequencesRewriter::rewriteReplaceRe(Node node)
3176
{
3177
283
  Assert(node.getKind() == STRING_REPLACE_RE);
3178
283
  NodeManager* nm = NodeManager::currentNM();
3179
566
  Node x = node[0];
3180
566
  Node y = node[1];
3181
566
  Node z = node[2];
3182
3183
283
  if (RegExpEntail::isConstRegExp(y))
3184
  {
3185
209
    if (x.isConst())
3186
    {
3187
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3188
55
      std::pair<size_t, size_t> match = firstMatch(x, y);
3189
55
      if (match.first != string::npos)
3190
      {
3191
58
        String s = x.getConst<String>();
3192
        Node ret = nm->mkNode(STRING_CONCAT,
3193
58
                              nm->mkConst(s.substr(0, match.first)),
3194
                              z,
3195
116
                              nm->mkConst(s.substr(match.second)));
3196
29
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3197
      }
3198
      else
3199
      {
3200
26
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3201
      }
3202
    }
3203
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3204
306
    String emptyStr("");
3205
154
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3206
    {
3207
4
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3208
2
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3209
    }
3210
  }
3211
226
  return node;
3212
}
3213
3214
186
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3215
{
3216
186
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3217
186
  NodeManager* nm = NodeManager::currentNM();
3218
372
  Node x = node[0];
3219
372
  Node y = node[1];
3220
372
  Node z = node[2];
3221
3222
186
  if (RegExpEntail::isConstRegExp(y))
3223
  {
3224
158
    if (x.isConst())
3225
    {
3226
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3227
      //   "Z" ++ y ++ "Z" ++ y
3228
100
      TypeNode t = x.getType();
3229
100
      Node emp = Word::mkEmptyWord(t);
3230
      Node yp = Rewriter::rewrite(
3231
100
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3232
100
      std::vector<Node> res;
3233
100
      String rem = x.getConst<String>();
3234
50
      std::pair<size_t, size_t> match(0, 0);
3235
124
      while (rem.size() != 0)
3236
      {
3237
46
        match = firstMatch(nm->mkConst(rem), yp);
3238
46
        if (match.first == string::npos)
3239
        {
3240
9
          break;
3241
        }
3242
37
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3243
37
        res.push_back(z);
3244
37
        rem = rem.substr(match.second);
3245
      }
3246
50
      res.push_back(nm->mkConst(rem));
3247
100
      Node ret = utils::mkConcat(res, t);
3248
50
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3249
    }
3250
  }
3251
3252
136
  return node;
3253
}
3254
3255
184
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3256
{
3257
184
  Assert(n.isConst() && n.getType().isStringLike());
3258
184
  Assert(r.getType().isRegExp());
3259
184
  NodeManager* nm = NodeManager::currentNM();
3260
3261
368
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3262
368
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3263
368
  String s = n.getConst<String>();
3264
3265
184
  if (s.size() == 0)
3266
  {
3267
34
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3268
    {
3269
3
      return std::make_pair(0, 0);
3270
    }
3271
    else
3272
    {
3273
31
      return std::make_pair(string::npos, string::npos);
3274
    }
3275
  }
3276
3277
237
  for (size_t i = 0, size = s.size(); i < size; i++)
3278
  {
3279
200
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3280
    {
3281
457
      for (size_t j = i; j <= size; j++)
3282
      {
3283
801
        String substr = s.substr(i, j - i);
3284
457
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3285
        {
3286
113
          return std::make_pair(i, j);
3287
        }
3288
      }
3289
    }
3290
  }
3291
3292
37
  return std::make_pair(string::npos, string::npos);
3293
}
3294
3295
176
Node SequencesRewriter::rewriteStrReverse(Node node)
3296
{
3297
176
  Assert(node.getKind() == STRING_REV);
3298
176
  NodeManager* nm = NodeManager::currentNM();
3299
352
  Node x = node[0];
3300
176
  if (x.isConst())
3301
  {
3302
    // reverse the characters in the constant
3303
92
    Node retNode = Word::reverse(x);
3304
46
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3305
  }
3306
130
  else if (x.getKind() == STRING_CONCAT)
3307
  {
3308
54
    std::vector<Node> children;
3309
83
    for (const Node& nc : x)
3310
    {
3311
56
      children.push_back(nm->mkNode(STRING_REV, nc));
3312
    }
3313
27
    std::reverse(children.begin(), children.end());
3314
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3315
54
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3316
27
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3317
  }
3318
103
  else if (x.getKind() == STRING_REV)
3319
  {
3320
    // rev( rev( x ) ) --> x
3321
6
    Node retNode = x[0];
3322
3
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3323
  }
3324
100
  return node;
3325
}
3326
3327
98
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3328
{
3329
98
  Assert(n.getKind() == kind::STRING_PREFIX
3330
         || n.getKind() == kind::STRING_SUFFIX);
3331
98
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3332
98
  if (n[0] == n[1])
3333
  {
3334
10
    Node ret = NodeManager::currentNM()->mkConst(true);
3335
5
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3336
  }
3337
93
  if (n[0].isConst())
3338
  {
3339
40
    if (Word::isEmpty(n[0]))
3340
    {
3341
2
      Node ret = NodeManager::currentNM()->mkConst(true);
3342
1
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3343
    }
3344
  }
3345
92
  if (n[1].isConst())
3346
  {
3347
24
    Node s = n[1];
3348
24
    size_t lenS = Word::getLength(s);
3349
24
    if (n[0].isConst())
3350
    {
3351
      Node ret = NodeManager::currentNM()->mkConst(false);
3352
      Node t = n[0];
3353
      size_t lenT = Word::getLength(t);
3354
      if (lenS >= lenT)
3355
      {
3356
        if ((isPrefix && t == Word::prefix(s, lenT))
3357
            || (!isPrefix && t == Word::suffix(s, lenT)))
3358
        {
3359
          ret = NodeManager::currentNM()->mkConst(true);
3360
        }
3361
      }
3362
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3363
    }
3364
24
    else if (lenS == 0)
3365
    {
3366
8
      Node ret = n[0].eqNode(n[1]);
3367
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3368
    }
3369
20
    else if (lenS == 1)
3370
    {
3371
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3372
      // (str.contains "A" x )
3373
      Node ret =
3374
40
          NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS, n[1], n[0]);
3375
20
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3376
    }
3377
  }
3378
136
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3379
136
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3380
136
  Node val;
3381
68
  if (isPrefix)
3382
  {
3383
50
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3384
  }
3385
  else
3386
  {
3387
18
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3388
  }
3389
3390
  // Check if we can turn the prefix/suffix into equalities by showing that the
3391
  // prefix/suffix is at least as long as the string
3392
136
  Node eqs = d_stringsEntail.inferEqsFromContains(n[1], n[0]);
3393
68
  if (!eqs.isNull())
3394
  {
3395
16
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3396
  }
3397
3398
  // general reduction to equality + substr
3399
  Node retNode = n[0].eqNode(
3400
104
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3401
3402
52
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3403
}
3404
3405
80
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3406
{
3407
80
  NodeManager* nm = NodeManager::currentNM();
3408
160
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3409
160
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3410
160
  return res.isNull() ? n : res;
3411
}
3412
3413
92
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3414
{
3415
92
  NodeManager* nm = NodeManager::currentNM();
3416
3417
184
  Node res;
3418
92
  if (len.getKind() == CONST_RATIONAL)
3419
  {
3420
    // c -> "A" repeated c times
3421
160
    Rational ratLen = len.getConst<Rational>();
3422
80
    Assert(ratLen.getDenominator() == 1);
3423
160
    Integer intLen = ratLen.getNumerator();
3424
80
    uint32_t u = intLen.getUnsignedInt();
3425
80
    if (stype.isString())  // string-only
3426
    {
3427
80
      res = nm->mkConst(String(std::string(u, 'A')));
3428
    }
3429
    // we could do this for sequences, but we need to be careful: some
3430
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3431
    // sorts and arrays).
3432
  }
3433
12
  else if (len.getKind() == PLUS)
3434
  {
3435
    // x + y -> norm(x) + norm(y)
3436
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3437
12
    for (const auto& n : len)
3438
    {
3439
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3440
8
      if (sn.isNull())
3441
      {
3442
        return Node::null();
3443
      }
3444
16
      std::vector<Node> snChildren;
3445
8
      utils::getConcat(sn, snChildren);
3446
8
      concatBuilder.append(snChildren);
3447
    }
3448
4
    res = concatBuilder.constructNode();
3449
  }
3450
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3451
20
           && len[0].isConst())
3452
  {
3453
    // c * x -> norm(x) repeated c times
3454
8
    Rational ratReps = len[0].getConst<Rational>();
3455
4
    Assert(ratReps.getDenominator() == 1);
3456
8
    Integer intReps = ratReps.getNumerator();
3457
3458
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3459
4
    if (nRep.isNull())
3460
    {
3461
      return Node::null();
3462
    }
3463
8
    std::vector<Node> nRepChildren;
3464
4
    utils::getConcat(nRep, nRepChildren);
3465
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3466
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3467
    {
3468
8
      concatBuilder.append(nRepChildren);
3469
    }
3470
4
    res = concatBuilder.constructNode();
3471
  }
3472
4
  else if (len.getKind() == STRING_LENGTH)
3473
  {
3474
    // len(x) -> x
3475
4
    res = len[0];
3476
  }
3477
92
  return res;
3478
}
3479
3480
240
Node SequencesRewriter::rewriteSeqUnit(Node node)
3481
{
3482
240
  NodeManager* nm = NodeManager::currentNM();
3483
240
  if (node[0].isConst())
3484
  {
3485
164
    std::vector<Node> seq;
3486
82
    seq.push_back(node[0]);
3487
164
    TypeNode stype = node[0].getType();
3488
164
    Node ret = nm->mkConst(Sequence(stype, seq));
3489
82
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3490
  }
3491
158
  return node;
3492
}
3493
3494
164075
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3495
{
3496
328150
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3497
164075
                           << "." << std::endl;
3498
164075
  if (d_statistics != nullptr)
3499
  {
3500
163688
    (*d_statistics) << r;
3501
  }
3502
164075
  return ret;
3503
}
3504
3505
77543
Node SequencesRewriter::postProcessRewrite(Node node, Node ret)
3506
{
3507
77543
  NodeManager* nm = NodeManager::currentNM();
3508
  // standard post-processing
3509
  // We rewrite (string) equalities immediately here. This allows us to forego
3510
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3511
  // of { s=t, t=s, true, false } ).
3512
77543
  Kind retk = ret.getKind();
3513
77543
  if (retk == OR || retk == AND)
3514
  {
3515
5198
    std::vector<Node> children;
3516
2599
    bool childChanged = false;
3517
10156
    for (const Node& cret : ret)
3518
    {
3519
15114
      Node creter = cret;
3520
7557
      if (cret.getKind() == EQUAL)
3521
      {
3522
2682
        creter = rewriteEqualityExt(cret);
3523
      }
3524
4875
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3525
      {
3526
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3527
      }
3528
7557
      childChanged = childChanged || cret != creter;
3529
7557
      children.push_back(creter);
3530
    }
3531
2599
    if (childChanged)
3532
    {
3533
344
      ret = nm->mkNode(retk, children);
3534
2599
    }
3535
  }
3536
74944
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3537
  {
3538
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3539
  }
3540
74944
  else if (retk == EQUAL && node.getKind() != EQUAL)
3541
  {
3542
2172
    Trace("strings-rewrite")
3543
1086
        << "Apply extended equality rewrite on " << ret << std::endl;
3544
1086
    ret = rewriteEqualityExt(ret);
3545
  }
3546
77543
  return ret;
3547
}
3548
3549
}  // namespace strings
3550
}  // namespace theory
3551
22746
}  // namespace cvc5