GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1557 1615 96.4 %
Date: 2021-05-22 Branches: 4893 9422 51.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/statistics_registry.h"
29
30
using namespace std;
31
using namespace cvc5::kind;
32
33
namespace cvc5 {
34
namespace theory {
35
namespace strings {
36
37
9649
SequencesRewriter::SequencesRewriter(HistogramStat<Rewrite>* statistics)
38
9649
    : d_statistics(statistics), d_stringsEntail(*this)
39
{
40
9649
}
41
42
92308
Node SequencesRewriter::rewriteEquality(Node node)
43
{
44
92308
  Assert(node.getKind() == kind::EQUAL);
45
92308
  if (node[0] == node[1])
46
  {
47
6842
    Node ret = NodeManager::currentNM()->mkConst(true);
48
3421
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
49
  }
50
88887
  else if (node[0].isConst() && node[1].isConst())
51
  {
52
4104
    Node ret = NodeManager::currentNM()->mkConst(false);
53
2052
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
54
  }
55
56
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
57
259938
  for (unsigned r = 0; r < 2; r++)
58
  {
59
    // must call rewrite contains directly to avoid infinite loop
60
    // we do a fix point since we may rewrite contains terms to simpler
61
    // contains terms.
62
346622
    Node ctn = d_stringsEntail.checkContains(node[r], node[1 - r], false);
63
173519
    if (!ctn.isNull())
64
    {
65
27772
      if (!ctn.getConst<bool>())
66
      {
67
416
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
68
      }
69
      else
70
      {
71
        // definitely contains but not syntactically equal
72
        // We may be able to simplify, e.g.
73
        //  str.++( x, "a" ) == "a"  ----> x = ""
74
      }
75
    }
76
  }
77
78
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
79
  // This covers cases like str.++( x, x ) == "a" ---> false
80
172838
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
81
172838
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
82
172838
  Node len_eq = len0.eqNode(len1);
83
86419
  len_eq = Rewriter::rewrite(len_eq);
84
86419
  if (len_eq.isConst() && !len_eq.getConst<bool>())
85
  {
86
5
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
87
  }
88
89
172828
  std::vector<Node> c[2];
90
259242
  for (unsigned i = 0; i < 2; i++)
91
  {
92
172828
    utils::getConcat(node[i], c[i]);
93
  }
94
95
  // check if the prefix, suffix mismatches
96
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
97
86414
  unsigned minsize = std::min(c[0].size(), c[1].size());
98
259198
  for (unsigned r = 0; r < 2; r++)
99
  {
100
181558
    for (unsigned i = 0; i < minsize; i++)
101
    {
102
179949
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
103
179949
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
104
188698
      Node s = c[0][index1];
105
188698
      Node t = c[1][index2];
106
179949
      if (s.isConst() && t.isConst())
107
      {
108
4215
        size_t lenS = Word::getLength(s);
109
4215
        size_t lenT = Word::getLength(t);
110
4215
        size_t lenShort = lenS <= lenT ? lenS : lenT;
111
11050
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
112
6835
                                : Word::strncmp(s, t, lenShort);
113
4215
        if (!isSameFix)
114
        {
115
50
          Node ret = NodeManager::currentNM()->mkConst(false);
116
25
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
117
        }
118
      }
119
179924
      if (s != t)
120
      {
121
171175
        break;
122
      }
123
    }
124
  }
125
126
  // standard ordering
127
86389
  if (node[0] > node[1])
128
  {
129
37886
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
130
18943
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
131
  }
132
67446
  return node;
133
}
134
135
83573
Node SequencesRewriter::rewriteEqualityExt(Node node)
136
{
137
83573
  Assert(node.getKind() == EQUAL);
138
83573
  if (node[0].getType().isInteger())
139
  {
140
11673
    return rewriteArithEqualityExt(node);
141
  }
142
71900
  if (node[0].getType().isStringLike())
143
  {
144
71900
    return rewriteStrEqualityExt(node);
145
  }
146
  return node;
147
}
148
149
71900
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
150
{
151
71900
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
152
143800
  TypeNode stype = node[0].getType();
153
154
71900
  NodeManager* nm = NodeManager::currentNM();
155
143800
  std::vector<Node> c[2];
156
143800
  Node new_ret;
157
215700
  for (unsigned i = 0; i < 2; i++)
158
  {
159
143800
    utils::getConcat(node[i], c[i]);
160
  }
161
  // ------- equality unification
162
71900
  bool changed = false;
163
215700
  for (unsigned i = 0; i < 2; i++)
164
  {
165
148596
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
166
    {
167
2398
      c[0].pop_back();
168
2398
      c[1].pop_back();
169
2398
      changed = true;
170
    }
171
    // splice constants
172
428014
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
173
264018
        && c[1].back().isConst())
174
    {
175
910
      Node cs[2];
176
      size_t csl[2];
177
1365
      for (unsigned j = 0; j < 2; j++)
178
      {
179
910
        cs[j] = c[j].back();
180
910
        csl[j] = Word::getLength(cs[j]);
181
      }
182
455
      size_t larger = csl[0] > csl[1] ? 0 : 1;
183
455
      size_t smallerSize = csl[1 - larger];
184
910
      if (cs[1 - larger]
185
910
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
186
                     : Word::prefix(cs[larger], smallerSize)))
187
      {
188
435
        size_t sizeDiff = csl[larger] - smallerSize;
189
435
        c[larger][c[larger].size() - 1] =
190
870
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
191
                   : Word::suffix(cs[larger], sizeDiff);
192
435
        c[1 - larger].pop_back();
193
435
        changed = true;
194
      }
195
    }
196
431400
    for (unsigned j = 0; j < 2; j++)
197
    {
198
287600
      std::reverse(c[j].begin(), c[j].end());
199
    }
200
  }
201
71900
  if (changed)
202
  {
203
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
204
5412
    Node s1 = utils::mkConcat(c[0], stype);
205
5412
    Node s2 = utils::mkConcat(c[1], stype);
206
2706
    new_ret = s1.eqNode(s2);
207
2706
    node = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
208
  }
209
210
  // ------- homogeneous constants
211
215685
  for (unsigned i = 0; i < 2; i++)
212
  {
213
287585
    Node cn = StringsEntail::checkHomogeneousString(node[i]);
214
143800
    if (!cn.isNull() && !Word::isEmpty(cn))
215
    {
216
22895
      Assert(cn.isConst());
217
22895
      Assert(Word::getLength(cn) == 1);
218
219
      // The operands of the concat on each side of the equality without
220
      // constant strings
221
68685
      std::vector<Node> trimmed[2];
222
      // Counts the number of `cn`s on each side
223
22895
      size_t numCns[2] = {0, 0};
224
68655
      for (size_t j = 0; j < 2; j++)
225
      {
226
        // Sort the operands of the concats on both sides of the equality
227
        // (since both sides may only contain one char, the order does not
228
        // matter)
229
45775
        std::sort(c[j].begin(), c[j].end());
230
91392
        for (const Node& cc : c[j])
231
        {
232
45632
          if (cc.isConst())
233
          {
234
            // Count the number of `cn`s in the string constant and make
235
            // sure that all chars are `cn`s
236
45603
            std::vector<Node> veccc = Word::getChars(cc);
237
45258
            for (const Node& cv : veccc)
238
            {
239
22464
              if (cv != cn)
240
              {
241
                // This conflict case should mostly should be taken care of by
242
                // multiset reasoning in the strings rewriter, but we recognize
243
                // this conflict just in case.
244
15
                new_ret = nm->mkConst(false);
245
                return returnRewrite(
246
15
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
247
              }
248
22449
              numCns[j]++;
249
            }
250
          }
251
          else
252
          {
253
22823
            trimmed[j].push_back(cc);
254
          }
255
        }
256
      }
257
258
      // We have to remove the same number of `cn`s from both sides, so the
259
      // side with less `cn`s determines how many we can remove
260
22880
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
261
68640
      for (size_t j = 0; j < 2; j++)
262
      {
263
45760
        size_t diff = numCns[j] - trimmedConst;
264
45760
        if (diff != 0)
265
        {
266
          // Add a constant string to the side with more `cn`s to restore
267
          // the difference in number of `cn`s
268
41326
          std::vector<Node> vec(diff, cn);
269
20663
          trimmed[j].push_back(Word::mkWordFlatten(vec));
270
        }
271
      }
272
273
45760
      Node lhs = utils::mkConcat(trimmed[i], stype);
274
45760
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
275
22880
      if (lhs != node[i] || ss != node[1 - i])
276
      {
277
        // e.g.
278
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
279
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
280
24
        new_ret = lhs.eqNode(ss);
281
24
        node = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
282
      }
283
    }
284
  }
285
286
  // ------- rewrites for (= "" _)
287
143770
  Node empty = Word::mkEmptyWord(stype);
288
203772
  for (size_t i = 0; i < 2; i++)
289
  {
290
137920
    if (node[i] == empty)
291
    {
292
86919
      Node ne = node[1 - i];
293
46476
      if (ne.getKind() == STRING_STRREPL)
294
      {
295
        // (= "" (str.replace x y x)) ---> (= x "")
296
1857
        if (ne[0] == ne[2])
297
        {
298
438
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
299
219
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
300
        }
301
302
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
303
1638
        if (StringsEntail::checkNonEmpty(ne[2]))
304
        {
305
          Node ret =
306
              nm->mkNode(AND,
307
688
                         nm->mkNode(EQUAL, ne[0], empty),
308
1376
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
309
344
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
310
        }
311
312
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
313
1294
        if (StringsEntail::checkLengthOne(ne[1], true) && ne[2] == empty)
314
        {
315
426
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
316
213
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
317
        }
318
      }
319
44619
      else if (ne.getKind() == STRING_SUBSTR)
320
      {
321
35121
        Node zero = nm->mkConst(Rational(0));
322
323
20189
        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
324
        {
325
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
326
3741
          if (ne[1] == zero)
327
          {
328
2658
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
329
1329
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
330
          }
331
332
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
333
          // if n >= 0 and m > 0
334
4824
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
335
2412
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
336
        }
337
338
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
339
16448
        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
340
        {
341
3032
          Node ret = nm->mkNode(LEQ, ne[2], zero);
342
1516
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
343
        }
344
      }
345
    }
346
  }
347
348
  // ------- rewrites for (= (str.replace _ _ _) _)
349
197162
  for (size_t i = 0; i < 2; i++)
350
  {
351
131658
    if (node[i].getKind() == STRING_STRREPL)
352
    {
353
5108
      Node repl = node[i];
354
5108
      Node x = node[1 - i];
355
356
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
357
2728
      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
358
      {
359
        Node ret = nm->mkNode(
360
68
            EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1]));
361
34
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
362
      }
363
364
      // (= x (str.replace y x y)) ---> (= x y)
365
2694
      if (repl[0] == repl[2] && x == repl[1])
366
      {
367
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
368
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
369
      }
370
371
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
372
2692
      if (x == repl[0])
373
      {
374
2396
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
375
1352
        if (eq.isConst() && !eq.getConst<bool>())
376
        {
377
616
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1]));
378
308
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
379
        }
380
      }
381
382
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
383
      // if (str.len y) = (str.len z)
384
2384
      if (repl[2] == x)
385
      {
386
2614
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
387
2614
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
388
1309
        if (ArithEntail::checkEq(lenY, lenZ))
389
        {
390
          Node ret = nm->mkNode(OR,
391
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
392
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
393
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
394
        }
395
      }
396
    }
397
  }
398
399
  // Try to rewrite (= x y) into a conjunction of equalities based on length
400
  // entailment.
401
  //
402
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
403
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
404
  //
405
  // where yi' and yi'' correspond to some yj and
406
  //   (<= (str.len x) (str.++ y1' ... ym'))
407
194049
  for (unsigned i = 0; i < 2; i++)
408
  {
409
129787
    if (node[1 - i].getKind() == STRING_CONCAT)
410
    {
411
1592
      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
412
1592
      if (!new_ret.isNull())
413
      {
414
1242
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
415
      }
416
    }
417
  }
418
419
64262
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
420
  {
421
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
422
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
423
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
424
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
425
    //
426
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
427
    //
428
    // This rewrite performs length-based equality splitting: If we can show
429
    // that two prefixes have the same length, we can split an equality into
430
    // two equalities, one over the prefixes and another over the suffixes.
431
34
    std::vector<Node> v0, v1;
432
21
    utils::getConcat(node[0], v0);
433
21
    utils::getConcat(node[1], v1);
434
21
    size_t startRhs = 0;
435
84
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
436
    {
437
134
      std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
438
134
      Node pfx0 = utils::mkConcat(pfxv0, stype);
439
259
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
440
      {
441
219
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
442
        {
443
339
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
444
339
          Node pfx1 = utils::mkConcat(pfxv1, stype);
445
339
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
446
339
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
447
448
185
          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
449
          {
450
8
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
451
8
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
452
            Node ret = nm->mkNode(kind::AND,
453
8
                                  pfx0.eqNode(pfx1),
454
8
                                  utils::mkConcat(sfxv0, stype)
455
16
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
456
4
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
457
          }
458
181
          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
459
          {
460
            // The prefix on the right-hand side is strictly longer than the
461
            // prefix on the left-hand side, so we try to strip the right-hand
462
            // prefix by the length of the left-hand prefix
463
            //
464
            // Example:
465
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
466
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
467
23
            std::vector<Node> rpfxv1;
468
23
            if (StringsEntail::stripSymbolicLength(
469
                    pfxv1, rpfxv1, 1, lenPfx0, true))
470
            {
471
              // The rewrite requires the full left-hand prefix length to be
472
              // stripped (otherwise we would have to keep parts of the
473
              // left-hand prefix).
474
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
475
              {
476
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
477
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
478
                Node ret =
479
                    nm->mkNode(kind::AND,
480
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
481
                               utils::mkConcat(sfxv0, stype)
482
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
483
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
484
              }
485
            }
486
487
            // If the prefix of the right-hand side is (strictly) longer than
488
            // the prefix of the left-hand side, we can advance the left-hand
489
            // side (since the length of the right-hand side is only increasing
490
            // in the inner loop)
491
23
            break;
492
          }
493
158
          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
494
          {
495
            // The prefix on the left-hand side is strictly longer than the
496
            // prefix on the right-hand side, so we try to strip the left-hand
497
            // prefix by the length of the right-hand prefix
498
            //
499
            // Example:
500
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
501
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
502
62
            std::vector<Node> rpfxv0;
503
33
            if (StringsEntail::stripSymbolicLength(
504
                    pfxv0, rpfxv0, 1, lenPfx1, true))
505
            {
506
              // The rewrite requires the full right-hand prefix length to be
507
              // stripped (otherwise we would have to keep parts of the
508
              // right-hand prefix).
509
6
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
510
              {
511
4
                pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end());
512
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
513
                Node ret =
514
                    nm->mkNode(kind::AND,
515
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
516
8
                               utils::mkConcat(pfxv0, stype)
517
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
518
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
519
              }
520
            }
521
522
            // If the prefix of the left-hand side is (strictly) longer than
523
            // the prefix of the right-hand side, then we don't need to check
524
            // that right-hand prefix for future left-hand prefixes anymore
525
            // (since they are increasing in length)
526
29
            startRhs = j + 1;
527
          }
528
        }
529
      }
530
    }
531
  }
532
533
64254
  return node;
534
}
535
536
11673
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
537
{
538
11673
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
539
540
  // cases where we can solve the equality
541
542
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
543
544
11673
  return node;
545
}
546
547
158166
Node SequencesRewriter::rewriteLength(Node node)
548
{
549
158166
  Assert(node.getKind() == STRING_LENGTH);
550
158166
  NodeManager* nm = NodeManager::currentNM();
551
158166
  Kind nk0 = node[0].getKind();
552
158166
  if (node[0].isConst())
553
  {
554
11402
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
555
5701
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
556
  }
557
152465
  else if (nk0 == kind::STRING_CONCAT)
558
  {
559
8011
    Node tmpNode = node[0];
560
8011
    if (tmpNode.getKind() == kind::STRING_CONCAT)
561
    {
562
16022
      std::vector<Node> node_vec;
563
31611
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
564
      {
565
23600
        if (tmpNode[i].isConst())
566
        {
567
5546
          node_vec.push_back(
568
11092
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
569
        }
570
        else
571
        {
572
18054
          node_vec.push_back(NodeManager::currentNM()->mkNode(
573
              kind::STRING_LENGTH, tmpNode[i]));
574
        }
575
      }
576
16022
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
577
8011
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
578
    }
579
  }
580
144454
  else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL)
581
  {
582
1817
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
583
1817
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
584
929
    if (len1 == len2)
585
    {
586
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
587
82
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
588
41
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
589
888
    }
590
  }
591
143525
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
592
143419
           || nk0 == STRING_UPDATE)
593
  {
594
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
595
    // len( update( x, n, y ) ) = len( x )
596
240
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
597
120
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
598
  }
599
143405
  else if (nk0 == SEQ_UNIT)
600
  {
601
186
    Node retNode = nm->mkConst(Rational(1));
602
93
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
603
  }
604
144200
  return node;
605
}
606
607
// TODO (#1180) add rewrite
608
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
609
//  str.substr( x, n1, n2+n3 )
610
41809
Node SequencesRewriter::rewriteConcat(Node node)
611
{
612
41809
  Assert(node.getKind() == kind::STRING_CONCAT);
613
83618
  Trace("strings-rewrite-debug")
614
41809
      << "Strings::rewriteConcat start " << node << std::endl;
615
83618
  std::vector<Node> node_vec;
616
83618
  Node preNode = Node::null();
617
167864
  for (Node tmpNode : node)
618
  {
619
126055
    if (tmpNode.getKind() == STRING_CONCAT)
620
    {
621
618
      unsigned j = 0;
622
      // combine the first term with the previous constant if applicable
623
618
      if (!preNode.isNull())
624
      {
625
202
        if (tmpNode[0].isConst())
626
        {
627
224
          std::vector<Node> wvec;
628
112
          wvec.push_back(preNode);
629
112
          wvec.push_back(tmpNode[0]);
630
112
          preNode = Word::mkWordFlatten(wvec);
631
112
          node_vec.push_back(preNode);
632
        }
633
        else
634
        {
635
90
          node_vec.push_back(preNode);
636
90
          node_vec.push_back(tmpNode[0]);
637
        }
638
202
        preNode = Node::null();
639
202
        ++j;
640
      }
641
      // insert the middle terms to node_vec
642
618
      if (j <= tmpNode.getNumChildren() - 1)
643
      {
644
618
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
645
      }
646
      // take the last term as the current
647
618
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
648
    }
649
126055
    if (!tmpNode.isConst())
650
    {
651
70459
      if (!preNode.isNull())
652
      {
653
13667
        if (preNode.isConst() && !Word::isEmpty(preNode))
654
        {
655
13055
          node_vec.push_back(preNode);
656
        }
657
13667
        preNode = Node::null();
658
      }
659
70459
      node_vec.push_back(tmpNode);
660
    }
661
    else
662
    {
663
55596
      if (preNode.isNull())
664
      {
665
34744
        preNode = tmpNode;
666
      }
667
      else
668
      {
669
41704
        std::vector<Node> vec;
670
20852
        vec.push_back(preNode);
671
20852
        vec.push_back(tmpNode);
672
20852
        preNode = Word::mkWordFlatten(vec);
673
      }
674
    }
675
  }
676
41809
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
677
  {
678
20038
    node_vec.push_back(preNode);
679
  }
680
681
  // Sort adjacent operands in str.++ that all result in the same string or the
682
  // empty string.
683
  //
684
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
685
  // (str.++ ... [sort those 3 arguments] ... )
686
41809
  size_t lastIdx = 0;
687
83618
  Node lastX;
688
146881
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
689
  {
690
210144
    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
691
105072
    bool nextX = false;
692
105072
    if (s != lastX)
693
    {
694
101489
      nextX = true;
695
    }
696
697
105072
    if (nextX)
698
    {
699
101489
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
700
101489
      lastX = s;
701
101489
      lastIdx = i;
702
    }
703
  }
704
41809
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
705
706
83618
  TypeNode tn = node.getType();
707
83618
  Node retNode = utils::mkConcat(node_vec, tn);
708
83618
  Trace("strings-rewrite-debug")
709
41809
      << "Strings::rewriteConcat end " << retNode << std::endl;
710
41809
  if (retNode != node)
711
  {
712
13877
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
713
  }
714
27932
  return node;
715
}
716
717
2368
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
718
{
719
2368
  Assert(node.getKind() == kind::REGEXP_CONCAT);
720
2368
  NodeManager* nm = NodeManager::currentNM();
721
4736
  Trace("strings-rewrite-debug")
722
2368
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
723
4736
  Node retNode = node;
724
4736
  std::vector<Node> vec;
725
2368
  bool changed = false;
726
4736
  Node emptyRe;
727
728
  // get the string type that are members of this regular expression
729
4736
  TypeNode rtype = node.getType();
730
4736
  TypeNode stype;
731
2368
  if (rtype.isRegExp())
732
  {
733
    // standard regular expressions are for strings
734
2368
    stype = nm->stringType();
735
  }
736
  else
737
  {
738
    Unimplemented();
739
  }
740
741
9205
  for (const Node& c : node)
742
  {
743
6871
    if (c.getKind() == REGEXP_CONCAT)
744
    {
745
177
      changed = true;
746
567
      for (const Node& cc : c)
747
      {
748
390
        vec.push_back(cc);
749
      }
750
    }
751
16273
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
752
16223
             && Word::isEmpty(c[0]))
753
    {
754
108
      changed = true;
755
108
      emptyRe = c;
756
    }
757
6586
    else if (c.getKind() == REGEXP_EMPTY)
758
    {
759
      // re.++( ..., empty, ... ) ---> empty
760
68
      Node ret = nm->mkNode(REGEXP_EMPTY);
761
34
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
762
    }
763
    else
764
    {
765
6552
      vec.push_back(c);
766
    }
767
  }
768
2334
  if (changed)
769
  {
770
    // flatten
771
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
772
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
773
267
    if (vec.empty())
774
    {
775
2
      Assert(!emptyRe.isNull());
776
2
      retNode = emptyRe;
777
    }
778
    else
779
    {
780
265
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
781
    }
782
267
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
783
  }
784
4134
  Trace("strings-rewrite-debug")
785
2067
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
786
4134
  std::vector<Node> cvec;
787
  // the current accumulation of constant strings
788
4134
  std::vector<Node> preReStr;
789
  // whether the last component was (_)*
790
2067
  bool lastAllStar = false;
791
4134
  String emptyStr = String("");
792
  // this loop checks to see if components can be combined or dropped
793
10334
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
794
  {
795
16534
    Node curr;
796
8267
    if (i < size)
797
    {
798
6200
      curr = vec[i];
799
6200
      Assert(curr.getKind() != REGEXP_CONCAT);
800
    }
801
    // update preReStr
802
8267
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
803
    {
804
2581
      lastAllStar = false;
805
2581
      preReStr.push_back(curr[0]);
806
2581
      curr = Node::null();
807
    }
808
5686
    else if (!preReStr.empty())
809
    {
810
2127
      Assert(!lastAllStar);
811
      // this groups consecutive strings a++b ---> ab
812
4254
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
813
2127
      cvec.push_back(acc);
814
2127
      preReStr.clear();
815
    }
816
3559
    else if (!curr.isNull() && lastAllStar)
817
    {
818
      // if empty, drop it
819
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
820
363
      if (RegExpEntail::isConstRegExp(curr)
821
363
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
822
      {
823
49
        curr = Node::null();
824
      }
825
    }
826
8267
    if (!curr.isNull())
827
    {
828
3570
      lastAllStar = false;
829
3570
      if (curr.getKind() == REGEXP_STAR)
830
      {
831
        // we can group stars (a)* ++ (a)* ---> (a)*
832
1820
        if (!cvec.empty() && cvec.back() == curr)
833
        {
834
8
          curr = Node::null();
835
        }
836
1812
        else if (curr[0].getKind() == REGEXP_SIGMA)
837
        {
838
842
          Assert(!lastAllStar);
839
842
          lastAllStar = true;
840
          // go back and remove empty ones from back of cvec
841
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
842
2405
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
843
3239
                 && RegExpEntail::testConstStringInRegExp(
844
689
                     emptyStr, 0, cvec.back()))
845
          {
846
8
            cvec.pop_back();
847
          }
848
        }
849
      }
850
    }
851
8267
    if (!curr.isNull())
852
    {
853
3562
      cvec.push_back(curr);
854
    }
855
  }
856
2067
  Assert(!cvec.empty());
857
2067
  retNode = utils::mkConcat(cvec, rtype);
858
2067
  if (retNode != node)
859
  {
860
    // handles all cases where consecutive re constants are combined or
861
    // dropped as described in the loop above.
862
201
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
863
  }
864
865
  // flipping adjacent star arguments
866
1866
  changed = false;
867
5216
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
868
  {
869
3350
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
870
    {
871
      // by convention, flip the order (a*)++a ---> a++(a*)
872
64
      std::swap(cvec[i], cvec[i + 1]);
873
64
      changed = true;
874
    }
875
  }
876
1866
  if (changed)
877
  {
878
48
    retNode = utils::mkConcat(cvec, rtype);
879
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
880
  }
881
1818
  return node;
882
}
883
884
886
Node SequencesRewriter::rewriteStarRegExp(TNode node)
885
{
886
886
  Assert(node.getKind() == REGEXP_STAR);
887
886
  NodeManager* nm = NodeManager::currentNM();
888
1772
  Node retNode = node;
889
886
  if (node[0].getKind() == REGEXP_STAR)
890
  {
891
    // ((R)*)* ---> R*
892
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
893
  }
894
2988
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
895
2962
           && Word::isEmpty(node[0][0]))
896
  {
897
    // ("")* ---> ""
898
14
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
899
  }
900
868
  else if (node[0].getKind() == REGEXP_EMPTY)
901
  {
902
    // (empty)* ---> ""
903
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
904
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
905
  }
906
860
  else if (node[0].getKind() == REGEXP_UNION)
907
  {
908
    // simplification of unions under star
909
168
    if (RegExpEntail::hasEpsilonNode(node[0]))
910
    {
911
22
      bool changed = false;
912
22
      std::vector<Node> node_vec;
913
66
      for (const Node& nc : node[0])
914
      {
915
130
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
916
130
            && Word::isEmpty(nc[0]))
917
        {
918
          // can be removed
919
22
          changed = true;
920
        }
921
        else
922
        {
923
22
          node_vec.push_back(nc);
924
        }
925
      }
926
22
      if (changed)
927
      {
928
22
        retNode = node_vec.size() == 1 ? node_vec[0]
929
                                       : nm->mkNode(REGEXP_UNION, node_vec);
930
22
        retNode = nm->mkNode(REGEXP_STAR, retNode);
931
        // simplification of union beneath star based on loop above
932
        // for example, ( "" | "a" )* ---> ("a")*
933
22
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
934
      }
935
    }
936
  }
937
838
  return node;
938
}
939
940
733
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
941
{
942
733
  Kind nk = node.getKind();
943
733
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
944
1466
  Trace("strings-rewrite-debug")
945
733
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
946
1466
  std::vector<Node> node_vec;
947
1466
  std::vector<Node> polRegExp[2];
948
2337
  for (const Node& ni : node)
949
  {
950
1606
    if (ni.getKind() == nk)
951
    {
952
33
      for (const Node& nic : ni)
953
      {
954
22
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
955
        {
956
22
          node_vec.push_back(nic);
957
        }
958
      }
959
    }
960
1595
    else if (ni.getKind() == REGEXP_EMPTY)
961
    {
962
39
      if (nk == REGEXP_INTER)
963
      {
964
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
965
      }
966
      // otherwise, can ignore
967
    }
968
1556
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
969
    {
970
8
      if (nk == REGEXP_UNION)
971
      {
972
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
973
      }
974
      // otherwise, can ignore
975
    }
976
1548
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
977
    {
978
1529
      node_vec.push_back(ni);
979
1529
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
980
3058
      Node nia = pindex == 1 ? ni[0] : ni;
981
1529
      polRegExp[pindex].push_back(nia);
982
    }
983
  }
984
731
  NodeManager* nm = NodeManager::currentNM();
985
  // use inclusion tests
986
851
  for (const Node& negMem : polRegExp[1])
987
  {
988
226
    for (const Node& posMem : polRegExp[0])
989
    {
990
196
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
991
196
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
992
      // inclusion test for conflicting case m1 contains m2
993
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
994
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
995
106
      if (RegExpEntail::regExpIncludes(m1, m2))
996
      {
997
32
        Node retNode;
998
16
        if (nk == REGEXP_INTER)
999
        {
1000
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1001
        }
1002
        else
1003
        {
1004
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1005
        }
1006
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1007
      }
1008
    }
1009
  }
1010
1430
  Node retNode;
1011
715
  if (node_vec.empty())
1012
  {
1013
    if (nk == REGEXP_INTER)
1014
    {
1015
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1016
    }
1017
    else
1018
    {
1019
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1020
    }
1021
  }
1022
  else
1023
  {
1024
715
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1025
  }
1026
715
  if (retNode != node)
1027
  {
1028
    // flattening and removing children, based on loop above
1029
65
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1030
  }
1031
650
  return node;
1032
}
1033
1034
24
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1035
{
1036
24
  Assert(node.getKind() == REGEXP_LOOP);
1037
48
  Node retNode = node;
1038
48
  Node r = node[0];
1039
24
  if (r.getKind() == REGEXP_STAR)
1040
  {
1041
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1042
  }
1043
24
  NodeManager* nm = NodeManager::currentNM();
1044
48
  cvc5::Rational rMaxInt(String::maxSize());
1045
24
  uint32_t l = utils::getLoopMinOccurrences(node);
1046
48
  std::vector<Node> vec_nodes;
1047
110
  for (unsigned i = 0; i < l; i++)
1048
  {
1049
86
    vec_nodes.push_back(r);
1050
  }
1051
  Node n =
1052
24
      vec_nodes.size() == 0
1053
24
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1054
48
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1055
24
  uint32_t u = utils::getLoopMaxOccurrences(node);
1056
24
  if (u < l)
1057
  {
1058
8
    std::vector<Node> nvec;
1059
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1060
  }
1061
20
  else if (u == l)
1062
  {
1063
10
    retNode = n;
1064
  }
1065
  else
1066
  {
1067
20
    std::vector<Node> vec2;
1068
10
    vec2.push_back(n);
1069
20
    TypeNode rtype = nm->regExpType();
1070
52
    for (uint32_t j = l; j < u; j++)
1071
    {
1072
42
      vec_nodes.push_back(r);
1073
42
      n = utils::mkConcat(vec_nodes, rtype);
1074
42
      vec2.push_back(n);
1075
    }
1076
10
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1077
  }
1078
48
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1079
24
                      << std::endl;
1080
24
  if (retNode != node)
1081
  {
1082
24
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1083
  }
1084
  return node;
1085
}
1086
1087
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1088
{
1089
4
  Assert(node.getKind() == REGEXP_REPEAT);
1090
4
  NodeManager* nm = NodeManager::currentNM();
1091
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1092
4
  unsigned r = utils::getRepeatAmount(node);
1093
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1094
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1095
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1096
}
1097
1098
25
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1099
{
1100
25
  Assert(node.getKind() == REGEXP_OPT);
1101
25
  NodeManager* nm = NodeManager::currentNM();
1102
  Node retNode =
1103
      nm->mkNode(REGEXP_UNION,
1104
50
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1105
100
                 node[0]);
1106
50
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1107
}
1108
1109
35
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1110
{
1111
35
  Assert(node.getKind() == REGEXP_PLUS);
1112
35
  NodeManager* nm = NodeManager::currentNM();
1113
  Node retNode =
1114
70
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1115
70
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1116
}
1117
1118
33
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1119
{
1120
33
  Assert(node.getKind() == REGEXP_DIFF);
1121
33
  NodeManager* nm = NodeManager::currentNM();
1122
  Node retNode =
1123
66
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1124
66
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1125
}
1126
1127
348
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1128
{
1129
348
  Assert(node.getKind() == REGEXP_RANGE);
1130
  unsigned ch[2];
1131
1044
  for (size_t i = 0; i < 2; ++i)
1132
  {
1133
696
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1134
    {
1135
      // not applied to characters, it is not handled
1136
      return node;
1137
    }
1138
696
    ch[i] = node[i].getConst<String>().front();
1139
  }
1140
1141
348
  NodeManager* nm = NodeManager::currentNM();
1142
348
  if (node[0] == node[1])
1143
  {
1144
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1145
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1146
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1147
  }
1148
1149
348
  if (ch[0] > ch[1])
1150
  {
1151
    // re.range( "B", "A" ) ---> re.none
1152
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1153
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1154
  }
1155
346
  return node;
1156
}
1157
1158
7253
Node SequencesRewriter::rewriteMembership(TNode node)
1159
{
1160
7253
  NodeManager* nm = NodeManager::currentNM();
1161
14506
  Node x = node[0];
1162
14506
  Node r = node[1];
1163
1164
14506
  TypeNode stype = x.getType();
1165
14506
  TypeNode rtype = r.getType();
1166
1167
7253
  if(r.getKind() == kind::REGEXP_EMPTY)
1168
  {
1169
132
    Node retNode = NodeManager::currentNM()->mkConst(false);
1170
66
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1171
  }
1172
7187
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1173
  {
1174
    // test whether x in node[1]
1175
2330
    cvc5::String s = x.getConst<String>();
1176
1165
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1177
2330
    Node retNode = NodeManager::currentNM()->mkConst(test);
1178
1165
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1179
  }
1180
6022
  else if (r.getKind() == kind::REGEXP_SIGMA)
1181
  {
1182
96
    Node one = nm->mkConst(Rational(1));
1183
96
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1184
48
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1185
  }
1186
5974
  else if (r.getKind() == kind::REGEXP_STAR)
1187
  {
1188
2057
    if (x.isConst())
1189
    {
1190
27
      size_t xlen = Word::getLength(x);
1191
27
      if (xlen == 0)
1192
      {
1193
18
        Node retNode = nm->mkConst(true);
1194
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1195
9
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1196
      }
1197
18
      else if (xlen == 1)
1198
      {
1199
18
        if (r[0].getKind() == STRING_TO_REGEXP)
1200
        {
1201
4
          Node retNode = r[0][0].eqNode(x);
1202
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1203
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1204
        }
1205
      }
1206
    }
1207
2030
    else if (x.getKind() == STRING_CONCAT)
1208
    {
1209
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1210
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1211
      //     if the length of all strings in R is one.
1212
374
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1213
248
      if (!flr.isNull())
1214
      {
1215
122
        Node one = nm->mkConst(Rational(1));
1216
122
        if (flr == one)
1217
        {
1218
244
          NodeBuilder nb(AND);
1219
442
          for (const Node& xc : x)
1220
          {
1221
320
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1222
          }
1223
          return returnRewrite(
1224
122
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1225
        }
1226
      }
1227
    }
1228
1924
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1229
    {
1230
212
      Node retNode = NodeManager::currentNM()->mkConst(true);
1231
106
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1232
    }
1233
  }
1234
3917
  else if (r.getKind() == kind::REGEXP_CONCAT)
1235
  {
1236
2860
    bool allSigma = true;
1237
2860
    bool allSigmaStrict = true;
1238
2860
    unsigned allSigmaMinSize = 0;
1239
5683
    Node constStr;
1240
2860
    size_t constIdx = 0;
1241
2860
    size_t nchildren = r.getNumChildren();
1242
6363
    for (size_t i = 0; i < nchildren; i++)
1243
    {
1244
9368
      Node rc = r[i];
1245
5865
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1246
5865
      if (rc.getKind() == kind::REGEXP_SIGMA)
1247
      {
1248
813
        allSigmaMinSize++;
1249
      }
1250
5052
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1251
      {
1252
1232
        allSigmaStrict = false;
1253
      }
1254
3820
      else if (rc.getKind() == STRING_TO_REGEXP)
1255
      {
1256
1956
        if (constStr.isNull())
1257
        {
1258
1458
          constStr = rc[0];
1259
1458
          constIdx = i;
1260
        }
1261
        else
1262
        {
1263
498
          allSigma = false;
1264
498
          break;
1265
        }
1266
      }
1267
      else
1268
      {
1269
1864
        allSigma = false;
1270
1864
        break;
1271
      }
1272
    }
1273
2860
    if (allSigma)
1274
    {
1275
498
      if (constStr.isNull())
1276
      {
1277
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1278
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1279
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1280
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1281
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1282
      }
1283
478
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1284
17
               && constIdx != nchildren - 1)
1285
      {
1286
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1287
34
        Node retNode = nm->mkNode(STRING_STRCTN, x, constStr);
1288
17
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1289
      }
1290
    }
1291
  }
1292
2114
  else if (r.getKind() == kind::REGEXP_INTER
1293
1057
           || r.getKind() == kind::REGEXP_UNION)
1294
  {
1295
460
    std::vector<Node> mvec;
1296
740
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1297
    {
1298
510
      mvec.push_back(
1299
1020
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1300
    }
1301
    Node retNode = NodeManager::currentNM()->mkNode(
1302
460
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1303
230
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1304
  }
1305
827
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1306
  {
1307
850
    Node retNode = x.eqNode(r[0]);
1308
425
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1309
  }
1310
402
  else if (r.getKind() == REGEXP_RANGE)
1311
  {
1312
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1313
    // we do not do this if the arguments are not constant
1314
319
    if (RegExpEntail::isConstRegExp(r))
1315
    {
1316
638
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1317
      Node retNode =
1318
          nm->mkNode(AND,
1319
638
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1320
1276
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1321
319
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1322
    }
1323
  }
1324
83
  else if (r.getKind() == REGEXP_COMPLEMENT)
1325
  {
1326
106
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1327
53
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1328
  }
1329
1330
  // do simple consumes
1331
9342
  Node retNode = node;
1332
4671
  if (r.getKind() == kind::REGEXP_STAR)
1333
  {
1334
5444
    for (unsigned dir = 0; dir <= 1; dir++)
1335
    {
1336
7262
      std::vector<Node> mchildren;
1337
3636
      utils::getConcat(x, mchildren);
1338
3636
      bool success = true;
1339
10928
      while (success)
1340
      {
1341
3646
        success = false;
1342
7292
        std::vector<Node> children;
1343
3646
        utils::getConcat(r[0], children);
1344
7292
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1345
3646
        if (!scn.isNull())
1346
        {
1347
          Trace("regexp-ext-rewrite")
1348
              << "Regexp star : const conflict : " << node << std::endl;
1349
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1350
        }
1351
3646
        else if (children.empty())
1352
        {
1353
          // fully consumed one copy of the STAR
1354
18
          if (mchildren.empty())
1355
          {
1356
            Trace("regexp-ext-rewrite")
1357
                << "Regexp star : full consume : " << node << std::endl;
1358
            Node ret = NodeManager::currentNM()->mkConst(true);
1359
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1360
          }
1361
          else
1362
          {
1363
36
            Node prev = retNode;
1364
54
            retNode = nm->mkNode(
1365
36
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1366
            // Iterate again if the node changed. It may not have changed if
1367
            // nothing was consumed from mchildren (e.g. if the body of the
1368
            // re.* accepts the empty string.
1369
18
            success = (retNode != prev);
1370
          }
1371
        }
1372
      }
1373
3636
      if (retNode != node)
1374
      {
1375
20
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1376
10
                                    << " -> " << retNode << std::endl;
1377
10
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1378
      }
1379
    }
1380
  }
1381
  else
1382
  {
1383
5375
    std::vector<Node> children;
1384
2853
    utils::getConcat(r, children);
1385
5375
    std::vector<Node> mchildren;
1386
2853
    utils::getConcat(x, mchildren);
1387
2853
    unsigned prevSize = children.size() + mchildren.size();
1388
5375
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1389
2853
    if (!scn.isNull())
1390
    {
1391
70
      Trace("regexp-ext-rewrite")
1392
35
          << "Regexp : const conflict : " << node << std::endl;
1393
35
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1394
    }
1395
2818
    else if ((children.size() + mchildren.size()) != prevSize)
1396
    {
1397
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1398
      // above, we strip components to construct an equivalent membership:
1399
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1400
592
      Node xn = utils::mkConcat(mchildren, stype);
1401
592
      Node emptyStr = Word::mkEmptyWord(stype);
1402
296
      if (children.empty())
1403
      {
1404
        // If we stripped all components on the right, then the left is
1405
        // equal to the empty string.
1406
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1407
4
        retNode = xn.eqNode(emptyStr);
1408
      }
1409
      else
1410
      {
1411
        // otherwise, construct the updated regular expression
1412
292
        retNode =
1413
584
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1414
      }
1415
592
      Trace("regexp-ext-rewrite")
1416
296
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1417
296
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1418
    }
1419
  }
1420
4330
  return node;
1421
}
1422
1423
395076
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1424
{
1425
790152
  Trace("sequences-postrewrite")
1426
395076
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1427
790152
  Node retNode = node;
1428
395076
  Kind nk = node.getKind();
1429
395076
  if (nk == kind::STRING_CONCAT)
1430
  {
1431
41809
    retNode = rewriteConcat(node);
1432
  }
1433
353267
  else if (nk == kind::EQUAL)
1434
  {
1435
92308
    retNode = rewriteEquality(node);
1436
  }
1437
260959
  else if (nk == kind::STRING_LENGTH)
1438
  {
1439
158166
    retNode = rewriteLength(node);
1440
  }
1441
102793
  else if (nk == kind::STRING_CHARAT)
1442
  {
1443
107
    retNode = rewriteCharAt(node);
1444
  }
1445
102686
  else if (nk == kind::STRING_SUBSTR)
1446
  {
1447
33894
    retNode = rewriteSubstr(node);
1448
  }
1449
68792
  else if (nk == kind::STRING_UPDATE)
1450
  {
1451
175
    retNode = rewriteUpdate(node);
1452
  }
1453
68617
  else if (nk == kind::STRING_STRCTN)
1454
  {
1455
25361
    retNode = rewriteContains(node);
1456
  }
1457
43256
  else if (nk == kind::STRING_STRIDOF)
1458
  {
1459
4502
    retNode = rewriteIndexof(node);
1460
  }
1461
38754
  else if (nk == kind::STRING_STRREPL)
1462
  {
1463
3152
    retNode = rewriteReplace(node);
1464
  }
1465
35602
  else if (nk == kind::STRING_STRREPLALL)
1466
  {
1467
145
    retNode = rewriteReplaceAll(node);
1468
  }
1469
35457
  else if (nk == kind::STRING_REPLACE_RE)
1470
  {
1471
119
    retNode = rewriteReplaceRe(node);
1472
  }
1473
35338
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1474
  {
1475
60
    retNode = rewriteReplaceReAll(node);
1476
  }
1477
35278
  else if (nk == STRING_REV)
1478
  {
1479
237
    retNode = rewriteStrReverse(node);
1480
  }
1481
35041
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1482
  {
1483
143
    retNode = rewritePrefixSuffix(node);
1484
  }
1485
34898
  else if (nk == kind::STRING_IN_REGEXP)
1486
  {
1487
7253
    retNode = rewriteMembership(node);
1488
  }
1489
27645
  else if (nk == REGEXP_CONCAT)
1490
  {
1491
2368
    retNode = rewriteConcatRegExp(node);
1492
  }
1493
25277
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1494
  {
1495
733
    retNode = rewriteAndOrRegExp(node);
1496
  }
1497
24544
  else if (nk == REGEXP_DIFF)
1498
  {
1499
33
    retNode = rewriteDifferenceRegExp(node);
1500
  }
1501
24511
  else if (nk == REGEXP_STAR)
1502
  {
1503
886
    retNode = rewriteStarRegExp(node);
1504
  }
1505
23625
  else if (nk == REGEXP_PLUS)
1506
  {
1507
35
    retNode = rewritePlusRegExp(node);
1508
  }
1509
23590
  else if (nk == REGEXP_OPT)
1510
  {
1511
25
    retNode = rewriteOptionRegExp(node);
1512
  }
1513
23565
  else if (nk == REGEXP_RANGE)
1514
  {
1515
348
    retNode = rewriteRangeRegExp(node);
1516
  }
1517
23217
  else if (nk == REGEXP_LOOP)
1518
  {
1519
24
    retNode = rewriteLoopRegExp(node);
1520
  }
1521
23193
  else if (nk == REGEXP_REPEAT)
1522
  {
1523
4
    retNode = rewriteRepeatRegExp(node);
1524
  }
1525
23189
  else if (nk == SEQ_UNIT)
1526
  {
1527
307
    retNode = rewriteSeqUnit(node);
1528
  }
1529
22882
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1530
  {
1531
330
    retNode = rewriteSeqNth(node);
1532
  }
1533
1534
790152
  Trace("sequences-postrewrite")
1535
395076
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1536
395076
      << std::endl;
1537
395076
  if (node != retNode)
1538
  {
1539
152088
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1540
76044
                                   << node << " to " << retNode << std::endl;
1541
76044
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1542
  }
1543
319032
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1544
319032
  return RewriteResponse(REWRITE_DONE, retNode);
1545
}
1546
1547
245585
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1548
{
1549
245585
  return RewriteResponse(REWRITE_DONE, node);
1550
}
1551
1552
687
TrustNode SequencesRewriter::expandDefinition(Node node)
1553
{
1554
1374
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1555
687
                           << std::endl;
1556
1557
687
  if (node.getKind() == kind::SEQ_NTH)
1558
  {
1559
2
    NodeManager* nm = NodeManager::currentNM();
1560
4
    Node s = node[0];
1561
4
    Node n = node[1];
1562
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1563
    Node cond = nm->mkNode(AND,
1564
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1565
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1566
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1567
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1568
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1569
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1570
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1571
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1572
  }
1573
685
  return TrustNode::null();
1574
}
1575
1576
330
Node SequencesRewriter::rewriteSeqNth(Node node)
1577
{
1578
330
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1579
660
  Node s = node[0];
1580
660
  Node i = node[1];
1581
330
  if (s.isConst() && i.isConst())
1582
  {
1583
26
    size_t len = Word::getLength(s);
1584
26
    size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1585
26
    if (pos < len)
1586
    {
1587
16
      std::vector<Node> elements = s.getConst<Sequence>().getVec();
1588
8
      const Node& ret = elements[pos];
1589
8
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1590
    }
1591
18
    else if (node.getKind() == SEQ_NTH_TOTAL)
1592
    {
1593
      // return arbitrary term
1594
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1595
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1596
    }
1597
    else
1598
    {
1599
16
      return node;
1600
    }
1601
  }
1602
  else
1603
  {
1604
304
    return node;
1605
  }
1606
}
1607
1608
107
Node SequencesRewriter::rewriteCharAt(Node node)
1609
{
1610
107
  Assert(node.getKind() == STRING_CHARAT);
1611
107
  NodeManager* nm = NodeManager::currentNM();
1612
214
  Node one = nm->mkConst(Rational(1));
1613
214
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1614
214
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1615
}
1616
1617
33906
Node SequencesRewriter::rewriteSubstr(Node node)
1618
{
1619
33906
  Assert(node.getKind() == kind::STRING_SUBSTR);
1620
1621
33906
  NodeManager* nm = NodeManager::currentNM();
1622
33906
  if (node[0].isConst())
1623
  {
1624
10100
    if (Word::isEmpty(node[0]))
1625
    {
1626
1604
      Node ret = node[0];
1627
802
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1628
    }
1629
    // rewriting for constant arguments
1630
9298
    if (node[1].isConst() && node[2].isConst())
1631
    {
1632
9622
      Node s = node[0];
1633
9622
      cvc5::Rational rMaxInt(String::maxSize());
1634
      uint32_t start;
1635
4811
      if (node[1].getConst<Rational>() > rMaxInt)
1636
      {
1637
        // start beyond the maximum size of strings
1638
        // thus, it must be beyond the end point of this string
1639
8
        Node ret = Word::mkEmptyWord(node.getType());
1640
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1641
      }
1642
4807
      else if (node[1].getConst<Rational>().sgn() < 0)
1643
      {
1644
        // start before the beginning of the string
1645
970
        Node ret = Word::mkEmptyWord(node.getType());
1646
485
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1647
      }
1648
      else
1649
      {
1650
4322
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1651
4322
        if (start >= Word::getLength(node[0]))
1652
        {
1653
          // start beyond the end of the string
1654
726
          Node ret = Word::mkEmptyWord(node.getType());
1655
363
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1656
        }
1657
      }
1658
3959
      if (node[2].getConst<Rational>() > rMaxInt)
1659
      {
1660
        // take up to the end of the string
1661
1
        size_t lenS = Word::getLength(s);
1662
2
        Node ret = Word::suffix(s, lenS - start);
1663
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1664
      }
1665
3958
      else if (node[2].getConst<Rational>().sgn() <= 0)
1666
      {
1667
596
        Node ret = Word::mkEmptyWord(node.getType());
1668
298
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1669
      }
1670
      else
1671
      {
1672
        uint32_t len =
1673
3660
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1674
3660
        if (start + len > Word::getLength(node[0]))
1675
        {
1676
          // take up to the end of the string
1677
305
          size_t lenS = Word::getLength(s);
1678
610
          Node ret = Word::suffix(s, lenS - start);
1679
305
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1680
        }
1681
        else
1682
        {
1683
          // compute the substr using the constant string
1684
6710
          Node ret = Word::substr(s, start, len);
1685
3355
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1686
        }
1687
      }
1688
    }
1689
  }
1690
56586
  Node zero = nm->mkConst(cvc5::Rational(0));
1691
1692
  // if entailed non-positive length or negative start point
1693
28293
  if (ArithEntail::check(zero, node[1], true))
1694
  {
1695
20
    Node ret = Word::mkEmptyWord(node.getType());
1696
10
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1697
  }
1698
28283
  else if (ArithEntail::check(zero, node[2]))
1699
  {
1700
1142
    Node ret = Word::mkEmptyWord(node.getType());
1701
571
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1702
  }
1703
1704
27712
  if (node[0].getKind() == STRING_SUBSTR)
1705
  {
1706
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1707
    //
1708
    // Note that this rewrite can be generalized to:
1709
    //
1710
    // (str.substr x a b) ---> "" if a >= (str.len x)
1711
    //
1712
    // This can be done when we generalize our entailment methods to
1713
    // accept an optional context. Then we could conjecture that
1714
    // (str.substr x a b) rewrites to "" and do a case analysis:
1715
    //
1716
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1717
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1718
    //
1719
    // For example, for (str.substr (str.substr x a a) a a), we could
1720
    // then deduce that under those assumptions, "a" is an
1721
    // over-approximation of the length of (str.substr x a a), which
1722
    // then allows us to reason that the result of the whole term must
1723
    // be empty.
1724
2663
    if (ArithEntail::check(node[1], node[0][2]))
1725
    {
1726
62
      Node ret = Word::mkEmptyWord(node.getType());
1727
31
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1728
    }
1729
  }
1730
25049
  else if (node[0].getKind() == STRING_STRREPL)
1731
  {
1732
    // (str.substr (str.replace x y z) 0 n)
1733
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1734
    // if (str.len y) = 1 and (str.len z) = 1
1735
341
    if (node[1] == zero)
1736
    {
1737
483
      if (StringsEntail::checkLengthOne(node[0][1], true)
1738
483
          && StringsEntail::checkLengthOne(node[0][2], true))
1739
      {
1740
        Node ret = nm->mkNode(
1741
            kind::STRING_STRREPL,
1742
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1743
            node[0][1],
1744
16
            node[0][2]);
1745
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1746
      }
1747
    }
1748
  }
1749
1750
55354
  std::vector<Node> n1;
1751
27677
  utils::getConcat(node[0], n1);
1752
55354
  TypeNode stype = node.getType();
1753
1754
  // definite inclusion
1755
27677
  if (node[1] == zero)
1756
  {
1757
19458
    Node curr = node[2];
1758
19458
    std::vector<Node> childrenr;
1759
9983
    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
1760
    {
1761
508
      if (curr != zero && !n1.empty())
1762
      {
1763
297
        childrenr.push_back(nm->mkNode(
1764
198
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1765
      }
1766
1016
      Node ret = utils::mkConcat(childrenr, stype);
1767
508
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1768
    }
1769
  }
1770
1771
  // symbolic length analysis
1772
78530
  for (unsigned r = 0; r < 2; r++)
1773
  {
1774
    // the amount of characters we can strip
1775
104275
    Node curr;
1776
52914
    if (r == 0)
1777
    {
1778
27169
      if (node[1] != zero)
1779
      {
1780
        // strip up to start point off the start of the string
1781
17694
        curr = node[1];
1782
      }
1783
    }
1784
25745
    else if (r == 1)
1785
    {
1786
      Node tot_len =
1787
51435
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1788
51435
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1789
25745
      if (node[2] != tot_len)
1790
      {
1791
25021
        if (ArithEntail::check(node[2], tot_len))
1792
        {
1793
          // end point beyond end point of string, map to tot_len
1794
64
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1795
32
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1796
        }
1797
        else
1798
        {
1799
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1800
24989
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1801
        }
1802
      }
1803
1804
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1805
      Node n1_lt_tot_len =
1806
51403
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1807
25713
      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
1808
      {
1809
32
        Node ret = Word::mkEmptyWord(node.getType());
1810
16
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1811
      }
1812
1813
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1814
      Node non_zero_len =
1815
51387
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1816
25697
      if (ArithEntail::checkWithAssumption(
1817
              non_zero_len, node[1], tot_len, false))
1818
      {
1819
4
        Node ret = Word::mkEmptyWord(node.getType());
1820
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1821
      }
1822
1823
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1824
      Node geq_zero_start =
1825
51385
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1826
25695
      if (ArithEntail::checkWithAssumption(
1827
              geq_zero_start, zero, tot_len, false))
1828
      {
1829
4
        Node ret = Word::mkEmptyWord(node.getType());
1830
        return returnRewrite(
1831
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1832
      }
1833
1834
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1835
25693
      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
1836
      {
1837
6
        Node ret = Word::mkEmptyWord(node.getType());
1838
3
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1839
      }
1840
    }
1841
52859
    if (!curr.isNull())
1842
    {
1843
      // strip off components while quantity is entailed positive
1844
42660
      int dir = r == 0 ? 1 : -1;
1845
83822
      std::vector<Node> childrenr;
1846
42660
      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
1847
      {
1848
1498
        if (r == 0)
1849
        {
1850
          Node ret = nm->mkNode(
1851
2848
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1852
1424
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1853
        }
1854
        else
1855
        {
1856
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1857
148
                                utils::mkConcat(n1, stype),
1858
                                node[1],
1859
296
                                node[2]);
1860
74
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1861
        }
1862
      }
1863
    }
1864
  }
1865
  // combine substr
1866
25616
  if (node[0].getKind() == kind::STRING_SUBSTR)
1867
  {
1868
4740
    Node start_inner = node[0][1];
1869
4740
    Node start_outer = node[1];
1870
2550
    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
1871
    {
1872
      // both are positive
1873
      // thus, start point is definitely start_inner+start_outer.
1874
      // We can rewrite if it for certain what the length is
1875
1876
      // the length of a string from the inner substr subtracts the start point
1877
      // of the outer substr
1878
      Node len_from_inner =
1879
2844
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1880
2844
      Node len_from_outer = node[2];
1881
2844
      Node new_len;
1882
      // take quantity that is for sure smaller than the other
1883
1602
      if (len_from_inner == len_from_outer)
1884
      {
1885
9
        new_len = len_from_inner;
1886
      }
1887
1593
      else if (ArithEntail::check(len_from_inner, len_from_outer))
1888
      {
1889
279
        new_len = len_from_outer;
1890
      }
1891
1314
      else if (ArithEntail::check(len_from_outer, len_from_inner))
1892
      {
1893
72
        new_len = len_from_inner;
1894
      }
1895
1602
      if (!new_len.isNull())
1896
      {
1897
720
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1898
        Node ret =
1899
720
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1900
360
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1901
      }
1902
    }
1903
  }
1904
25256
  return node;
1905
}
1906
1907
175
Node SequencesRewriter::rewriteUpdate(Node node)
1908
{
1909
175
  Assert(node.getKind() == kind::STRING_UPDATE);
1910
350
  Node s = node[0];
1911
175
  if (s.isConst())
1912
  {
1913
163
    if (Word::isEmpty(s))
1914
    {
1915
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1916
    }
1917
    // rewriting for constant arguments
1918
163
    if (node[1].isConst())
1919
    {
1920
135
      cvc5::Rational rMaxInt(String::maxSize());
1921
119
      if (node[1].getConst<Rational>() > rMaxInt)
1922
      {
1923
        // start beyond the maximum size of strings
1924
        // thus, it must be beyond the end point of this string
1925
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1926
      }
1927
119
      else if (node[1].getConst<Rational>().sgn() < 0)
1928
      {
1929
        // start before the beginning of the string
1930
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1931
      }
1932
      uint32_t start =
1933
119
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1934
119
      size_t len = Word::getLength(s);
1935
119
      if (start >= len)
1936
      {
1937
        // start beyond the end of the string
1938
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1939
      }
1940
119
      if (node[2].isConst())
1941
      {
1942
206
        Node ret = Word::update(s, start, node[2]);
1943
103
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1944
      }
1945
    }
1946
  }
1947
1948
72
  return node;
1949
}
1950
1951
199050
Node SequencesRewriter::rewriteContains(Node node)
1952
{
1953
199050
  Assert(node.getKind() == kind::STRING_STRCTN);
1954
199050
  NodeManager* nm = NodeManager::currentNM();
1955
1956
199050
  if (node[0] == node[1])
1957
  {
1958
1086
    Node ret = NodeManager::currentNM()->mkConst(true);
1959
543
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1960
  }
1961
198507
  if (node[0].isConst())
1962
  {
1963
60573
    if (node[1].isConst())
1964
    {
1965
9612
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1966
4806
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1967
    }
1968
    else
1969
    {
1970
107553
      Node t = node[1];
1971
55767
      if (Word::isEmpty(node[0]))
1972
      {
1973
        Node len1 =
1974
50541
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1975
25354
        if (ArithEntail::check(len1, true))
1976
        {
1977
          // we handle the false case here since the rewrite for equality
1978
          // uses this function, hence we want to conclude false if possible.
1979
          // len(x)>0 => contains( "", x ) ---> false
1980
334
          Node ret = NodeManager::currentNM()->mkConst(false);
1981
167
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
1982
        }
1983
      }
1984
30413
      else if (StringsEntail::checkLengthOne(t))
1985
      {
1986
7390
        std::vector<Node> vec = Word::getChars(node[0]);
1987
7390
        Node emp = Word::mkEmptyWord(t.getType());
1988
7390
        NodeBuilder nb(OR);
1989
3695
        nb << emp.eqNode(t);
1990
8603
        for (const Node& c : vec)
1991
        {
1992
4908
          Assert(c.getType() == t.getType());
1993
4908
          nb << c.eqNode(t);
1994
        }
1995
1996
        // str.contains("ABCabc", t) --->
1997
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
1998
        // if len(t) <= 1
1999
7390
        Node ret = nb;
2000
3695
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2001
      }
2002
26718
      else if (node[1].getKind() == kind::STRING_CONCAT)
2003
      {
2004
        int firstc, lastc;
2005
2586
        if (!StringsEntail::canConstantContainConcat(
2006
                node[0], node[1], firstc, lastc))
2007
        {
2008
238
          Node ret = NodeManager::currentNM()->mkConst(false);
2009
119
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2010
        }
2011
      }
2012
    }
2013
  }
2014
189720
  if (node[1].isConst())
2015
  {
2016
51367
    size_t len = Word::getLength(node[1]);
2017
51367
    if (len == 0)
2018
    {
2019
      // contains( x, "" ) ---> true
2020
50216
      Node ret = NodeManager::currentNM()->mkConst(true);
2021
25108
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2022
    }
2023
26259
    else if (len == 1)
2024
    {
2025
      // The following rewrites are specific to a single character second
2026
      // argument of contains, where we can reason that this character is
2027
      // not split over multiple components in the first argument.
2028
14875
      if (node[0].getKind() == STRING_CONCAT)
2029
      {
2030
2542
        std::vector<Node> nc1;
2031
1271
        utils::getConcat(node[0], nc1);
2032
2542
        NodeBuilder nb(OR);
2033
4599
        for (const Node& ncc : nc1)
2034
        {
2035
3328
          nb << nm->mkNode(STRING_STRCTN, ncc, node[1]);
2036
        }
2037
2542
        Node ret = nb.constructNode();
2038
        // str.contains( x ++ y, "A" ) --->
2039
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2040
1271
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2041
      }
2042
13604
      else if (node[0].getKind() == STRING_STRREPL)
2043
      {
2044
949
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2045
540
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2046
        {
2047
262
          Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2048
          Node d2 =
2049
              nm->mkNode(AND,
2050
262
                         nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2051
524
                         nm->mkNode(STRING_STRCTN, node[0][2], node[1]));
2052
262
          Node ret = nm->mkNode(OR, d1, d2);
2053
          // If str.contains( y, "A" ) ---> false, then:
2054
          // str.contains( str.replace( x, y, z ), "A" ) --->
2055
          //   str.contains( x, "A" ) OR
2056
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2057
131
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2058
        }
2059
      }
2060
    }
2061
  }
2062
326420
  std::vector<Node> nc1;
2063
163210
  utils::getConcat(node[0], nc1);
2064
326420
  std::vector<Node> nc2;
2065
163210
  utils::getConcat(node[1], nc2);
2066
2067
  // component-wise containment
2068
326420
  std::vector<Node> nc1rb;
2069
326420
  std::vector<Node> nc1re;
2070
163210
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2071
  {
2072
5018
    Node ret = NodeManager::currentNM()->mkConst(true);
2073
2509
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2074
  }
2075
321402
  TypeNode stype = node[0].getType();
2076
2077
  // strip endpoints
2078
321402
  std::vector<Node> nb;
2079
321402
  std::vector<Node> ne;
2080
160701
  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
2081
  {
2082
    Node ret = NodeManager::currentNM()->mkNode(
2083
408
        kind::STRING_STRCTN, utils::mkConcat(nc1, stype), node[1]);
2084
204
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2085
  }
2086
2087
400549
  for (const Node& n : nc2)
2088
  {
2089
240215
    if (n.getKind() == kind::STRING_STRREPL)
2090
    {
2091
      // (str.contains x (str.replace y z w)) --> false
2092
      // if (str.contains x y) = false and (str.contains x w) = false
2093
      //
2094
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2095
      // replacement does not change y. (str.contains x w) checks that if the
2096
      // replacement changes anything in y, the w makes it impossible for it to
2097
      // occur in x.
2098
7385
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2099
3774
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2100
      {
2101
442
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2102
225
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2103
        {
2104
16
          Node res = nm->mkConst(false);
2105
8
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2106
        }
2107
      }
2108
2109
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2110
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2111
      //
2112
      // TODO: Remove with under-/over-approximation
2113
3766
      if (node[0] == n[0] && node[0] == n[2])
2114
      {
2115
310
        Node ret;
2116
155
        if (nc2.size() > 1)
2117
        {
2118
16
          Node emp = Word::mkEmptyWord(stype);
2119
16
          NodeBuilder nb2(kind::AND);
2120
24
          for (const Node& n2 : nc2)
2121
          {
2122
16
            if (n2 == n)
2123
            {
2124
8
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2125
            }
2126
            else
2127
            {
2128
8
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2129
            }
2130
          }
2131
8
          ret = nb2.constructNode();
2132
        }
2133
        else
2134
        {
2135
147
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2136
        }
2137
155
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2138
      }
2139
    }
2140
  }
2141
2142
  // length entailment
2143
320668
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2144
320668
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2145
160334
  if (ArithEntail::check(len_n2, len_n1, true))
2146
  {
2147
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2148
212
    Node ret = NodeManager::currentNM()->mkConst(false);
2149
106
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2150
  }
2151
2152
  // multi-set reasoning
2153
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2154
  //   since the number of a's in the second argument is greater than the number
2155
  //   of a's in the first argument
2156
160228
  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
2157
  {
2158
88
    Node ret = nm->mkConst(false);
2159
44
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2160
  }
2161
2162
160184
  if (ArithEntail::check(len_n2, len_n1, false))
2163
  {
2164
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2165
64582
    Node ret = node[0].eqNode(node[1]);
2166
32291
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2167
  }
2168
2169
  // splitting
2170
127893
  if (node[0].getKind() == kind::STRING_CONCAT)
2171
  {
2172
39361
    if (node[1].isConst())
2173
    {
2174
4044
      Node t = node[1];
2175
      // Below, we are looking for a constant component of node[0]
2176
      // has no overlap with node[1], which means we can split.
2177
      // Notice that if the first or last components had no
2178
      // overlap, these would have been removed by strip
2179
      // constant endpoints above.
2180
      // Hence, we consider only the inner children.
2181
4121
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2182
      {
2183
        // constant contains
2184
2138
        if (node[0][i].isConst())
2185
        {
2186
          // if no overlap, we can split into disjunction
2187
259
          if (Word::noOverlapWith(node[0][i], node[1]))
2188
          {
2189
156
            std::vector<Node> nc0;
2190
78
            utils::getConcat(node[0], nc0);
2191
156
            std::vector<Node> spl[2];
2192
78
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2193
78
            Assert(i < nc0.size() - 1);
2194
78
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2195
            Node ret = NodeManager::currentNM()->mkNode(
2196
                kind::OR,
2197
312
                NodeManager::currentNM()->mkNode(kind::STRING_STRCTN,
2198
156
                                                 utils::mkConcat(spl[0], stype),
2199
                                                 node[1]),
2200
312
                NodeManager::currentNM()->mkNode(kind::STRING_STRCTN,
2201
156
                                                 utils::mkConcat(spl[1], stype),
2202
390
                                                 node[1]));
2203
78
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2204
          }
2205
        }
2206
      }
2207
    }
2208
  }
2209
88532
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2210
  {
2211
    // (str.contains (str.substr x n (str.len y)) y) --->
2212
    //   (= (str.substr x n (str.len y)) y)
2213
    //
2214
    // TODO: Remove with under-/over-approximation
2215
8794
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2216
    {
2217
56
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2218
28
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2219
    }
2220
  }
2221
79738
  else if (node[0].getKind() == kind::STRING_STRREPL)
2222
  {
2223
1863
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2224
    {
2225
468
      if (Word::noOverlapWith(node[1], node[0][1])
2226
468
          && Word::noOverlapWith(node[1], node[0][2]))
2227
      {
2228
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2229
        // if there is no overlap between c1 and c3 and none between c2 and c3
2230
4
        Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2231
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2232
      }
2233
    }
2234
2235
1861
    if (node[0][0] == node[0][2])
2236
    {
2237
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2238
313
      if (node[0][1] == node[1])
2239
      {
2240
20
        Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2241
10
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2242
      }
2243
2244
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2245
      // if (str.len z) <= 1
2246
303
      if (StringsEntail::checkLengthOne(node[1]))
2247
      {
2248
24
        Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2249
12
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2250
      }
2251
    }
2252
2253
    // (str.contains (str.replace x y z) z) --->
2254
    //   (or (str.contains x y) (str.contains x z))
2255
1839
    if (node[0][2] == node[1])
2256
    {
2257
      Node ret = nm->mkNode(OR,
2258
140
                            nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2259
280
                            nm->mkNode(STRING_STRCTN, node[0][0], node[0][2]));
2260
70
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2261
    }
2262
2263
    // (str.contains (str.replace x y z) w) --->
2264
    //   (str.contains (str.replace x y "") w)
2265
    // if (str.contains z w) ---> false and (str.len w) = 1
2266
1769
    if (StringsEntail::checkLengthOne(node[1]))
2267
    {
2268
467
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2269
271
      if (!ctn.isNull() && !ctn.getConst<bool>())
2270
      {
2271
150
        Node empty = Word::mkEmptyWord(stype);
2272
        Node ret = nm->mkNode(
2273
            kind::STRING_STRCTN,
2274
150
            nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty),
2275
300
            node[1]);
2276
75
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2277
      }
2278
    }
2279
  }
2280
2281
127618
  if (node[1].getKind() == kind::STRING_STRREPL)
2282
  {
2283
    // (str.contains x (str.replace y x y)) --->
2284
    //   (str.contains x y)
2285
1950
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2286
    {
2287
12
      Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]);
2288
6
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2289
    }
2290
2291
    // (str.contains x (str.replace "" x y)) --->
2292
    //   (= "" (str.replace "" x y))
2293
    //
2294
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2295
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2296
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2297
3886
    Node emp = Word::mkEmptyWord(stype);
2298
1944
    if (node[0] == node[1][1] && node[1][0] == emp)
2299
    {
2300
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2301
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2302
    }
2303
  }
2304
2305
127610
  return node;
2306
}
2307
2308
4502
Node SequencesRewriter::rewriteIndexof(Node node)
2309
{
2310
4502
  Assert(node.getKind() == kind::STRING_STRIDOF);
2311
4502
  NodeManager* nm = NodeManager::currentNM();
2312
2313
4502
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2314
  {
2315
    // z<0  implies  str.indexof( x, y, z ) --> -1
2316
322
    Node negone = nm->mkConst(Rational(-1));
2317
161
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2318
  }
2319
2320
  // the string type
2321
8682
  TypeNode stype = node[0].getType();
2322
2323
  // evaluation and simple cases
2324
8682
  std::vector<Node> children0;
2325
4341
  utils::getConcat(node[0], children0);
2326
4341
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2327
  {
2328
1064
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2329
894
    if (node[2].getConst<Rational>() > rMaxInt)
2330
    {
2331
      // We know that, due to limitations on the size of string constants
2332
      // in our implementation, that accessing a position greater than
2333
      // rMaxInt is guaranteed to be out of bounds.
2334
      Node negone = nm->mkConst(Rational(-1));
2335
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2336
    }
2337
894
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2338
1064
    Node s = children0[0];
2339
1064
    Node t = node[1];
2340
    uint32_t start =
2341
894
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2342
894
    std::size_t ret = Word::find(s, t, start);
2343
894
    if (ret != std::string::npos)
2344
    {
2345
682
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2346
341
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2347
    }
2348
553
    else if (children0.size() == 1)
2349
    {
2350
766
      Node negone = nm->mkConst(Rational(-1));
2351
383
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2352
    }
2353
  }
2354
2355
3617
  if (node[0] == node[1])
2356
  {
2357
88
    if (node[2].isConst())
2358
    {
2359
14
      if (node[2].getConst<Rational>().sgn() == 0)
2360
      {
2361
        // indexof( x, x, 0 ) --> 0
2362
28
        Node zero = nm->mkConst(Rational(0));
2363
14
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2364
      }
2365
    }
2366
74
    if (ArithEntail::check(node[2], true))
2367
    {
2368
      // y>0  implies  indexof( x, x, y ) --> -1
2369
      Node negone = nm->mkConst(Rational(-1));
2370
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2371
    }
2372
128
    Node emp = Word::mkEmptyWord(stype);
2373
74
    if (node[0] != emp)
2374
    {
2375
      // indexof( x, x, z ) ---> indexof( "", "", z )
2376
40
      Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]);
2377
20
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2378
    }
2379
  }
2380
2381
7166
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2382
7166
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2383
7166
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2384
2385
3583
  if (node[1].isConst())
2386
  {
2387
3081
    if (Word::isEmpty(node[1]))
2388
    {
2389
80
      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
2390
      {
2391
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2392
10
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2393
      }
2394
    }
2395
  }
2396
2397
3573
  if (ArithEntail::check(len1, len0m2, true))
2398
  {
2399
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2400
12
    Node negone = nm->mkConst(Rational(-1));
2401
6
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2402
  }
2403
2404
7134
  Node fstr = node[0];
2405
3567
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2406
  {
2407
543
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2408
543
    fstr = Rewriter::rewrite(fstr);
2409
  }
2410
2411
7134
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2412
7134
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2413
3567
                                 << fstr << ", " << node[1] << ")" << std::endl;
2414
3567
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2415
7134
  std::vector<Node> children1;
2416
3567
  utils::getConcat(node[1], children1);
2417
3567
  if (!cmp_conr.isNull())
2418
  {
2419
286
    if (cmp_conr.getConst<bool>())
2420
    {
2421
281
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2422
      {
2423
        // past the first position in node[0] that contains node[1], we can drop
2424
349
        std::vector<Node> nb;
2425
349
        std::vector<Node> ne;
2426
205
        int cc = d_stringsEntail.componentContains(
2427
205
            children0, children1, nb, ne, true, 1);
2428
205
        if (cc != -1 && !ne.empty())
2429
        {
2430
          // For example:
2431
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2432
98
          Node nn = utils::mkConcat(children0, stype);
2433
98
          Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2434
49
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2435
        }
2436
2437
        // Strip components from the beginning that are guaranteed not to match
2438
156
        if (StringsEntail::stripConstantEndpoints(
2439
                children0, children1, nb, ne, 1))
2440
        {
2441
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2442
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2443
          Node ret = nm->mkNode(
2444
              kind::PLUS,
2445
24
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2446
48
              nm->mkNode(kind::STRING_STRIDOF,
2447
24
                         utils::mkConcat(children0, stype),
2448
                         node[1],
2449
48
                         node[2]));
2450
12
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2451
        }
2452
      }
2453
      // To show that the first argument definitely contains the second, the
2454
      // index must be a valid index in the first argument. This ensures that
2455
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2456
      // is beyond the length of t. This is not required for the above rewrites,
2457
      // which only apply when n=0.
2458
220
      if (ArithEntail::check(node[2]) && ArithEntail::check(len0, node[2]))
2459
      {
2460
        // strip symbolic length
2461
294
        Node new_len = node[2];
2462
294
        std::vector<Node> nr;
2463
150
        if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2464
        {
2465
          // For example:
2466
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2467
          // implies
2468
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2469
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2470
12
          Node nn = utils::mkConcat(children0, stype);
2471
          Node ret =
2472
              nm->mkNode(PLUS,
2473
12
                         nm->mkNode(MINUS, node[2], new_len),
2474
24
                         nm->mkNode(STRING_STRIDOF, nn, node[1], new_len));
2475
6
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2476
        }
2477
      }
2478
    }
2479
    else
2480
    {
2481
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2482
10
      Node negone = nm->mkConst(Rational(-1));
2483
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2484
    }
2485
  }
2486
  else
2487
  {
2488
6532
    Node new_len = node[2];
2489
6532
    std::vector<Node> nr;
2490
3281
    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2491
    {
2492
      // Normalize the string before the start index.
2493
      //
2494
      // For example:
2495
      // str.indexof(str.++("ABCD", x), y, 3) --->
2496
      // str.indexof(str.++("AAAD", x), y, 3)
2497
114
      Node nodeNr = utils::mkConcat(nr, stype);
2498
114
      Node normNr = lengthPreserveRewrite(nodeNr);
2499
72
      if (normNr != nodeNr)
2500
      {
2501
60
        std::vector<Node> normNrChildren;
2502
30
        utils::getConcat(normNr, normNrChildren);
2503
60
        std::vector<Node> children(normNrChildren);
2504
30
        children.insert(children.end(), children0.begin(), children0.end());
2505
60
        Node nn = utils::mkConcat(children, stype);
2506
60
        Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2507
30
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2508
      }
2509
    }
2510
  }
2511
2512
3465
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2513
  {
2514
5913
    std::vector<Node> cb;
2515
5913
    std::vector<Node> ce;
2516
2963
    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
2517
    {
2518
26
      Node ret = utils::mkConcat(children0, stype);
2519
13
      ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]);
2520
      // For example:
2521
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2522
13
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2523
    }
2524
  }
2525
2526
3452
  return node;
2527
}
2528
2529
3152
Node SequencesRewriter::rewriteReplace(Node node)
2530
{
2531
3152
  Assert(node.getKind() == kind::STRING_STRREPL);
2532
3152
  NodeManager* nm = NodeManager::currentNM();
2533
2534
3152
  if (node[1].isConst() && Word::isEmpty(node[1]))
2535
  {
2536
822
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2537
411
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2538
  }
2539
  // the string type
2540
5482
  TypeNode stype = node.getType();
2541
2542
5482
  std::vector<Node> children0;
2543
2741
  utils::getConcat(node[0], children0);
2544
2545
2741
  if (node[1].isConst() && children0[0].isConst())
2546
  {
2547
837
    Node s = children0[0];
2548
837
    Node t = node[1];
2549
828
    std::size_t p = Word::find(s, t);
2550
828
    if (p == std::string::npos)
2551
    {
2552
558
      if (children0.size() == 1)
2553
      {
2554
549
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2555
      }
2556
    }
2557
    else
2558
    {
2559
540
      Node s1 = Word::substr(s, 0, p);
2560
540
      Node s3 = Word::substr(s, p + Word::getLength(t));
2561
540
      std::vector<Node> children;
2562
270
      if (!Word::isEmpty(s1))
2563
      {
2564
51
        children.push_back(s1);
2565
      }
2566
270
      children.push_back(node[2]);
2567
270
      if (!Word::isEmpty(s3))
2568
      {
2569
86
        children.push_back(s3);
2570
      }
2571
270
      children.insert(children.end(), children0.begin() + 1, children0.end());
2572
540
      Node ret = utils::mkConcat(children, stype);
2573
270
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2574
    }
2575
  }
2576
2577
  // rewrites that apply to both replace and replaceall
2578
3844
  Node rri = rewriteReplaceInternal(node);
2579
1922
  if (!rri.isNull())
2580
  {
2581
    // printing of the rewrite managed by the call above
2582
134
    return rri;
2583
  }
2584
2585
1788
  if (node[0] == node[2])
2586
  {
2587
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2588
511
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2589
511
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2590
274
    if (ArithEntail::check(l1, l0))
2591
    {
2592
35
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2593
    }
2594
2595
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2596
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2597
239
    if (StringsEntail::checkLengthOne(node[0]))
2598
    {
2599
110
      Node empty = Word::mkEmptyWord(stype);
2600
      Node rn1 = Rewriter::rewrite(
2601
110
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2602
56
      if (rn1 != node[1])
2603
      {
2604
110
        std::vector<Node> emptyNodes;
2605
        bool allEmptyEqs;
2606
56
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2607
2608
56
        if (allEmptyEqs)
2609
        {
2610
62
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2611
32
          if (node[1] != nn1)
2612
          {
2613
4
            Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]);
2614
2
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2615
          }
2616
        }
2617
      }
2618
    }
2619
  }
2620
2621
3502
  std::vector<Node> children1;
2622
1751
  utils::getConcat(node[1], children1);
2623
2624
  // check if contains definitely does (or does not) hold
2625
3502
  Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]);
2626
3502
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2627
1751
  if (cmp_conr.isConst())
2628
  {
2629
188
    if (cmp_conr.getConst<bool>())
2630
    {
2631
      // component-wise containment
2632
314
      std::vector<Node> cb;
2633
314
      std::vector<Node> ce;
2634
178
      int cc = d_stringsEntail.componentContains(
2635
178
          children0, children1, cb, ce, true, 1);
2636
178
      if (cc != -1)
2637
      {
2638
102
        if (cc == 0 && children0[0] == children1[0])
2639
        {
2640
          // definitely a prefix, can do the replace
2641
          // for example,
2642
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2643
          //   str.++( y, "b" )
2644
54
          std::vector<Node> cres;
2645
27
          cres.push_back(node[2]);
2646
27
          cres.insert(cres.end(), ce.begin(), ce.end());
2647
54
          Node ret = utils::mkConcat(cres, stype);
2648
27
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2649
        }
2650
75
        else if (!ce.empty())
2651
        {
2652
          // we can pull remainder past first definite containment
2653
          // for example,
2654
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2655
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2656
          // this is independent of whether the second argument may be empty
2657
30
          std::vector<Node> scc;
2658
45
          scc.push_back(NodeManager::currentNM()->mkNode(
2659
              kind::STRING_STRREPL,
2660
30
              utils::mkConcat(children0, stype),
2661
              node[1],
2662
              node[2]));
2663
15
          scc.insert(scc.end(), ce.begin(), ce.end());
2664
30
          Node ret = utils::mkConcat(scc, stype);
2665
15
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2666
        }
2667
      }
2668
    }
2669
    else
2670
    {
2671
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2672
10
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2673
    }
2674
  }
2675
1563
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2676
  {
2677
    // Rewriting the str.contains may return equalities of the form (= x "").
2678
    // In that case, we can substitute the variables appearing in those
2679
    // equalities with the empty string in the third argument of the
2680
    // str.replace. For example:
2681
    //
2682
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2683
    //
2684
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2685
    // but that means that y must be empty in that case. Thus, we can
2686
    // substitute y with "" in the third argument. Note that the third argument
2687
    // does not matter when the str.replace does not apply.
2688
    //
2689
542
    Node empty = Word::mkEmptyWord(stype);
2690
2691
542
    std::vector<Node> emptyNodes;
2692
    bool allEmptyEqs;
2693
280
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2694
2695
280
    if (emptyNodes.size() > 0)
2696
    {
2697
      // Perform the substitutions
2698
306
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2699
      Node nn2 = node[2].substitute(
2700
306
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2701
2702
      // If the contains rewrites to a conjunction of empty-string equalities
2703
      // and we are doing the replacement in an empty string, we can rewrite
2704
      // the string-to-replace with a concatenation of all the terms that must
2705
      // be empty:
2706
      //
2707
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2708
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2709
162
      if (node[0] == empty && allEmptyEqs)
2710
      {
2711
192
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2712
192
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2713
100
        if (nn1 != node[1] || nn2 != node[2])
2714
        {
2715
16
          Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2);
2716
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2717
        }
2718
      }
2719
2720
154
      if (nn2 != node[2])
2721
      {
2722
20
        Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2);
2723
10
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2724
      }
2725
    }
2726
  }
2727
2728
1681
  if (cmp_conr != cmp_con)
2729
  {
2730
519
    if (StringsEntail::checkNonEmpty(node[1]))
2731
    {
2732
      // pull endpoints that can be stripped
2733
      // for example,
2734
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2735
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2736
549
      std::vector<Node> cb;
2737
549
      std::vector<Node> ce;
2738
283
      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
2739
      {
2740
34
        std::vector<Node> cc;
2741
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2742
17
        cc.push_back(
2743
68
            NodeManager::currentNM()->mkNode(kind::STRING_STRREPL,
2744
34
                                             utils::mkConcat(children0, stype),
2745
                                             node[1],
2746
                                             node[2]));
2747
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2748
34
        Node ret = utils::mkConcat(cc, stype);
2749
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2750
      }
2751
    }
2752
  }
2753
2754
1664
  children1.clear();
2755
1664
  utils::getConcat(node[1], children1);
2756
3328
  Node lastChild1 = children1[children1.size() - 1];
2757
1664
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2758
  {
2759
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2760
    // (str.replace x (str.++ t
2761
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2762
    // if j > len(x)
2763
    //
2764
    // Reasoning: If the string to be replaced is longer than x, then it does
2765
    // not matter how much longer it is, the result is always x. Thus, it is
2766
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2767
2768
116
    children1.pop_back();
2769
    // Length of the non-substr components in the second argument
2770
    Node partLen1 =
2771
224
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2772
224
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2773
2774
224
    Node zero = nm->mkConst(Rational(0));
2775
224
    Node one = nm->mkConst(Rational(1));
2776
224
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2777
224
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2778
    // Check len(t) + j > len(x) + 1
2779
116
    if (ArithEntail::check(maxLen1, len0_1, true))
2780
    {
2781
8
      children1.push_back(nm->mkNode(
2782
          kind::STRING_SUBSTR,
2783
          lastChild1[0],
2784
          lastChild1[1],
2785
16
          nm->mkNode(
2786
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2787
      Node res = nm->mkNode(kind::STRING_STRREPL,
2788
                            node[0],
2789
16
                            utils::mkConcat(children1, stype),
2790
32
                            node[2]);
2791
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2792
    }
2793
  }
2794
2795
1656
  if (node[0].getKind() == STRING_STRREPL)
2796
  {
2797
352
    Node x = node[0];
2798
352
    Node y = node[1];
2799
352
    Node z = node[2];
2800
182
    if (x[0] == x[2] && x[0] == y)
2801
    {
2802
      // (str.replace (str.replace y w y) y z) -->
2803
      //   (str.replace (str.replace y w z) y z)
2804
      // if (str.len w) >= (str.len z) and w != z
2805
      //
2806
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2807
      // does appear in y.
2808
      //
2809
      // Case (1): In this case, the reasoning is trivial. The
2810
      // inner replace does not do anything, so we can just replace its third
2811
      // argument with any string.
2812
      //
2813
      // Case (2): After the inner replace, we are guaranteed to have a string
2814
      // that contains y at the index of w in the original string y. The outer
2815
      // replace then replaces that y with z, so we can short-circuit that
2816
      // replace by directly replacing w with z in the inner replace. We can
2817
      // only do that if the result of the new inner replace does not contain
2818
      // y, otherwise we end up doing two replaces that are different from the
2819
      // original expression. We enforce that by requiring that the length of w
2820
      // has to be greater or equal to the length of z and that w and z have to
2821
      // be different. This makes sure that an inner replace changes a string
2822
      // to a string that is shorter than y, making it impossible for the outer
2823
      // replace to match.
2824
108
      Node w = x[1];
2825
2826
      // (str.len w) >= (str.len z)
2827
108
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2828
108
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2829
60
      if (ArithEntail::check(wlen, zlen))
2830
      {
2831
        // w != z
2832
44
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2833
28
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2834
        {
2835
          Node ret = nm->mkNode(kind::STRING_STRREPL,
2836
24
                                nm->mkNode(kind::STRING_STRREPL, y, w, z),
2837
                                y,
2838
48
                                z);
2839
12
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2840
        }
2841
      }
2842
    }
2843
  }
2844
2845
1644
  if (node[1].getKind() == STRING_STRREPL)
2846
  {
2847
122
    if (node[1][0] == node[0])
2848
    {
2849
56
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2850
      {
2851
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2852
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2853
      }
2854
56
      bool dualReplIteSuccess = false;
2855
112
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2856
56
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2857
      {
2858
        // str.contains( x, z ) ---> false
2859
        //   implies
2860
        // str.replace( x, str.replace( x, y, z ), w ) --->
2861
        // ite( str.contains( x, y ), x, w )
2862
        dualReplIteSuccess = true;
2863
      }
2864
      else
2865
      {
2866
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2867
        //   implies
2868
        // str.replace( x, str.replace( x, y, z ), w ) --->
2869
        // ite( str.contains( x, y ), x, w )
2870
56
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2871
56
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2872
        {
2873
10
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2874
10
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2875
          {
2876
            dualReplIteSuccess = true;
2877
          }
2878
        }
2879
      }
2880
56
      if (dualReplIteSuccess)
2881
      {
2882
        Node res = nm->mkNode(ITE,
2883
                              nm->mkNode(STRING_STRCTN, node[0], node[1][1]),
2884
                              node[0],
2885
                              node[2]);
2886
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2887
      }
2888
    }
2889
2890
122
    bool invSuccess = false;
2891
122
    if (node[1][1] == node[0])
2892
    {
2893
46
      if (node[1][0] == node[1][2])
2894
      {
2895
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2896
        invSuccess = true;
2897
      }
2898
46
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2899
      {
2900
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2901
        //   implies
2902
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2903
40
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2904
20
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2905
      }
2906
    }
2907
    else
2908
    {
2909
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2910
      //   implies
2911
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2912
152
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2913
76
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2914
      {
2915
8
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2916
8
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2917
      }
2918
    }
2919
122
    if (invSuccess)
2920
    {
2921
      Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]);
2922
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2923
    }
2924
  }
2925
1644
  if (node[2].getKind() == STRING_STRREPL)
2926
  {
2927
48
    if (node[2][1] == node[0])
2928
    {
2929
      // str.contains( z, w ) ----> false implies
2930
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
2931
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
2932
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2933
      {
2934
        Node res =
2935
            nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]);
2936
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
2937
      }
2938
    }
2939
48
    if (node[2][0] == node[1])
2940
    {
2941
4
      bool success = false;
2942
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
2943
      {
2944
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
2945
        success = true;
2946
      }
2947
      else
2948
      {
2949
        // str.contains( x, z ) ----> false implies
2950
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
2951
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
2952
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
2953
      }
2954
4
      if (success)
2955
      {
2956
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
2957
      }
2958
    }
2959
  }
2960
  // miniscope based on components that do not contribute to contains
2961
  // for example,
2962
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
2963
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
2964
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
2965
1644
  if (StringsEntail::checkLengthOne(node[1]))
2966
  {
2967
1280
    Node lastLhs;
2968
644
    unsigned lastCheckIndex = 0;
2969
656
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
2970
    {
2971
102
      unsigned checkIndex = children0.size() - i;
2972
114
      std::vector<Node> checkLhs;
2973
102
      checkLhs.insert(
2974
204
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
2975
114
      Node lhs = utils::mkConcat(checkLhs, stype);
2976
114
      Node rhs = children0[checkIndex];
2977
114
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
2978
102
      if (!ctn.isNull() && ctn.getConst<bool>())
2979
      {
2980
12
        lastLhs = lhs;
2981
12
        lastCheckIndex = checkIndex;
2982
      }
2983
      else
2984
      {
2985
90
        break;
2986
      }
2987
    }
2988
644
    if (!lastLhs.isNull())
2989
    {
2990
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
2991
32
                             children0.end());
2992
16
      Node rem = utils::mkConcat(remc, stype);
2993
      Node ret =
2994
          nm->mkNode(STRING_CONCAT,
2995
16
                     nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]),
2996
32
                     rem);
2997
      // for example:
2998
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
2999
      // Since we know that the first occurrence of "A" cannot be in the
3000
      // second occurrence of x. Notice this is specific to single characters
3001
      // due to complications with finds that span multiple components for
3002
      // non-characters.
3003
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3004
    }
3005
  }
3006
3007
  // TODO (#1180) incorporate these?
3008
  // contains( t, s ) =>
3009
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3010
  // contains( t, s ) =>
3011
  //   contains( replace( t, s, r ), r ) ----> true
3012
3013
1636
  return node;
3014
}
3015
3016
145
Node SequencesRewriter::rewriteReplaceAll(Node node)
3017
{
3018
145
  Assert(node.getKind() == STRING_STRREPLALL);
3019
3020
290
  TypeNode stype = node.getType();
3021
3022
145
  if (node[0].isConst() && node[1].isConst())
3023
  {
3024
54
    std::vector<Node> children;
3025
54
    Node s = node[0];
3026
54
    Node t = node[1];
3027
27
    if (Word::isEmpty(s) || Word::isEmpty(t))
3028
    {
3029
6
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3030
    }
3031
21
    std::size_t sizeS = Word::getLength(s);
3032
21
    std::size_t sizeT = Word::getLength(t);
3033
21
    std::size_t index = 0;
3034
21
    std::size_t curr = 0;
3035
56
    do
3036
    {
3037
77
      curr = Word::find(s, t, index);
3038
77
      if (curr != std::string::npos)
3039
      {
3040
56
        if (curr > index)
3041
        {
3042
34
          children.push_back(Word::substr(s, index, curr - index));
3043
        }
3044
56
        children.push_back(node[2]);
3045
56
        index = curr + sizeT;
3046
      }
3047
      else
3048
      {
3049
21
        children.push_back(Word::substr(s, index, sizeS - index));
3050
      }
3051
77
    } while (curr != std::string::npos && curr < sizeS);
3052
    // constant evaluation
3053
42
    Node res = utils::mkConcat(children, stype);
3054
21
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3055
  }
3056
3057
  // rewrites that apply to both replace and replaceall
3058
236
  Node rri = rewriteReplaceInternal(node);
3059
118
  if (!rri.isNull())
3060
  {
3061
    // printing of the rewrite managed by the call above
3062
4
    return rri;
3063
  }
3064
3065
114
  return node;
3066
}
3067
3068
2040
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3069
{
3070
2040
  Kind nk = node.getKind();
3071
2040
  Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL);
3072
3073
2040
  if (node[1] == node[2])
3074
  {
3075
63
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3076
  }
3077
3078
1977
  if (node[0] == node[1])
3079
  {
3080
    // only holds for replaceall if non-empty
3081
95
    if (nk == STRING_STRREPL || StringsEntail::checkNonEmpty(node[1]))
3082
    {
3083
75
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3084
    }
3085
  }
3086
3087
1902
  return Node::null();
3088
}
3089
3090
119
Node SequencesRewriter::rewriteReplaceRe(Node node)
3091
{
3092
119
  Assert(node.getKind() == STRING_REPLACE_RE);
3093
119
  NodeManager* nm = NodeManager::currentNM();
3094
238
  Node x = node[0];
3095
238
  Node y = node[1];
3096
238
  Node z = node[2];
3097
3098
119
  if (RegExpEntail::isConstRegExp(y))
3099
  {
3100
117
    if (x.isConst())
3101
    {
3102
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3103
37
      std::pair<size_t, size_t> match = firstMatch(x, y);
3104
37
      if (match.first != string::npos)
3105
      {
3106
38
        String s = x.getConst<String>();
3107
        Node ret = nm->mkNode(STRING_CONCAT,
3108
38
                              nm->mkConst(s.substr(0, match.first)),
3109
                              z,
3110
76
                              nm->mkConst(s.substr(match.second)));
3111
19
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3112
      }
3113
      else
3114
      {
3115
18
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3116
      }
3117
    }
3118
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3119
156
    String emptyStr("");
3120
80
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3121
    {
3122
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3123
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3124
    }
3125
  }
3126
78
  return node;
3127
}
3128
3129
60
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3130
{
3131
60
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3132
60
  NodeManager* nm = NodeManager::currentNM();
3133
120
  Node x = node[0];
3134
120
  Node y = node[1];
3135
120
  Node z = node[2];
3136
3137
60
  if (RegExpEntail::isConstRegExp(y))
3138
  {
3139
60
    if (x.isConst())
3140
    {
3141
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3142
      //   "Z" ++ y ++ "Z" ++ y
3143
44
      TypeNode t = x.getType();
3144
44
      Node emp = Word::mkEmptyWord(t);
3145
      Node yp = Rewriter::rewrite(
3146
44
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3147
44
      std::vector<Node> res;
3148
44
      String rem = x.getConst<String>();
3149
22
      std::pair<size_t, size_t> match(0, 0);
3150
68
      while (rem.size() >= 0)
3151
      {
3152
45
        match = firstMatch(nm->mkConst(rem), yp);
3153
45
        if (match.first == string::npos)
3154
        {
3155
22
          break;
3156
        }
3157
23
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3158
23
        res.push_back(z);
3159
23
        rem = rem.substr(match.second);
3160
      }
3161
22
      res.push_back(nm->mkConst(rem));
3162
44
      Node ret = utils::mkConcat(res, t);
3163
22
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3164
    }
3165
  }
3166
3167
38
  return node;
3168
}
3169
3170
82
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3171
{
3172
82
  Assert(n.isConst() && n.getType().isStringLike());
3173
82
  Assert(r.getType().isRegExp());
3174
82
  NodeManager* nm = NodeManager::currentNM();
3175
3176
164
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3177
164
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3178
164
  String s = n.getConst<String>();
3179
3180
82
  if (s.size() == 0)
3181
  {
3182
29
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3183
    {
3184
2
      return std::make_pair(0, 0);
3185
    }
3186
    else
3187
    {
3188
27
      return std::make_pair(string::npos, string::npos);
3189
    }
3190
  }
3191
3192
115
  for (size_t i = 0, size = s.size(); i < size; i++)
3193
  {
3194
102
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3195
    {
3196
144
      for (size_t j = i; j <= size; j++)
3197
      {
3198
248
        String substr = s.substr(i, j - i);
3199
144
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3200
        {
3201
40
          return std::make_pair(i, j);
3202
        }
3203
      }
3204
    }
3205
  }
3206
3207
13
  return std::make_pair(string::npos, string::npos);
3208
}
3209
3210
237
Node SequencesRewriter::rewriteStrReverse(Node node)
3211
{
3212
237
  Assert(node.getKind() == STRING_REV);
3213
237
  NodeManager* nm = NodeManager::currentNM();
3214
474
  Node x = node[0];
3215
237
  if (x.isConst())
3216
  {
3217
    // reverse the characters in the constant
3218
130
    Node retNode = Word::reverse(x);
3219
65
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3220
  }
3221
172
  else if (x.getKind() == STRING_CONCAT)
3222
  {
3223
74
    std::vector<Node> children;
3224
113
    for (const Node& nc : x)
3225
    {
3226
76
      children.push_back(nm->mkNode(STRING_REV, nc));
3227
    }
3228
37
    std::reverse(children.begin(), children.end());
3229
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3230
74
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3231
37
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3232
  }
3233
135
  else if (x.getKind() == STRING_REV)
3234
  {
3235
    // rev( rev( x ) ) --> x
3236
10
    Node retNode = x[0];
3237
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3238
  }
3239
130
  return node;
3240
}
3241
3242
143
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3243
{
3244
143
  Assert(n.getKind() == kind::STRING_PREFIX
3245
         || n.getKind() == kind::STRING_SUFFIX);
3246
143
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3247
143
  if (n[0] == n[1])
3248
  {
3249
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3250
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3251
  }
3252
135
  if (n[0].isConst())
3253
  {
3254
56
    if (Word::isEmpty(n[0]))
3255
    {
3256
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3257
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3258
    }
3259
  }
3260
131
  if (n[1].isConst())
3261
  {
3262
39
    Node s = n[1];
3263
39
    size_t lenS = Word::getLength(s);
3264
39
    if (n[0].isConst())
3265
    {
3266
      Node ret = NodeManager::currentNM()->mkConst(false);
3267
      Node t = n[0];
3268
      size_t lenT = Word::getLength(t);
3269
      if (lenS >= lenT)
3270
      {
3271
        if ((isPrefix && t == Word::prefix(s, lenT))
3272
            || (!isPrefix && t == Word::suffix(s, lenT)))
3273
        {
3274
          ret = NodeManager::currentNM()->mkConst(true);
3275
        }
3276
      }
3277
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3278
    }
3279
39
    else if (lenS == 0)
3280
    {
3281
4
      Node ret = n[0].eqNode(n[1]);
3282
2
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3283
    }
3284
37
    else if (lenS == 1)
3285
    {
3286
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3287
      // (str.contains "A" x )
3288
      Node ret =
3289
74
          NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]);
3290
37
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3291
    }
3292
  }
3293
184
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3294
184
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3295
184
  Node val;
3296
92
  if (isPrefix)
3297
  {
3298
67
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3299
  }
3300
  else
3301
  {
3302
25
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3303
  }
3304
3305
  // Check if we can turn the prefix/suffix into equalities by showing that the
3306
  // prefix/suffix is at least as long as the string
3307
184
  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
3308
92
  if (!eqs.isNull())
3309
  {
3310
8
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3311
  }
3312
3313
  // general reduction to equality + substr
3314
  Node retNode = n[0].eqNode(
3315
168
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3316
3317
84
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3318
}
3319
3320
76
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3321
{
3322
76
  NodeManager* nm = NodeManager::currentNM();
3323
152
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3324
152
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3325
152
  return res.isNull() ? n : res;
3326
}
3327
3328
88
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3329
{
3330
88
  NodeManager* nm = NodeManager::currentNM();
3331
3332
176
  Node res;
3333
88
  if (len.getKind() == CONST_RATIONAL)
3334
  {
3335
    // c -> "A" repeated c times
3336
152
    Rational ratLen = len.getConst<Rational>();
3337
76
    Assert(ratLen.getDenominator() == 1);
3338
152
    Integer intLen = ratLen.getNumerator();
3339
76
    uint32_t u = intLen.getUnsignedInt();
3340
76
    if (stype.isString())  // string-only
3341
    {
3342
76
      res = nm->mkConst(String(std::string(u, 'A')));
3343
    }
3344
    // we could do this for sequences, but we need to be careful: some
3345
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3346
    // sorts and arrays).
3347
  }
3348
12
  else if (len.getKind() == PLUS)
3349
  {
3350
    // x + y -> norm(x) + norm(y)
3351
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3352
12
    for (const auto& n : len)
3353
    {
3354
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3355
8
      if (sn.isNull())
3356
      {
3357
        return Node::null();
3358
      }
3359
16
      std::vector<Node> snChildren;
3360
8
      utils::getConcat(sn, snChildren);
3361
8
      concatBuilder.append(snChildren);
3362
    }
3363
4
    res = concatBuilder.constructNode();
3364
  }
3365
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3366
20
           && len[0].isConst())
3367
  {
3368
    // c * x -> norm(x) repeated c times
3369
8
    Rational ratReps = len[0].getConst<Rational>();
3370
4
    Assert(ratReps.getDenominator() == 1);
3371
8
    Integer intReps = ratReps.getNumerator();
3372
3373
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3374
4
    if (nRep.isNull())
3375
    {
3376
      return Node::null();
3377
    }
3378
8
    std::vector<Node> nRepChildren;
3379
4
    utils::getConcat(nRep, nRepChildren);
3380
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3381
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3382
    {
3383
8
      concatBuilder.append(nRepChildren);
3384
    }
3385
4
    res = concatBuilder.constructNode();
3386
  }
3387
4
  else if (len.getKind() == STRING_LENGTH)
3388
  {
3389
    // len(x) -> x
3390
4
    res = len[0];
3391
  }
3392
88
  return res;
3393
}
3394
3395
307
Node SequencesRewriter::rewriteSeqUnit(Node node)
3396
{
3397
307
  NodeManager* nm = NodeManager::currentNM();
3398
307
  if (node[0].isConst())
3399
  {
3400
234
    std::vector<Node> seq;
3401
117
    seq.push_back(node[0]);
3402
234
    TypeNode stype = node[0].getType();
3403
234
    Node ret = nm->mkConst(Sequence(stype, seq));
3404
117
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3405
  }
3406
190
  return node;
3407
}
3408
3409
151763
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3410
{
3411
303526
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3412
151763
                           << "." << std::endl;
3413
3414
151763
  NodeManager* nm = NodeManager::currentNM();
3415
3416
151763
  if (d_statistics != nullptr)
3417
  {
3418
151693
    (*d_statistics) << r;
3419
  }
3420
3421
  // standard post-processing
3422
  // We rewrite (string) equalities immediately here. This allows us to forego
3423
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3424
  // of { s=t, t=s, true, false } ).
3425
151763
  Kind retk = ret.getKind();
3426
151763
  if (retk == OR || retk == AND)
3427
  {
3428
14220
    std::vector<Node> children;
3429
7110
    bool childChanged = false;
3430
23916
    for (const Node& cret : ret)
3431
    {
3432
33612
      Node creter = cret;
3433
16806
      if (cret.getKind() == EQUAL)
3434
      {
3435
11058
        creter = rewriteEqualityExt(cret);
3436
      }
3437
5748
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3438
      {
3439
361
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3440
      }
3441
16806
      childChanged = childChanged || cret != creter;
3442
16806
      children.push_back(creter);
3443
    }
3444
7110
    if (childChanged)
3445
    {
3446
2247
      ret = nm->mkNode(retk, children);
3447
7110
    }
3448
  }
3449
144653
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3450
  {
3451
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3452
  }
3453
144653
  else if (retk == EQUAL && node.getKind() != EQUAL)
3454
  {
3455
66098
    Trace("strings-rewrite")
3456
33049
        << "Apply extended equality rewrite on " << ret << std::endl;
3457
33049
    ret = rewriteEqualityExt(ret);
3458
  }
3459
151763
  return ret;
3460
}
3461
3462
}  // namespace strings
3463
}  // namespace theory
3464
28194
}  // namespace cvc5