GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1600 1654 96.7 %
Date: 2021-08-06 Branches: 5054 9698 52.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/statistics_registry.h"
31
#include "util/string.h"
32
33
using namespace std;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
10068
SequencesRewriter::SequencesRewriter(HistogramStat<Rewrite>* statistics)
41
10068
    : d_statistics(statistics), d_stringsEntail(*this)
42
{
43
10068
}
44
45
126057
Node SequencesRewriter::rewriteEquality(Node node)
46
{
47
126057
  Assert(node.getKind() == kind::EQUAL);
48
126057
  if (node[0] == node[1])
49
  {
50
8612
    Node ret = NodeManager::currentNM()->mkConst(true);
51
4306
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
52
  }
53
121751
  else if (node[0].isConst() && node[1].isConst())
54
  {
55
4402
    Node ret = NodeManager::currentNM()->mkConst(false);
56
2201
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
57
  }
58
  // standard ordering
59
119550
  if (node[0] > node[1])
60
  {
61
52460
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
62
26230
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
63
  }
64
93320
  return node;
65
}
66
67
63045
Node SequencesRewriter::rewriteEqualityExt(Node node)
68
{
69
63045
  Assert(node.getKind() == EQUAL);
70
126090
  TypeNode tn = node[0].getType();
71
63045
  if (tn.isInteger())
72
  {
73
20591
    return rewriteArithEqualityExt(node);
74
  }
75
42454
  if (tn.isStringLike())
76
  {
77
42454
    return rewriteStrEqualityExt(node);
78
  }
79
  return node;
80
}
81
82
42898
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
83
{
84
42898
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
85
85796
  TypeNode stype = node[0].getType();
86
87
42898
  NodeManager* nm = NodeManager::currentNM();
88
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
89
128180
  for (unsigned r = 0; r < 2; r++)
90
  {
91
    // must call rewrite contains directly to avoid infinite loop
92
170884
    Node ctn = nm->mkNode(STRING_CONTAINS, node[r], node[1 - r]);
93
85602
    ctn = rewriteContains(ctn);
94
85602
    Assert(!ctn.isNull());
95
85602
    if (ctn.isConst())
96
    {
97
21134
      if (!ctn.getConst<bool>())
98
      {
99
320
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
100
      }
101
      else
102
      {
103
        // definitely contains but not syntactically equal
104
        // We may be able to simplify, e.g.
105
        //  str.++( x, "a" ) == "a"  ----> x = ""
106
      }
107
    }
108
  }
109
110
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
111
  // This covers cases like str.++( x, x ) == "a" ---> false
112
85156
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
113
85156
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
114
85156
  Node len_eq = len0.eqNode(len1);
115
42578
  len_eq = Rewriter::rewrite(len_eq);
116
42578
  if (len_eq.isConst() && !len_eq.getConst<bool>())
117
  {
118
5
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
119
  }
120
121
85146
  std::vector<Node> c[2];
122
127719
  for (unsigned i = 0; i < 2; i++)
123
  {
124
85146
    utils::getConcat(node[i], c[i]);
125
  }
126
127
  // check if the prefix, suffix mismatches
128
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
129
42573
  unsigned minsize = std::min(c[0].size(), c[1].size());
130
127713
  for (unsigned r = 0; r < 2; r++)
131
  {
132
85437
    for (unsigned i = 0; i < minsize; i++)
133
    {
134
85285
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
135
85285
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
136
85578
      Node s = c[0][index1];
137
85578
      Node t = c[1][index2];
138
85285
      if (s.isConst() && t.isConst())
139
      {
140
367
        size_t lenS = Word::getLength(s);
141
367
        size_t lenT = Word::getLength(t);
142
367
        size_t lenShort = lenS <= lenT ? lenS : lenT;
143
1172
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
144
805
                                : Word::strncmp(s, t, lenShort);
145
367
        if (!isSameFix)
146
        {
147
8
          Node ret = NodeManager::currentNM()->mkConst(false);
148
4
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
149
        }
150
      }
151
85281
      if (s != t)
152
      {
153
84988
        break;
154
      }
155
    }
156
  }
157
158
85138
  Node new_ret;
159
  // ------- equality unification
160
42569
  bool changed = false;
161
127707
  for (unsigned i = 0; i < 2; i++)
162
  {
163
85668
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
164
    {
165
265
      c[0].pop_back();
166
265
      c[1].pop_back();
167
265
      changed = true;
168
    }
169
    // splice constants
170
255242
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
171
160366
        && c[1].back().isConst())
172
    {
173
432
      Node cs[2];
174
      size_t csl[2];
175
648
      for (unsigned j = 0; j < 2; j++)
176
      {
177
432
        cs[j] = c[j].back();
178
432
        csl[j] = Word::getLength(cs[j]);
179
      }
180
216
      size_t larger = csl[0] > csl[1] ? 0 : 1;
181
216
      size_t smallerSize = csl[1 - larger];
182
432
      if (cs[1 - larger]
183
432
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
184
                     : Word::prefix(cs[larger], smallerSize)))
185
      {
186
216
        size_t sizeDiff = csl[larger] - smallerSize;
187
216
        c[larger][c[larger].size() - 1] =
188
432
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
189
                   : Word::suffix(cs[larger], sizeDiff);
190
216
        c[1 - larger].pop_back();
191
216
        changed = true;
192
      }
193
    }
194
255414
    for (unsigned j = 0; j < 2; j++)
195
    {
196
170276
      std::reverse(c[j].begin(), c[j].end());
197
    }
198
  }
199
42569
  if (changed)
200
  {
201
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
202
414
    Node s1 = utils::mkConcat(c[0], stype);
203
414
    Node s2 = utils::mkConcat(c[1], stype);
204
405
    if (s1 != node[0] || s2 != node[1])
205
    {
206
396
      new_ret = s1.eqNode(s2);
207
      // We generally don't apply the extended equality rewriter if the
208
      // original node was an equality but we may be able to do additional
209
      // rewriting here, e.g.,
210
      // x++y = "" --> x = "" and y = ""
211
396
      new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
212
396
      return rewriteStrEqualityExt(new_ret);
213
    }
214
  }
215
216
  // ------- homogeneous constants
217
126421
  for (unsigned i = 0; i < 2; i++)
218
  {
219
168547
    Node cn = StringsEntail::checkHomogeneousString(node[i]);
220
84299
    if (!cn.isNull() && !Word::isEmpty(cn))
221
    {
222
18787
      Assert(cn.isConst());
223
18787
      Assert(Word::getLength(cn) == 1);
224
225
      // The operands of the concat on each side of the equality without
226
      // constant strings
227
56361
      std::vector<Node> trimmed[2];
228
      // Counts the number of `cn`s on each side
229
18787
      size_t numCns[2] = {0, 0};
230
56356
      for (size_t j = 0; j < 2; j++)
231
      {
232
        // Sort the operands of the concats on both sides of the equality
233
        // (since both sides may only contain one char, the order does not
234
        // matter)
235
37572
        std::sort(c[j].begin(), c[j].end());
236
75272
        for (const Node& cc : c[j])
237
        {
238
37703
          if (cc.isConst())
239
          {
240
            // Count the number of `cn`s in the string constant and make
241
            // sure that all chars are `cn`s
242
37527
            std::vector<Node> veccc = Word::getChars(cc);
243
37970
            for (const Node& cv : veccc)
244
            {
245
19208
              if (cv != cn)
246
              {
247
                // This conflict case should mostly should be taken care of by
248
                // multiset reasoning in the strings rewriter, but we recognize
249
                // this conflict just in case.
250
3
                new_ret = nm->mkConst(false);
251
                return returnRewrite(
252
3
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
253
              }
254
19205
              numCns[j]++;
255
            }
256
          }
257
          else
258
          {
259
18938
            trimmed[j].push_back(cc);
260
          }
261
        }
262
      }
263
264
      // We have to remove the same number of `cn`s from both sides, so the
265
      // side with less `cn`s determines how many we can remove
266
18784
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
267
56352
      for (size_t j = 0; j < 2; j++)
268
      {
269
37568
        size_t diff = numCns[j] - trimmedConst;
270
37568
        if (diff != 0)
271
        {
272
          // Add a constant string to the side with more `cn`s to restore
273
          // the difference in number of `cn`s
274
33768
          std::vector<Node> vec(diff, cn);
275
16884
          trimmed[j].push_back(Word::mkWordFlatten(vec));
276
        }
277
      }
278
279
37520
      Node lhs = utils::mkConcat(trimmed[i], stype);
280
37520
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
281
18784
      if (lhs != node[i] || ss != node[1 - i])
282
      {
283
        // e.g.
284
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
285
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
286
        //
287
        // We generally don't apply the extended equality rewriter if the
288
        // original node was an equality but we may be able to do additional
289
        // rewriting here.
290
48
        new_ret = lhs.eqNode(ss);
291
48
        new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
292
48
        return rewriteStrEqualityExt(new_ret);
293
      }
294
    }
295
  }
296
297
  // ------- rewrites for (= "" _)
298
84244
  Node empty = Word::mkEmptyWord(stype);
299
116404
  for (size_t i = 0; i < 2; i++)
300
  {
301
79299
    if (node[i] == empty)
302
    {
303
32415
      Node ne = node[1 - i];
304
18716
      if (ne.getKind() == STRING_REPLACE)
305
      {
306
        // (= "" (str.replace x y x)) ---> (= x "")
307
1081
        if (ne[0] == ne[2])
308
        {
309
64
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
310
32
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
311
        }
312
313
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
314
1049
        if (StringsEntail::checkNonEmpty(ne[2]))
315
        {
316
          Node ret =
317
              nm->mkNode(AND,
318
210
                         nm->mkNode(EQUAL, ne[0], empty),
319
420
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
320
105
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
321
        }
322
323
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
324
944
        if (StringsEntail::checkLengthOne(ne[1], true) && ne[2] == empty)
325
        {
326
302
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
327
151
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
328
        }
329
      }
330
17635
      else if (ne.getKind() == STRING_SUBSTR)
331
      {
332
27991
        Node zero = nm->mkConst(Rational(0));
333
334
16360
        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
335
        {
336
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
337
1361
          if (ne[1] == zero)
338
          {
339
742
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
340
371
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
341
          }
342
343
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
344
          // if n >= 0 and m > 0
345
1980
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
346
990
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
347
        }
348
349
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
350
14999
        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
351
        {
352
6736
          Node ret = nm->mkNode(LEQ, ne[2], zero);
353
3368
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
354
        }
355
      }
356
    }
357
  }
358
359
  // ------- rewrites for (= (str.replace _ _ _) _)
360
111259
  for (size_t i = 0; i < 2; i++)
361
  {
362
74196
    if (node[i].getKind() == STRING_REPLACE)
363
    {
364
5090
      Node repl = node[i];
365
5090
      Node x = node[1 - i];
366
367
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
368
2566
      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
369
      {
370
        Node ret = nm->mkNode(
371
12
            EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1]));
372
6
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
373
      }
374
375
      // (= x (str.replace y x y)) ---> (= x y)
376
2560
      if (repl[0] == repl[2] && x == repl[1])
377
      {
378
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
379
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
380
      }
381
382
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
383
2558
      if (x == repl[0])
384
      {
385
2668
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
386
1349
        if (eq.isConst() && !eq.getConst<bool>())
387
        {
388
60
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_CONTAINS, x, repl[1]));
389
30
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
390
        }
391
      }
392
393
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
394
      // if (str.len y) = (str.len z)
395
2528
      if (repl[2] == x)
396
      {
397
2438
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
398
2438
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
399
1221
        if (ArithEntail::checkEq(lenY, lenZ))
400
        {
401
          Node ret = nm->mkNode(OR,
402
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
403
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
404
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
405
        }
406
      }
407
    }
408
  }
409
410
  // Try to rewrite (= x y) into a conjunction of equalities based on length
411
  // entailment.
412
  //
413
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
414
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
415
  //
416
  // where yi' and yi'' correspond to some yj and
417
  //   (<= (str.len x) (str.++ y1' ... ym'))
418
110895
  for (unsigned i = 0; i < 2; i++)
419
  {
420
73996
    if (node[1 - i].getKind() == STRING_CONCAT)
421
    {
422
942
      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
423
942
      if (!new_ret.isNull())
424
      {
425
164
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
426
      }
427
    }
428
  }
429
430
36899
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
431
  {
432
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
433
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
434
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
435
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
436
    //
437
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
438
    //
439
    // This rewrite performs length-based equality splitting: If we can show
440
    // that two prefixes have the same length, we can split an equality into
441
    // two equalities, one over the prefixes and another over the suffixes.
442
90
    std::vector<Node> v0, v1;
443
51
    utils::getConcat(node[0], v0);
444
51
    utils::getConcat(node[1], v1);
445
51
    size_t startRhs = 0;
446
234
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
447
    {
448
378
      const std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
449
378
      Node pfx0 = utils::mkConcat(pfxv0, stype);
450
735
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
451
      {
452
607
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
453
        {
454
967
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
455
967
          Node pfx1 = utils::mkConcat(pfxv1, stype);
456
967
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
457
967
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
458
459
517
          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
460
          {
461
16
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
462
16
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
463
            Node ret = nm->mkNode(kind::AND,
464
16
                                  pfx0.eqNode(pfx1),
465
16
                                  utils::mkConcat(sfxv0, stype)
466
32
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
467
8
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
468
          }
469
509
          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
470
          {
471
            // The prefix on the right-hand side is strictly longer than the
472
            // prefix on the left-hand side, so we try to strip the right-hand
473
            // prefix by the length of the left-hand prefix
474
            //
475
            // Example:
476
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
477
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
478
55
            std::vector<Node> rpfxv1;
479
55
            if (StringsEntail::stripSymbolicLength(
480
                    pfxv1, rpfxv1, 1, lenPfx0, true))
481
            {
482
              // The rewrite requires the full left-hand prefix length to be
483
              // stripped (otherwise we would have to keep parts of the
484
              // left-hand prefix).
485
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
486
              {
487
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
488
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
489
                Node ret =
490
                    nm->mkNode(kind::AND,
491
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
492
                               utils::mkConcat(sfxv0, stype)
493
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
494
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
495
              }
496
            }
497
498
            // If the prefix of the right-hand side is (strictly) longer than
499
            // the prefix of the left-hand side, we can advance the left-hand
500
            // side (since the length of the right-hand side is only increasing
501
            // in the inner loop)
502
55
            break;
503
          }
504
454
          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
505
          {
506
            // The prefix on the left-hand side is strictly longer than the
507
            // prefix on the right-hand side, so we try to strip the left-hand
508
            // prefix by the length of the right-hand prefix
509
            //
510
            // Example:
511
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
512
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
513
138
            std::vector<Node> sfxv0 = pfxv0;
514
138
            std::vector<Node> rpfxv0;
515
71
            if (StringsEntail::stripSymbolicLength(
516
                    sfxv0, rpfxv0, 1, lenPfx1, true))
517
            {
518
              // The rewrite requires the full right-hand prefix length to be
519
              // stripped (otherwise we would have to keep parts of the
520
              // right-hand prefix).
521
12
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
522
              {
523
4
                sfxv0.insert(sfxv0.end(), v0.begin() + i, v0.end());
524
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
525
                Node ret =
526
                    nm->mkNode(kind::AND,
527
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
528
8
                               utils::mkConcat(sfxv0, stype)
529
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
530
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
531
              }
532
            }
533
534
            // If the prefix of the left-hand side is (strictly) longer than
535
            // the prefix of the right-hand side, then we don't need to check
536
            // that right-hand prefix for future left-hand prefixes anymore
537
            // (since they are increasing in length)
538
67
            startRhs = j + 1;
539
          }
540
        }
541
      }
542
    }
543
  }
544
36887
  return node;
545
}
546
547
20591
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
548
{
549
20591
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
550
551
  // cases where we can solve the equality
552
553
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
554
555
20591
  return node;
556
}
557
558
219293
Node SequencesRewriter::rewriteLength(Node node)
559
{
560
219293
  Assert(node.getKind() == STRING_LENGTH);
561
219293
  NodeManager* nm = NodeManager::currentNM();
562
219293
  Kind nk0 = node[0].getKind();
563
219293
  if (node[0].isConst())
564
  {
565
13710
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
566
6855
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
567
  }
568
212438
  else if (nk0 == kind::STRING_CONCAT)
569
  {
570
4399
    Node tmpNode = node[0];
571
4399
    if (tmpNode.getKind() == kind::STRING_CONCAT)
572
    {
573
8798
      std::vector<Node> node_vec;
574
17076
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
575
      {
576
12677
        if (tmpNode[i].isConst())
577
        {
578
3335
          node_vec.push_back(
579
6670
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
580
        }
581
        else
582
        {
583
9342
          node_vec.push_back(NodeManager::currentNM()->mkNode(
584
              kind::STRING_LENGTH, tmpNode[i]));
585
        }
586
      }
587
8798
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
588
4399
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
589
    }
590
  }
591
208039
  else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL)
592
  {
593
1971
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
594
1971
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
595
1009
    if (len1 == len2)
596
    {
597
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
598
94
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
599
47
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
600
962
    }
601
  }
602
207030
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
603
206938
           || nk0 == STRING_UPDATE)
604
  {
605
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
606
    // len( update( x, n, y ) ) = len( x )
607
212
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
608
106
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
609
  }
610
206924
  else if (nk0 == SEQ_UNIT)
611
  {
612
232
    Node retNode = nm->mkConst(Rational(1));
613
116
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
614
  }
615
207770
  return node;
616
}
617
618
// TODO (#1180) add rewrite
619
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
620
//  str.substr( x, n1, n2+n3 )
621
61595
Node SequencesRewriter::rewriteConcat(Node node)
622
{
623
61595
  Assert(node.getKind() == kind::STRING_CONCAT);
624
123190
  Trace("strings-rewrite-debug")
625
61595
      << "Strings::rewriteConcat start " << node << std::endl;
626
123190
  std::vector<Node> node_vec;
627
123190
  Node preNode = Node::null();
628
268981
  for (Node tmpNode : node)
629
  {
630
207386
    if (tmpNode.getKind() == STRING_CONCAT)
631
    {
632
1374
      unsigned j = 0;
633
      // combine the first term with the previous constant if applicable
634
1374
      if (!preNode.isNull())
635
      {
636
320
        if (tmpNode[0].isConst())
637
        {
638
282
          std::vector<Node> wvec;
639
141
          wvec.push_back(preNode);
640
141
          wvec.push_back(tmpNode[0]);
641
141
          preNode = Word::mkWordFlatten(wvec);
642
141
          node_vec.push_back(preNode);
643
        }
644
        else
645
        {
646
179
          node_vec.push_back(preNode);
647
179
          node_vec.push_back(tmpNode[0]);
648
        }
649
320
        preNode = Node::null();
650
320
        ++j;
651
      }
652
      // insert the middle terms to node_vec
653
1374
      if (j <= tmpNode.getNumChildren() - 1)
654
      {
655
1374
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
656
      }
657
      // take the last term as the current
658
1374
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
659
    }
660
207386
    if (!tmpNode.isConst())
661
    {
662
103503
      if (!preNode.isNull())
663
      {
664
20471
        if (preNode.isConst() && !Word::isEmpty(preNode))
665
        {
666
19684
          node_vec.push_back(preNode);
667
        }
668
20471
        preNode = Node::null();
669
      }
670
103503
      node_vec.push_back(tmpNode);
671
    }
672
    else
673
    {
674
103883
      if (preNode.isNull())
675
      {
676
52108
        preNode = tmpNode;
677
      }
678
      else
679
      {
680
103550
        std::vector<Node> vec;
681
51775
        vec.push_back(preNode);
682
51775
        vec.push_back(tmpNode);
683
51775
        preNode = Word::mkWordFlatten(vec);
684
      }
685
    }
686
  }
687
61595
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
688
  {
689
30291
    node_vec.push_back(preNode);
690
  }
691
692
  // Sort adjacent operands in str.++ that all result in the same string or the
693
  // empty string.
694
  //
695
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
696
  // (str.++ ... [sort those 3 arguments] ... )
697
61595
  size_t lastIdx = 0;
698
123190
  Node lastX;
699
217800
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
700
  {
701
312410
    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
702
156205
    bool nextX = false;
703
156205
    if (s != lastX)
704
    {
705
152645
      nextX = true;
706
    }
707
708
156205
    if (nextX)
709
    {
710
152645
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
711
152645
      lastX = s;
712
152645
      lastIdx = i;
713
    }
714
  }
715
61595
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
716
717
123190
  TypeNode tn = node.getType();
718
123190
  Node retNode = utils::mkConcat(node_vec, tn);
719
123190
  Trace("strings-rewrite-debug")
720
61595
      << "Strings::rewriteConcat end " << retNode << std::endl;
721
61595
  if (retNode != node)
722
  {
723
19117
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
724
  }
725
42478
  return node;
726
}
727
728
2864
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
729
{
730
2864
  Assert(node.getKind() == kind::REGEXP_CONCAT);
731
2864
  NodeManager* nm = NodeManager::currentNM();
732
5728
  Trace("strings-rewrite-debug")
733
2864
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
734
5728
  Node retNode = node;
735
5728
  std::vector<Node> vec;
736
2864
  bool changed = false;
737
5728
  Node emptyRe;
738
739
  // get the string type that are members of this regular expression
740
5728
  TypeNode rtype = node.getType();
741
5728
  TypeNode stype;
742
2864
  if (rtype.isRegExp())
743
  {
744
    // standard regular expressions are for strings
745
2864
    stype = nm->stringType();
746
  }
747
  else
748
  {
749
    Unimplemented();
750
  }
751
752
10948
  for (const Node& c : node)
753
  {
754
8120
    if (c.getKind() == REGEXP_CONCAT)
755
    {
756
218
      changed = true;
757
698
      for (const Node& cc : c)
758
      {
759
480
        vec.push_back(cc);
760
      }
761
    }
762
19174
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
763
19094
             && Word::isEmpty(c[0]))
764
    {
765
128
      changed = true;
766
128
      emptyRe = c;
767
    }
768
7774
    else if (c.getKind() == REGEXP_EMPTY)
769
    {
770
      // re.++( ..., empty, ... ) ---> empty
771
72
      Node ret = nm->mkNode(REGEXP_EMPTY);
772
36
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
773
    }
774
    else
775
    {
776
7738
      vec.push_back(c);
777
    }
778
  }
779
2828
  if (changed)
780
  {
781
    // flatten
782
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
783
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
784
328
    if (vec.empty())
785
    {
786
2
      Assert(!emptyRe.isNull());
787
2
      retNode = emptyRe;
788
    }
789
    else
790
    {
791
326
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
792
    }
793
328
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
794
  }
795
5000
  Trace("strings-rewrite-debug")
796
2500
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
797
5000
  std::vector<Node> cvec;
798
  // the current accumulation of constant strings
799
5000
  std::vector<Node> preReStr;
800
  // whether the last component was (_)*
801
2500
  bool lastAllStar = false;
802
5000
  String emptyStr = String("");
803
  // this loop checks to see if components can be combined or dropped
804
12324
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
805
  {
806
19648
    Node curr;
807
9824
    if (i < size)
808
    {
809
7324
      curr = vec[i];
810
7324
      Assert(curr.getKind() != REGEXP_CONCAT);
811
    }
812
    // update preReStr
813
9824
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
814
    {
815
3008
      lastAllStar = false;
816
3008
      preReStr.push_back(curr[0]);
817
3008
      curr = Node::null();
818
    }
819
6816
    else if (!preReStr.empty())
820
    {
821
2552
      Assert(!lastAllStar);
822
      // this groups consecutive strings a++b ---> ab
823
5104
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
824
2552
      cvec.push_back(acc);
825
2552
      preReStr.clear();
826
    }
827
4264
    else if (!curr.isNull() && lastAllStar)
828
    {
829
      // if empty, drop it
830
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
831
324
      if (RegExpEntail::isConstRegExp(curr)
832
324
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
833
      {
834
44
        curr = Node::null();
835
      }
836
    }
837
9824
    if (!curr.isNull())
838
    {
839
4272
      lastAllStar = false;
840
4272
      if (curr.getKind() == REGEXP_STAR)
841
      {
842
        // we can group stars (a)* ++ (a)* ---> (a)*
843
2252
        if (!cvec.empty() && cvec.back() == curr)
844
        {
845
8
          curr = Node::null();
846
        }
847
2244
        else if (curr[0].getKind() == REGEXP_SIGMA)
848
        {
849
902
          Assert(!lastAllStar);
850
902
          lastAllStar = true;
851
          // go back and remove empty ones from back of cvec
852
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
853
2572
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
854
3452
                 && RegExpEntail::testConstStringInRegExp(
855
722
                     emptyStr, 0, cvec.back()))
856
          {
857
8
            cvec.pop_back();
858
          }
859
        }
860
      }
861
    }
862
9824
    if (!curr.isNull())
863
    {
864
4264
      cvec.push_back(curr);
865
    }
866
  }
867
2500
  Assert(!cvec.empty());
868
2500
  retNode = utils::mkConcat(cvec, rtype);
869
2500
  if (retNode != node)
870
  {
871
    // handles all cases where consecutive re constants are combined or
872
    // dropped as described in the loop above.
873
198
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
874
  }
875
876
  // flipping adjacent star arguments
877
2302
  changed = false;
878
6356
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
879
  {
880
4054
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
881
    {
882
      // by convention, flip the order (a*)++a ---> a++(a*)
883
64
      std::swap(cvec[i], cvec[i + 1]);
884
64
      changed = true;
885
    }
886
  }
887
2302
  if (changed)
888
  {
889
48
    retNode = utils::mkConcat(cvec, rtype);
890
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
891
  }
892
2254
  return node;
893
}
894
895
1170
Node SequencesRewriter::rewriteStarRegExp(TNode node)
896
{
897
1170
  Assert(node.getKind() == REGEXP_STAR);
898
1170
  NodeManager* nm = NodeManager::currentNM();
899
2340
  Node retNode = node;
900
1170
  if (node[0].getKind() == REGEXP_STAR)
901
  {
902
    // ((R)*)* ---> R*
903
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
904
  }
905
3962
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
906
3902
           && Word::isEmpty(node[0][0]))
907
  {
908
    // ("")* ---> ""
909
18
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
910
  }
911
1148
  else if (node[0].getKind() == REGEXP_EMPTY)
912
  {
913
    // (empty)* ---> ""
914
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
915
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
916
  }
917
1140
  else if (node[0].getKind() == REGEXP_UNION)
918
  {
919
    // simplification of unions under star
920
256
    if (RegExpEntail::hasEpsilonNode(node[0]))
921
    {
922
38
      bool changed = false;
923
38
      std::vector<Node> node_vec;
924
120
      for (const Node& nc : node[0])
925
      {
926
240
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
927
236
            && Word::isEmpty(nc[0]))
928
        {
929
          // can be removed
930
38
          changed = true;
931
        }
932
        else
933
        {
934
44
          node_vec.push_back(nc);
935
        }
936
      }
937
38
      if (changed)
938
      {
939
38
        retNode = node_vec.size() == 1 ? node_vec[0]
940
                                       : nm->mkNode(REGEXP_UNION, node_vec);
941
38
        retNode = nm->mkNode(REGEXP_STAR, retNode);
942
        // simplification of union beneath star based on loop above
943
        // for example, ( "" | "a" )* ---> ("a")*
944
38
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
945
      }
946
    }
947
  }
948
1102
  return node;
949
}
950
951
1080
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
952
{
953
1080
  Kind nk = node.getKind();
954
1080
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
955
2160
  Trace("strings-rewrite-debug")
956
1080
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
957
2160
  std::vector<Node> node_vec;
958
2160
  std::vector<Node> polRegExp[2];
959
3463
  for (const Node& ni : node)
960
  {
961
2385
    if (ni.getKind() == nk)
962
    {
963
42
      for (const Node& nic : ni)
964
      {
965
28
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
966
        {
967
26
          node_vec.push_back(nic);
968
        }
969
      }
970
    }
971
2371
    else if (ni.getKind() == REGEXP_EMPTY)
972
    {
973
65
      if (nk == REGEXP_INTER)
974
      {
975
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
976
      }
977
      // otherwise, can ignore
978
    }
979
2306
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
980
    {
981
8
      if (nk == REGEXP_UNION)
982
      {
983
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
984
      }
985
      // otherwise, can ignore
986
    }
987
2298
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
988
    {
989
2259
      node_vec.push_back(ni);
990
2259
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
991
4518
      Node nia = pindex == 1 ? ni[0] : ni;
992
2259
      polRegExp[pindex].push_back(nia);
993
    }
994
  }
995
1078
  NodeManager* nm = NodeManager::currentNM();
996
  // use inclusion tests
997
1274
  for (const Node& negMem : polRegExp[1])
998
  {
999
346
    for (const Node& posMem : polRegExp[0])
1000
    {
1001
284
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
1002
284
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
1003
      // inclusion test for conflicting case m1 contains m2
1004
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
1005
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
1006
150
      if (RegExpEntail::regExpIncludes(m1, m2))
1007
      {
1008
32
        Node retNode;
1009
16
        if (nk == REGEXP_INTER)
1010
        {
1011
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1012
        }
1013
        else
1014
        {
1015
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1016
        }
1017
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1018
      }
1019
    }
1020
  }
1021
2124
  Node retNode;
1022
1062
  if (node_vec.empty())
1023
  {
1024
    if (nk == REGEXP_INTER)
1025
    {
1026
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1027
    }
1028
    else
1029
    {
1030
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1031
    }
1032
  }
1033
  else
1034
  {
1035
1062
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1036
  }
1037
1062
  if (retNode != node)
1038
  {
1039
    // flattening and removing children, based on loop above
1040
104
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1041
  }
1042
958
  return node;
1043
}
1044
1045
24
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1046
{
1047
24
  Assert(node.getKind() == REGEXP_LOOP);
1048
48
  Node retNode = node;
1049
48
  Node r = node[0];
1050
24
  if (r.getKind() == REGEXP_STAR)
1051
  {
1052
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1053
  }
1054
24
  NodeManager* nm = NodeManager::currentNM();
1055
48
  cvc5::Rational rMaxInt(String::maxSize());
1056
24
  uint32_t l = utils::getLoopMinOccurrences(node);
1057
48
  std::vector<Node> vec_nodes;
1058
110
  for (unsigned i = 0; i < l; i++)
1059
  {
1060
86
    vec_nodes.push_back(r);
1061
  }
1062
  Node n =
1063
24
      vec_nodes.size() == 0
1064
24
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1065
48
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1066
24
  uint32_t u = utils::getLoopMaxOccurrences(node);
1067
24
  if (u < l)
1068
  {
1069
8
    std::vector<Node> nvec;
1070
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1071
  }
1072
20
  else if (u == l)
1073
  {
1074
10
    retNode = n;
1075
  }
1076
  else
1077
  {
1078
20
    std::vector<Node> vec2;
1079
10
    vec2.push_back(n);
1080
20
    TypeNode rtype = nm->regExpType();
1081
52
    for (uint32_t j = l; j < u; j++)
1082
    {
1083
42
      vec_nodes.push_back(r);
1084
42
      n = utils::mkConcat(vec_nodes, rtype);
1085
42
      vec2.push_back(n);
1086
    }
1087
10
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1088
  }
1089
48
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1090
24
                      << std::endl;
1091
24
  if (retNode != node)
1092
  {
1093
24
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1094
  }
1095
  return node;
1096
}
1097
1098
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1099
{
1100
4
  Assert(node.getKind() == REGEXP_REPEAT);
1101
4
  NodeManager* nm = NodeManager::currentNM();
1102
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1103
4
  unsigned r = utils::getRepeatAmount(node);
1104
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1105
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1106
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1107
}
1108
1109
31
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1110
{
1111
31
  Assert(node.getKind() == REGEXP_OPT);
1112
31
  NodeManager* nm = NodeManager::currentNM();
1113
  Node retNode =
1114
      nm->mkNode(REGEXP_UNION,
1115
62
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1116
124
                 node[0]);
1117
62
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1118
}
1119
1120
45
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1121
{
1122
45
  Assert(node.getKind() == REGEXP_PLUS);
1123
45
  NodeManager* nm = NodeManager::currentNM();
1124
  Node retNode =
1125
90
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1126
90
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1127
}
1128
1129
55
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1130
{
1131
55
  Assert(node.getKind() == REGEXP_DIFF);
1132
55
  NodeManager* nm = NodeManager::currentNM();
1133
  Node retNode =
1134
110
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1135
110
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1136
}
1137
1138
376
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1139
{
1140
376
  Assert(node.getKind() == REGEXP_RANGE);
1141
  unsigned ch[2];
1142
1128
  for (size_t i = 0; i < 2; ++i)
1143
  {
1144
752
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1145
    {
1146
      // not applied to characters, it is not handled
1147
      return node;
1148
    }
1149
752
    ch[i] = node[i].getConst<String>().front();
1150
  }
1151
1152
376
  NodeManager* nm = NodeManager::currentNM();
1153
376
  if (node[0] == node[1])
1154
  {
1155
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1156
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1157
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1158
  }
1159
1160
376
  if (ch[0] > ch[1])
1161
  {
1162
    // re.range( "B", "A" ) ---> re.none
1163
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1164
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1165
  }
1166
374
  return node;
1167
}
1168
1169
11193
Node SequencesRewriter::rewriteMembership(TNode node)
1170
{
1171
11193
  NodeManager* nm = NodeManager::currentNM();
1172
22386
  Node x = node[0];
1173
22386
  Node r = node[1];
1174
1175
22386
  TypeNode stype = x.getType();
1176
22386
  TypeNode rtype = r.getType();
1177
1178
11193
  if(r.getKind() == kind::REGEXP_EMPTY)
1179
  {
1180
128
    Node retNode = NodeManager::currentNM()->mkConst(false);
1181
64
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1182
  }
1183
11129
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1184
  {
1185
    // test whether x in node[1]
1186
3466
    cvc5::String s = x.getConst<String>();
1187
1733
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1188
3466
    Node retNode = NodeManager::currentNM()->mkConst(test);
1189
1733
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1190
  }
1191
9396
  else if (r.getKind() == kind::REGEXP_SIGMA)
1192
  {
1193
124
    Node one = nm->mkConst(Rational(1));
1194
124
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1195
62
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1196
  }
1197
9334
  else if (r.getKind() == kind::REGEXP_STAR)
1198
  {
1199
3063
    if (x.isConst())
1200
    {
1201
40
      size_t xlen = Word::getLength(x);
1202
40
      if (xlen == 0)
1203
      {
1204
36
        Node retNode = nm->mkConst(true);
1205
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1206
18
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1207
      }
1208
22
      else if (xlen == 1)
1209
      {
1210
22
        if (r[0].getKind() == STRING_TO_REGEXP)
1211
        {
1212
4
          Node retNode = r[0][0].eqNode(x);
1213
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1214
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1215
        }
1216
      }
1217
    }
1218
3023
    else if (x.getKind() == STRING_CONCAT)
1219
    {
1220
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1221
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1222
      //     if the length of all strings in R is one.
1223
1068
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1224
620
      if (!flr.isNull())
1225
      {
1226
232
        Node one = nm->mkConst(Rational(1));
1227
202
        if (flr == one)
1228
        {
1229
344
          NodeBuilder nb(AND);
1230
606
          for (const Node& xc : x)
1231
          {
1232
434
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1233
          }
1234
          return returnRewrite(
1235
172
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1236
        }
1237
      }
1238
    }
1239
2871
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1240
    {
1241
202
      Node retNode = NodeManager::currentNM()->mkConst(true);
1242
101
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1243
    }
1244
  }
1245
6271
  else if (r.getKind() == kind::REGEXP_CONCAT)
1246
  {
1247
4367
    bool allSigma = true;
1248
4367
    bool allSigmaStrict = true;
1249
4367
    unsigned allSigmaMinSize = 0;
1250
8561
    Node constStr;
1251
4367
    size_t constIdx = 0;
1252
4367
    size_t nchildren = r.getNumChildren();
1253
9988
    for (size_t i = 0; i < nchildren; i++)
1254
    {
1255
14895
      Node rc = r[i];
1256
9274
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1257
9274
      if (rc.getKind() == kind::REGEXP_SIGMA)
1258
      {
1259
964
        allSigmaMinSize++;
1260
      }
1261
8310
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1262
      {
1263
2096
        allSigmaStrict = false;
1264
      }
1265
6214
      else if (rc.getKind() == STRING_TO_REGEXP)
1266
      {
1267
3516
        if (constStr.isNull())
1268
        {
1269
2561
          constStr = rc[0];
1270
2561
          constIdx = i;
1271
        }
1272
        else
1273
        {
1274
955
          allSigma = false;
1275
955
          break;
1276
        }
1277
      }
1278
      else
1279
      {
1280
2698
        allSigma = false;
1281
2698
        break;
1282
      }
1283
    }
1284
4367
    if (allSigma)
1285
    {
1286
714
      if (constStr.isNull())
1287
      {
1288
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1289
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1290
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1291
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1292
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1293
      }
1294
694
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1295
153
               && constIdx != nchildren - 1)
1296
      {
1297
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1298
306
        Node retNode = nm->mkNode(STRING_CONTAINS, x, constStr);
1299
153
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1300
      }
1301
    }
1302
  }
1303
3808
  else if (r.getKind() == kind::REGEXP_INTER
1304
1904
           || r.getKind() == kind::REGEXP_UNION)
1305
  {
1306
906
    std::vector<Node> mvec;
1307
1429
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1308
    {
1309
976
      mvec.push_back(
1310
1952
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1311
    }
1312
    Node retNode = NodeManager::currentNM()->mkNode(
1313
906
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1314
453
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1315
  }
1316
1451
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1317
  {
1318
1724
    Node retNode = x.eqNode(r[0]);
1319
862
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1320
  }
1321
589
  else if (r.getKind() == REGEXP_RANGE)
1322
  {
1323
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1324
    // we do not do this if the arguments are not constant
1325
354
    if (RegExpEntail::isConstRegExp(r))
1326
    {
1327
708
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1328
      Node retNode =
1329
          nm->mkNode(AND,
1330
708
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1331
1416
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1332
354
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1333
    }
1334
  }
1335
235
  else if (r.getKind() == REGEXP_COMPLEMENT)
1336
  {
1337
410
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1338
205
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1339
  }
1340
1341
  // do simple consumes
1342
13988
  Node retNode = node;
1343
6994
  if (r.getKind() == kind::REGEXP_STAR)
1344
  {
1345
8180
    for (unsigned dir = 0; dir <= 1; dir++)
1346
    {
1347
10944
      std::vector<Node> mchildren;
1348
5534
      utils::getConcat(x, mchildren);
1349
5534
      bool success = true;
1350
16806
      while (success)
1351
      {
1352
5648
        success = false;
1353
11284
        std::vector<Node> children;
1354
5648
        utils::getConcat(r[0], children);
1355
11284
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1356
5648
        if (!scn.isNull())
1357
        {
1358
24
          Trace("regexp-ext-rewrite")
1359
12
              << "Regexp star : const conflict : " << node << std::endl;
1360
12
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1361
        }
1362
5636
        else if (children.empty())
1363
        {
1364
          // fully consumed one copy of the STAR
1365
122
          if (mchildren.empty())
1366
          {
1367
            Trace("regexp-ext-rewrite")
1368
                << "Regexp star : full consume : " << node << std::endl;
1369
            Node ret = NodeManager::currentNM()->mkConst(true);
1370
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1371
          }
1372
          else
1373
          {
1374
244
            Node prev = retNode;
1375
366
            retNode = nm->mkNode(
1376
244
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1377
            // Iterate again if the node changed. It may not have changed if
1378
            // nothing was consumed from mchildren (e.g. if the body of the
1379
            // re.* accepts the empty string.
1380
122
            success = (retNode != prev);
1381
          }
1382
        }
1383
      }
1384
5522
      if (retNode != node)
1385
      {
1386
224
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1387
112
                                    << " -> " << retNode << std::endl;
1388
112
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1389
      }
1390
    }
1391
  }
1392
  else
1393
  {
1394
7826
    std::vector<Node> children;
1395
4224
    utils::getConcat(r, children);
1396
7826
    std::vector<Node> mchildren;
1397
4224
    utils::getConcat(x, mchildren);
1398
4224
    unsigned prevSize = children.size() + mchildren.size();
1399
7826
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1400
4224
    if (!scn.isNull())
1401
    {
1402
250
      Trace("regexp-ext-rewrite")
1403
125
          << "Regexp : const conflict : " << node << std::endl;
1404
125
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1405
    }
1406
4099
    else if ((children.size() + mchildren.size()) != prevSize)
1407
    {
1408
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1409
      // above, we strip components to construct an equivalent membership:
1410
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1411
994
      Node xn = utils::mkConcat(mchildren, stype);
1412
994
      Node emptyStr = Word::mkEmptyWord(stype);
1413
497
      if (children.empty())
1414
      {
1415
        // If we stripped all components on the right, then the left is
1416
        // equal to the empty string.
1417
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1418
52
        retNode = xn.eqNode(emptyStr);
1419
      }
1420
      else
1421
      {
1422
        // otherwise, construct the updated regular expression
1423
445
        retNode =
1424
890
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1425
      }
1426
994
      Trace("regexp-ext-rewrite")
1427
497
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1428
497
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1429
    }
1430
  }
1431
6248
  return node;
1432
}
1433
1434
538377
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1435
{
1436
1076754
  Trace("sequences-postrewrite")
1437
538377
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1438
1076754
  Node retNode = node;
1439
538377
  Kind nk = node.getKind();
1440
538377
  if (nk == kind::STRING_CONCAT)
1441
  {
1442
61595
    retNode = rewriteConcat(node);
1443
  }
1444
476782
  else if (nk == kind::EQUAL)
1445
  {
1446
126057
    retNode = rewriteEquality(node);
1447
  }
1448
350725
  else if (nk == kind::STRING_LENGTH)
1449
  {
1450
219293
    retNode = rewriteLength(node);
1451
  }
1452
131432
  else if (nk == kind::STRING_CHARAT)
1453
  {
1454
111
    retNode = rewriteCharAt(node);
1455
  }
1456
131321
  else if (nk == kind::STRING_SUBSTR)
1457
  {
1458
46805
    retNode = rewriteSubstr(node);
1459
  }
1460
84516
  else if (nk == kind::STRING_UPDATE)
1461
  {
1462
174
    retNode = rewriteUpdate(node);
1463
  }
1464
84342
  else if (nk == kind::STRING_CONTAINS)
1465
  {
1466
26223
    retNode = rewriteContains(node);
1467
  }
1468
58119
  else if (nk == kind::STRING_INDEXOF)
1469
  {
1470
5999
    retNode = rewriteIndexof(node);
1471
  }
1472
52120
  else if (nk == kind::STRING_INDEXOF_RE)
1473
  {
1474
929
    retNode = rewriteIndexofRe(node);
1475
  }
1476
51191
  else if (nk == kind::STRING_REPLACE)
1477
  {
1478
4083
    retNode = rewriteReplace(node);
1479
  }
1480
47108
  else if (nk == kind::STRING_REPLACE_ALL)
1481
  {
1482
194
    retNode = rewriteReplaceAll(node);
1483
  }
1484
46914
  else if (nk == kind::STRING_REPLACE_RE)
1485
  {
1486
397
    retNode = rewriteReplaceRe(node);
1487
  }
1488
46517
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1489
  {
1490
227
    retNode = rewriteReplaceReAll(node);
1491
  }
1492
46290
  else if (nk == STRING_REV)
1493
  {
1494
241
    retNode = rewriteStrReverse(node);
1495
  }
1496
46049
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1497
  {
1498
168
    retNode = rewritePrefixSuffix(node);
1499
  }
1500
45881
  else if (nk == kind::STRING_IN_REGEXP)
1501
  {
1502
11193
    retNode = rewriteMembership(node);
1503
  }
1504
34688
  else if (nk == REGEXP_CONCAT)
1505
  {
1506
2864
    retNode = rewriteConcatRegExp(node);
1507
  }
1508
31824
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1509
  {
1510
1080
    retNode = rewriteAndOrRegExp(node);
1511
  }
1512
30744
  else if (nk == REGEXP_DIFF)
1513
  {
1514
55
    retNode = rewriteDifferenceRegExp(node);
1515
  }
1516
30689
  else if (nk == REGEXP_STAR)
1517
  {
1518
1170
    retNode = rewriteStarRegExp(node);
1519
  }
1520
29519
  else if (nk == REGEXP_PLUS)
1521
  {
1522
45
    retNode = rewritePlusRegExp(node);
1523
  }
1524
29474
  else if (nk == REGEXP_OPT)
1525
  {
1526
31
    retNode = rewriteOptionRegExp(node);
1527
  }
1528
29443
  else if (nk == REGEXP_RANGE)
1529
  {
1530
376
    retNode = rewriteRangeRegExp(node);
1531
  }
1532
29067
  else if (nk == REGEXP_LOOP)
1533
  {
1534
24
    retNode = rewriteLoopRegExp(node);
1535
  }
1536
29043
  else if (nk == REGEXP_REPEAT)
1537
  {
1538
4
    retNode = rewriteRepeatRegExp(node);
1539
  }
1540
29039
  else if (nk == SEQ_UNIT)
1541
  {
1542
399
    retNode = rewriteSeqUnit(node);
1543
  }
1544
28640
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1545
  {
1546
388
    retNode = rewriteSeqNth(node);
1547
  }
1548
1549
1076754
  Trace("sequences-postrewrite")
1550
538377
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1551
538377
      << std::endl;
1552
538377
  if (node != retNode)
1553
  {
1554
    // also post process the rewrite, which may apply extended rewriting to
1555
    // equalities, if we rewrite to an equality from a non-equality
1556
91547
    retNode = postProcessRewrite(node, retNode);
1557
183094
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1558
91547
                                   << node << " to " << retNode << std::endl;
1559
91547
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1560
  }
1561
446830
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1562
446830
  return RewriteResponse(REWRITE_DONE, retNode);
1563
}
1564
1565
328544
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1566
{
1567
328544
  return RewriteResponse(REWRITE_DONE, node);
1568
}
1569
1570
687
TrustNode SequencesRewriter::expandDefinition(Node node)
1571
{
1572
1374
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1573
687
                           << std::endl;
1574
1575
687
  if (node.getKind() == kind::SEQ_NTH)
1576
  {
1577
2
    NodeManager* nm = NodeManager::currentNM();
1578
4
    Node s = node[0];
1579
4
    Node n = node[1];
1580
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1581
    Node cond = nm->mkNode(AND,
1582
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1583
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1584
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1585
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1586
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1587
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1588
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1589
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1590
  }
1591
685
  return TrustNode::null();
1592
}
1593
1594
388
Node SequencesRewriter::rewriteSeqNth(Node node)
1595
{
1596
388
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1597
776
  Node s = node[0];
1598
776
  Node i = node[1];
1599
388
  if (s.isConst() && i.isConst())
1600
  {
1601
28
    size_t len = Word::getLength(s);
1602
28
    if (i.getConst<Rational>().sgn() != -1)
1603
    {
1604
28
      size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1605
28
      if (pos < len)
1606
      {
1607
20
        std::vector<Node> elements = s.getConst<Sequence>().getVec();
1608
10
        const Node& ret = elements[pos];
1609
10
        return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1610
      }
1611
    }
1612
18
    if (node.getKind() == SEQ_NTH_TOTAL)
1613
    {
1614
      // return arbitrary term
1615
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1616
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1617
    }
1618
    else
1619
    {
1620
16
      return node;
1621
    }
1622
  }
1623
  else
1624
  {
1625
360
    return node;
1626
  }
1627
}
1628
1629
111
Node SequencesRewriter::rewriteCharAt(Node node)
1630
{
1631
111
  Assert(node.getKind() == STRING_CHARAT);
1632
111
  NodeManager* nm = NodeManager::currentNM();
1633
222
  Node one = nm->mkConst(Rational(1));
1634
222
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1635
222
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1636
}
1637
1638
46817
Node SequencesRewriter::rewriteSubstr(Node node)
1639
{
1640
46817
  Assert(node.getKind() == kind::STRING_SUBSTR);
1641
1642
46817
  NodeManager* nm = NodeManager::currentNM();
1643
46817
  if (node[0].isConst())
1644
  {
1645
10886
    if (Word::isEmpty(node[0]))
1646
    {
1647
1892
      Node ret = node[0];
1648
946
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1649
    }
1650
    // rewriting for constant arguments
1651
9940
    if (node[1].isConst() && node[2].isConst())
1652
    {
1653
9908
      Node s = node[0];
1654
9908
      cvc5::Rational rMaxInt(String::maxSize());
1655
      uint32_t start;
1656
4954
      if (node[1].getConst<Rational>() > rMaxInt)
1657
      {
1658
        // start beyond the maximum size of strings
1659
        // thus, it must be beyond the end point of this string
1660
8
        Node ret = Word::mkEmptyWord(node.getType());
1661
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1662
      }
1663
4950
      else if (node[1].getConst<Rational>().sgn() < 0)
1664
      {
1665
        // start before the beginning of the string
1666
1052
        Node ret = Word::mkEmptyWord(node.getType());
1667
526
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1668
      }
1669
      else
1670
      {
1671
4424
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1672
4424
        if (start >= Word::getLength(node[0]))
1673
        {
1674
          // start beyond the end of the string
1675
766
          Node ret = Word::mkEmptyWord(node.getType());
1676
383
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1677
        }
1678
      }
1679
4041
      if (node[2].getConst<Rational>() > rMaxInt)
1680
      {
1681
        // take up to the end of the string
1682
1
        size_t lenS = Word::getLength(s);
1683
2
        Node ret = Word::suffix(s, lenS - start);
1684
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1685
      }
1686
4040
      else if (node[2].getConst<Rational>().sgn() <= 0)
1687
      {
1688
720
        Node ret = Word::mkEmptyWord(node.getType());
1689
360
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1690
      }
1691
      else
1692
      {
1693
        uint32_t len =
1694
3680
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1695
3680
        if (start + len > Word::getLength(node[0]))
1696
        {
1697
          // take up to the end of the string
1698
327
          size_t lenS = Word::getLength(s);
1699
654
          Node ret = Word::suffix(s, lenS - start);
1700
327
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1701
        }
1702
        else
1703
        {
1704
          // compute the substr using the constant string
1705
6706
          Node ret = Word::substr(s, start, len);
1706
3353
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1707
        }
1708
      }
1709
    }
1710
  }
1711
81834
  Node zero = nm->mkConst(cvc5::Rational(0));
1712
1713
  // if entailed non-positive length or negative start point
1714
40917
  if (ArithEntail::check(zero, node[1], true))
1715
  {
1716
66
    Node ret = Word::mkEmptyWord(node.getType());
1717
33
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1718
  }
1719
40884
  else if (ArithEntail::check(zero, node[2]))
1720
  {
1721
1614
    Node ret = Word::mkEmptyWord(node.getType());
1722
807
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1723
  }
1724
1725
40077
  if (node[0].getKind() == STRING_SUBSTR)
1726
  {
1727
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1728
    //
1729
    // Note that this rewrite can be generalized to:
1730
    //
1731
    // (str.substr x a b) ---> "" if a >= (str.len x)
1732
    //
1733
    // This can be done when we generalize our entailment methods to
1734
    // accept an optional context. Then we could conjecture that
1735
    // (str.substr x a b) rewrites to "" and do a case analysis:
1736
    //
1737
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1738
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1739
    //
1740
    // For example, for (str.substr (str.substr x a a) a a), we could
1741
    // then deduce that under those assumptions, "a" is an
1742
    // over-approximation of the length of (str.substr x a a), which
1743
    // then allows us to reason that the result of the whole term must
1744
    // be empty.
1745
3965
    if (ArithEntail::check(node[1], node[0][2]))
1746
    {
1747
76
      Node ret = Word::mkEmptyWord(node.getType());
1748
38
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1749
    }
1750
  }
1751
36112
  else if (node[0].getKind() == STRING_REPLACE)
1752
  {
1753
    // (str.substr (str.replace x y z) 0 n)
1754
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1755
    // if (str.len y) = 1 and (str.len z) = 1
1756
546
    if (node[1] == zero)
1757
    {
1758
840
      if (StringsEntail::checkLengthOne(node[0][1], true)
1759
840
          && StringsEntail::checkLengthOne(node[0][2], true))
1760
      {
1761
        Node ret = nm->mkNode(
1762
            kind::STRING_REPLACE,
1763
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1764
            node[0][1],
1765
16
            node[0][2]);
1766
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1767
      }
1768
    }
1769
  }
1770
1771
80070
  std::vector<Node> n1;
1772
40035
  utils::getConcat(node[0], n1);
1773
80070
  TypeNode stype = node.getType();
1774
1775
  // definite inclusion
1776
40035
  if (node[1] == zero)
1777
  {
1778
28728
    Node curr = node[2];
1779
28728
    std::vector<Node> childrenr;
1780
14717
    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
1781
    {
1782
706
      if (curr != zero && !n1.empty())
1783
      {
1784
579
        childrenr.push_back(nm->mkNode(
1785
386
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1786
      }
1787
1412
      Node ret = utils::mkConcat(childrenr, stype);
1788
706
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1789
    }
1790
  }
1791
1792
  // symbolic length analysis
1793
114839
  for (unsigned r = 0; r < 2; r++)
1794
  {
1795
    // the amount of characters we can strip
1796
152722
    Node curr;
1797
77212
    if (r == 0)
1798
    {
1799
39329
      if (node[1] != zero)
1800
      {
1801
        // strip up to start point off the start of the string
1802
25318
        curr = node[1];
1803
      }
1804
    }
1805
37883
    else if (r == 1)
1806
    {
1807
      Node tot_len =
1808
75703
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1809
75703
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1810
37883
      if (node[2] != tot_len)
1811
      {
1812
36167
        if (ArithEntail::check(node[2], tot_len))
1813
        {
1814
          // end point beyond end point of string, map to tot_len
1815
76
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1816
38
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1817
        }
1818
        else
1819
        {
1820
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1821
36129
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1822
        }
1823
      }
1824
1825
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1826
      Node n1_lt_tot_len =
1827
75665
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1828
37845
      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
1829
      {
1830
36
        Node ret = Word::mkEmptyWord(node.getType());
1831
18
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1832
      }
1833
1834
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1835
      Node non_zero_len =
1836
75647
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1837
37827
      if (ArithEntail::checkWithAssumption(
1838
              non_zero_len, node[1], tot_len, false))
1839
      {
1840
4
        Node ret = Word::mkEmptyWord(node.getType());
1841
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1842
      }
1843
1844
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1845
      Node geq_zero_start =
1846
75645
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1847
37825
      if (ArithEntail::checkWithAssumption(
1848
              geq_zero_start, zero, tot_len, false))
1849
      {
1850
4
        Node ret = Word::mkEmptyWord(node.getType());
1851
        return returnRewrite(
1852
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1853
      }
1854
1855
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1856
37823
      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
1857
      {
1858
6
        Node ret = Word::mkEmptyWord(node.getType());
1859
3
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1860
      }
1861
    }
1862
77149
    if (!curr.isNull())
1863
    {
1864
      // strip off components while quantity is entailed positive
1865
61422
      int dir = r == 0 ? 1 : -1;
1866
121205
      std::vector<Node> childrenr;
1867
61422
      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
1868
      {
1869
1639
        if (r == 0)
1870
        {
1871
          Node ret = nm->mkNode(
1872
2892
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1873
1446
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1874
        }
1875
        else
1876
        {
1877
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1878
386
                                utils::mkConcat(n1, stype),
1879
                                node[1],
1880
772
                                node[2]);
1881
193
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1882
        }
1883
      }
1884
    }
1885
  }
1886
  // combine substr
1887
37627
  if (node[0].getKind() == kind::STRING_SUBSTR)
1888
  {
1889
6953
    Node start_inner = node[0][1];
1890
6953
    Node start_outer = node[1];
1891
3835
    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
1892
    {
1893
      // both are positive
1894
      // thus, start point is definitely start_inner+start_outer.
1895
      // We can rewrite if it for certain what the length is
1896
1897
      // the length of a string from the inner substr subtracts the start point
1898
      // of the outer substr
1899
      Node len_from_inner =
1900
4433
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1901
4433
      Node len_from_outer = node[2];
1902
4433
      Node new_len;
1903
      // take quantity that is for sure smaller than the other
1904
2575
      if (len_from_inner == len_from_outer)
1905
      {
1906
9
        new_len = len_from_inner;
1907
      }
1908
2566
      else if (ArithEntail::check(len_from_inner, len_from_outer))
1909
      {
1910
516
        new_len = len_from_outer;
1911
      }
1912
2050
      else if (ArithEntail::check(len_from_outer, len_from_inner))
1913
      {
1914
192
        new_len = len_from_inner;
1915
      }
1916
2575
      if (!new_len.isNull())
1917
      {
1918
1434
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1919
        Node ret =
1920
1434
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1921
717
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1922
      }
1923
    }
1924
  }
1925
36910
  return node;
1926
}
1927
1928
174
Node SequencesRewriter::rewriteUpdate(Node node)
1929
{
1930
174
  Assert(node.getKind() == kind::STRING_UPDATE);
1931
348
  Node s = node[0];
1932
174
  if (s.isConst())
1933
  {
1934
162
    if (Word::isEmpty(s))
1935
    {
1936
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1937
    }
1938
    // rewriting for constant arguments
1939
162
    if (node[1].isConst())
1940
    {
1941
142
      cvc5::Rational rMaxInt(String::maxSize());
1942
122
      if (node[1].getConst<Rational>() > rMaxInt)
1943
      {
1944
        // start beyond the maximum size of strings
1945
        // thus, it must be beyond the end point of this string
1946
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1947
      }
1948
122
      else if (node[1].getConst<Rational>().sgn() < 0)
1949
      {
1950
        // start before the beginning of the string
1951
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1952
      }
1953
      uint32_t start =
1954
122
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1955
122
      size_t len = Word::getLength(s);
1956
122
      if (start >= len)
1957
      {
1958
        // start beyond the end of the string
1959
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1960
      }
1961
122
      if (node[2].isConst())
1962
      {
1963
204
        Node ret = Word::update(s, start, node[2]);
1964
102
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1965
      }
1966
    }
1967
  }
1968
1969
72
  return node;
1970
}
1971
1972
111825
Node SequencesRewriter::rewriteContains(Node node)
1973
{
1974
111825
  Assert(node.getKind() == kind::STRING_CONTAINS);
1975
111825
  NodeManager* nm = NodeManager::currentNM();
1976
1977
111825
  if (node[0] == node[1])
1978
  {
1979
1360
    Node ret = NodeManager::currentNM()->mkConst(true);
1980
680
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1981
  }
1982
111145
  if (node[0].isConst())
1983
  {
1984
56648
    if (node[1].isConst())
1985
    {
1986
9628
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1987
4814
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1988
    }
1989
    else
1990
    {
1991
94860
      Node t = node[1];
1992
51834
      if (Word::isEmpty(node[0]))
1993
      {
1994
        Node len1 =
1995
38045
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1996
19143
        if (ArithEntail::check(len1, true))
1997
        {
1998
          // we handle the false case here since the rewrite for equality
1999
          // uses this function, hence we want to conclude false if possible.
2000
          // len(x)>0 => contains( "", x ) ---> false
2001
482
          Node ret = NodeManager::currentNM()->mkConst(false);
2002
241
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
2003
        }
2004
      }
2005
32691
      else if (StringsEntail::checkLengthOne(t))
2006
      {
2007
17084
        std::vector<Node> vec = Word::getChars(node[0]);
2008
17084
        Node emp = Word::mkEmptyWord(t.getType());
2009
17084
        NodeBuilder nb(OR);
2010
8542
        nb << emp.eqNode(t);
2011
18430
        for (const Node& c : vec)
2012
        {
2013
9888
          Assert(c.getType() == t.getType());
2014
9888
          nb << c.eqNode(t);
2015
        }
2016
2017
        // str.contains("ABCabc", t) --->
2018
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2019
        // if len(t) <= 1
2020
17084
        Node ret = nb;
2021
8542
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2022
      }
2023
24149
      else if (node[1].getKind() == kind::STRING_CONCAT)
2024
      {
2025
        int firstc, lastc;
2026
724
        if (!StringsEntail::canConstantContainConcat(
2027
                node[0], node[1], firstc, lastc))
2028
        {
2029
50
          Node ret = NodeManager::currentNM()->mkConst(false);
2030
25
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2031
        }
2032
      }
2033
    }
2034
  }
2035
97523
  if (node[1].isConst())
2036
  {
2037
48029
    size_t len = Word::getLength(node[1]);
2038
48029
    if (len == 0)
2039
    {
2040
      // contains( x, "" ) ---> true
2041
37644
      Node ret = NodeManager::currentNM()->mkConst(true);
2042
18822
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2043
    }
2044
29207
    else if (len == 1)
2045
    {
2046
      // The following rewrites are specific to a single character second
2047
      // argument of contains, where we can reason that this character is
2048
      // not split over multiple components in the first argument.
2049
19065
      if (node[0].getKind() == STRING_CONCAT)
2050
      {
2051
1546
        std::vector<Node> nc1;
2052
773
        utils::getConcat(node[0], nc1);
2053
1546
        NodeBuilder nb(OR);
2054
2810
        for (const Node& ncc : nc1)
2055
        {
2056
2037
          nb << nm->mkNode(STRING_CONTAINS, ncc, node[1]);
2057
        }
2058
1546
        Node ret = nb.constructNode();
2059
        // str.contains( x ++ y, "A" ) --->
2060
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2061
773
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2062
      }
2063
18292
      else if (node[0].getKind() == STRING_REPLACE)
2064
      {
2065
2967
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2066
1557
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2067
        {
2068
294
          Node d1 = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2069
          Node d2 =
2070
              nm->mkNode(AND,
2071
294
                         nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2072
588
                         nm->mkNode(STRING_CONTAINS, node[0][2], node[1]));
2073
294
          Node ret = nm->mkNode(OR, d1, d2);
2074
          // If str.contains( y, "A" ) ---> false, then:
2075
          // str.contains( str.replace( x, y, z ), "A" ) --->
2076
          //   str.contains( x, "A" ) OR
2077
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2078
147
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2079
        }
2080
      }
2081
    }
2082
  }
2083
155562
  std::vector<Node> nc1;
2084
77781
  utils::getConcat(node[0], nc1);
2085
155562
  std::vector<Node> nc2;
2086
77781
  utils::getConcat(node[1], nc2);
2087
2088
  // component-wise containment
2089
155562
  std::vector<Node> nc1rb;
2090
155562
  std::vector<Node> nc1re;
2091
77781
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2092
  {
2093
5240
    Node ret = NodeManager::currentNM()->mkConst(true);
2094
2620
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2095
  }
2096
150322
  TypeNode stype = node[0].getType();
2097
2098
  // strip endpoints
2099
150322
  std::vector<Node> nb;
2100
150322
  std::vector<Node> ne;
2101
75161
  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
2102
  {
2103
    Node ret = NodeManager::currentNM()->mkNode(
2104
274
        kind::STRING_CONTAINS, utils::mkConcat(nc1, stype), node[1]);
2105
137
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2106
  }
2107
2108
152500
  for (const Node& n : nc2)
2109
  {
2110
78343
    if (n.getKind() == kind::STRING_REPLACE)
2111
    {
2112
      // (str.contains x (str.replace y z w)) --> false
2113
      // if (str.contains x y) = false and (str.contains x w) = false
2114
      //
2115
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2116
      // replacement does not change y. (str.contains x w) checks that if the
2117
      // replacement changes anything in y, the w makes it impossible for it to
2118
      // occur in x.
2119
6565
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2120
3716
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2121
      {
2122
1392
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2123
714
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2124
        {
2125
72
          Node res = nm->mkConst(false);
2126
36
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2127
        }
2128
      }
2129
2130
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2131
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2132
      //
2133
      // TODO: Remove with under-/over-approximation
2134
3680
      if (node[0] == n[0] && node[0] == n[2])
2135
      {
2136
1662
        Node ret;
2137
831
        if (nc2.size() > 1)
2138
        {
2139
80
          Node emp = Word::mkEmptyWord(stype);
2140
80
          NodeBuilder nb2(kind::AND);
2141
120
          for (const Node& n2 : nc2)
2142
          {
2143
80
            if (n2 == n)
2144
            {
2145
40
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2146
            }
2147
            else
2148
            {
2149
40
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2150
            }
2151
          }
2152
40
          ret = nb2.constructNode();
2153
        }
2154
        else
2155
        {
2156
791
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2157
        }
2158
831
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2159
      }
2160
    }
2161
  }
2162
2163
  // length entailment
2164
148314
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2165
148314
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2166
74157
  if (ArithEntail::check(len_n2, len_n1, true))
2167
  {
2168
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2169
274
    Node ret = NodeManager::currentNM()->mkConst(false);
2170
137
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2171
  }
2172
2173
  // multi-set reasoning
2174
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2175
  //   since the number of a's in the second argument is greater than the number
2176
  //   of a's in the first argument
2177
74020
  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
2178
  {
2179
36
    Node ret = nm->mkConst(false);
2180
18
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2181
  }
2182
2183
74002
  if (ArithEntail::check(len_n2, len_n1, false))
2184
  {
2185
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2186
59696
    Node ret = node[0].eqNode(node[1]);
2187
29848
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2188
  }
2189
2190
  // splitting
2191
44154
  if (node[0].getKind() == kind::STRING_CONCAT)
2192
  {
2193
2152
    if (node[1].isConst())
2194
    {
2195
1055
      Node t = node[1];
2196
      // Below, we are looking for a constant component of node[0]
2197
      // has no overlap with node[1], which means we can split.
2198
      // Notice that if the first or last components had no
2199
      // overlap, these would have been removed by strip
2200
      // constant endpoints above.
2201
      // Hence, we consider only the inner children.
2202
1214
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2203
      {
2204
        // constant contains
2205
698
        if (node[0][i].isConst())
2206
        {
2207
          // if no overlap, we can split into disjunction
2208
29
          if (Word::noOverlapWith(node[0][i], node[1]))
2209
          {
2210
46
            std::vector<Node> nc0;
2211
23
            utils::getConcat(node[0], nc0);
2212
46
            std::vector<Node> spl[2];
2213
23
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2214
23
            Assert(i < nc0.size() - 1);
2215
23
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2216
            Node ret = NodeManager::currentNM()->mkNode(
2217
                kind::OR,
2218
92
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2219
46
                                                 utils::mkConcat(spl[0], stype),
2220
                                                 node[1]),
2221
92
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2222
46
                                                 utils::mkConcat(spl[1], stype),
2223
115
                                                 node[1]));
2224
23
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2225
          }
2226
        }
2227
      }
2228
    }
2229
  }
2230
42002
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2231
  {
2232
    // (str.contains (str.substr x n (str.len y)) y) --->
2233
    //   (= (str.substr x n (str.len y)) y)
2234
    //
2235
    // TODO: Remove with under-/over-approximation
2236
11284
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2237
    {
2238
32
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2239
16
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2240
    }
2241
  }
2242
30718
  else if (node[0].getKind() == kind::STRING_REPLACE)
2243
  {
2244
900
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2245
    {
2246
472
      if (Word::noOverlapWith(node[1], node[0][1])
2247
472
          && Word::noOverlapWith(node[1], node[0][2]))
2248
      {
2249
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2250
        // if there is no overlap between c1 and c3 and none between c2 and c3
2251
4
        Node ret = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2252
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2253
      }
2254
    }
2255
2256
898
    if (node[0][0] == node[0][2])
2257
    {
2258
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2259
106
      if (node[0][1] == node[1])
2260
      {
2261
18
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2262
9
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2263
      }
2264
2265
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2266
      // if (str.len z) <= 1
2267
97
      if (StringsEntail::checkLengthOne(node[1]))
2268
      {
2269
12
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2270
6
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2271
      }
2272
    }
2273
2274
    // (str.contains (str.replace x y z) z) --->
2275
    //   (or (str.contains x y) (str.contains x z))
2276
883
    if (node[0][2] == node[1])
2277
    {
2278
      Node ret =
2279
          nm->mkNode(OR,
2280
292
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2281
584
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][2]));
2282
146
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2283
    }
2284
2285
    // (str.contains (str.replace x y z) w) --->
2286
    //   (str.contains (str.replace x y "") w)
2287
    // if (str.contains z w) ---> false and (str.len w) = 1
2288
737
    if (StringsEntail::checkLengthOne(node[1]))
2289
    {
2290
432
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2291
242
      if (!ctn.isNull() && !ctn.getConst<bool>())
2292
      {
2293
104
        Node empty = Word::mkEmptyWord(stype);
2294
        Node ret = nm->mkNode(
2295
            kind::STRING_CONTAINS,
2296
104
            nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty),
2297
208
            node[1]);
2298
52
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2299
      }
2300
    }
2301
  }
2302
2303
43900
  if (node[1].getKind() == kind::STRING_REPLACE)
2304
  {
2305
    // (str.contains x (str.replace y x y)) --->
2306
    //   (str.contains x y)
2307
1041
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2308
    {
2309
8
      Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1][0]);
2310
4
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2311
    }
2312
2313
    // (str.contains x (str.replace "" x y)) --->
2314
    //   (= "" (str.replace "" x y))
2315
    //
2316
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2317
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2318
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2319
2072
    Node emp = Word::mkEmptyWord(stype);
2320
1037
    if (node[0] == node[1][1] && node[1][0] == emp)
2321
    {
2322
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2323
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2324
    }
2325
  }
2326
2327
43894
  return node;
2328
}
2329
2330
5999
Node SequencesRewriter::rewriteIndexof(Node node)
2331
{
2332
5999
  Assert(node.getKind() == kind::STRING_INDEXOF);
2333
5999
  NodeManager* nm = NodeManager::currentNM();
2334
2335
5999
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2336
  {
2337
    // z<0  implies  str.indexof( x, y, z ) --> -1
2338
356
    Node negone = nm->mkConst(Rational(-1));
2339
178
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2340
  }
2341
2342
  // the string type
2343
11642
  TypeNode stype = node[0].getType();
2344
2345
  // evaluation and simple cases
2346
11642
  std::vector<Node> children0;
2347
5821
  utils::getConcat(node[0], children0);
2348
5821
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2349
  {
2350
1012
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2351
869
    if (node[2].getConst<Rational>() > rMaxInt)
2352
    {
2353
      // We know that, due to limitations on the size of string constants
2354
      // in our implementation, that accessing a position greater than
2355
      // rMaxInt is guaranteed to be out of bounds.
2356
      Node negone = nm->mkConst(Rational(-1));
2357
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2358
    }
2359
869
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2360
1012
    Node s = children0[0];
2361
1012
    Node t = node[1];
2362
    uint32_t start =
2363
869
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2364
869
    std::size_t ret = Word::find(s, t, start);
2365
869
    if (ret != std::string::npos)
2366
    {
2367
632
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2368
316
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2369
    }
2370
553
    else if (children0.size() == 1)
2371
    {
2372
820
      Node negone = nm->mkConst(Rational(-1));
2373
410
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2374
    }
2375
  }
2376
2377
5095
  if (node[0] == node[1])
2378
  {
2379
92
    if (node[2].isConst())
2380
    {
2381
18
      if (node[2].getConst<Rational>().sgn() == 0)
2382
      {
2383
        // indexof( x, x, 0 ) --> 0
2384
36
        Node zero = nm->mkConst(Rational(0));
2385
18
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2386
      }
2387
    }
2388
74
    if (ArithEntail::check(node[2], true))
2389
    {
2390
      // y>0  implies  indexof( x, x, y ) --> -1
2391
      Node negone = nm->mkConst(Rational(-1));
2392
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2393
    }
2394
126
    Node emp = Word::mkEmptyWord(stype);
2395
74
    if (node[0] != emp)
2396
    {
2397
      // indexof( x, x, z ) ---> indexof( "", "", z )
2398
44
      Node ret = nm->mkNode(STRING_INDEXOF, emp, emp, node[2]);
2399
22
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2400
    }
2401
  }
2402
2403
10110
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2404
10110
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2405
10110
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2406
2407
5055
  if (node[1].isConst())
2408
  {
2409
4479
    if (Word::isEmpty(node[1]))
2410
    {
2411
78
      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
2412
      {
2413
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2414
10
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2415
      }
2416
    }
2417
  }
2418
2419
5045
  if (ArithEntail::check(len1, len0m2, true))
2420
  {
2421
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2422
16
    Node negone = nm->mkConst(Rational(-1));
2423
8
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2424
  }
2425
2426
10074
  Node fstr = node[0];
2427
5037
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2428
  {
2429
1050
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2430
1050
    fstr = Rewriter::rewrite(fstr);
2431
  }
2432
2433
10074
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2434
10074
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2435
5037
                                 << fstr << ", " << node[1] << ")" << std::endl;
2436
5037
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2437
10074
  std::vector<Node> children1;
2438
5037
  utils::getConcat(node[1], children1);
2439
5037
  if (!cmp_conr.isNull())
2440
  {
2441
274
    if (cmp_conr.getConst<bool>())
2442
    {
2443
269
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2444
      {
2445
        // past the first position in node[0] that contains node[1], we can drop
2446
323
        std::vector<Node> nb;
2447
323
        std::vector<Node> ne;
2448
195
        int cc = d_stringsEntail.componentContains(
2449
195
            children0, children1, nb, ne, true, 1);
2450
195
        if (cc != -1 && !ne.empty())
2451
        {
2452
          // For example:
2453
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2454
110
          Node nn = utils::mkConcat(children0, stype);
2455
110
          Node ret = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2456
55
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2457
        }
2458
2459
        // Strip components from the beginning that are guaranteed not to match
2460
140
        if (StringsEntail::stripConstantEndpoints(
2461
                children0, children1, nb, ne, 1))
2462
        {
2463
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2464
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2465
          Node ret = nm->mkNode(
2466
              kind::PLUS,
2467
24
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2468
48
              nm->mkNode(kind::STRING_INDEXOF,
2469
24
                         utils::mkConcat(children0, stype),
2470
                         node[1],
2471
48
                         node[2]));
2472
12
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2473
        }
2474
      }
2475
      // To show that the first argument definitely contains the second, the
2476
      // index must be a valid index in the first argument. This ensures that
2477
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2478
      // is beyond the length of t. This is not required for the above rewrites,
2479
      // which only apply when n=0.
2480
202
      if (ArithEntail::check(node[2]) && ArithEntail::check(len0, node[2]))
2481
      {
2482
        // strip symbolic length
2483
262
        Node new_len = node[2];
2484
262
        std::vector<Node> nr;
2485
134
        if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2486
        {
2487
          // For example:
2488
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2489
          // implies
2490
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2491
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2492
12
          Node nn = utils::mkConcat(children0, stype);
2493
          Node ret =
2494
              nm->mkNode(PLUS,
2495
12
                         nm->mkNode(MINUS, node[2], new_len),
2496
24
                         nm->mkNode(STRING_INDEXOF, nn, node[1], new_len));
2497
6
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2498
        }
2499
      }
2500
    }
2501
    else
2502
    {
2503
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2504
10
      Node negone = nm->mkConst(Rational(-1));
2505
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2506
    }
2507
  }
2508
  else
2509
  {
2510
9495
    Node new_len = node[2];
2511
9495
    std::vector<Node> nr;
2512
4763
    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2513
    {
2514
      // Normalize the string before the start index.
2515
      //
2516
      // For example:
2517
      // str.indexof(str.++("ABCD", x), y, 3) --->
2518
      // str.indexof(str.++("AAAD", x), y, 3)
2519
123
      Node nodeNr = utils::mkConcat(nr, stype);
2520
123
      Node normNr = lengthPreserveRewrite(nodeNr);
2521
77
      if (normNr != nodeNr)
2522
      {
2523
62
        std::vector<Node> normNrChildren;
2524
31
        utils::getConcat(normNr, normNrChildren);
2525
62
        std::vector<Node> children(normNrChildren);
2526
31
        children.insert(children.end(), children0.begin(), children0.end());
2527
62
        Node nn = utils::mkConcat(children, stype);
2528
62
        Node res = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2529
31
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2530
      }
2531
    }
2532
  }
2533
2534
4928
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2535
  {
2536
7830
    std::vector<Node> cb;
2537
7830
    std::vector<Node> ce;
2538
3920
    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
2539
    {
2540
20
      Node ret = utils::mkConcat(children0, stype);
2541
10
      ret = nm->mkNode(STRING_INDEXOF, ret, node[1], node[2]);
2542
      // For example:
2543
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2544
10
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2545
    }
2546
  }
2547
2548
4918
  return node;
2549
}
2550
2551
929
Node SequencesRewriter::rewriteIndexofRe(Node node)
2552
{
2553
929
  Assert(node.getKind() == STRING_INDEXOF_RE);
2554
929
  NodeManager* nm = NodeManager::currentNM();
2555
1858
  Node s = node[0];
2556
1858
  Node r = node[1];
2557
1858
  Node n = node[2];
2558
1858
  Node zero = nm->mkConst(Rational(0));
2559
1858
  Node slen = nm->mkNode(STRING_LENGTH, s);
2560
2561
929
  if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
2562
  {
2563
64
    Node ret = nm->mkConst(Rational(-1));
2564
32
    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
2565
  }
2566
2567
897
  if (RegExpEntail::isConstRegExp(r))
2568
  {
2569
627
    if (s.isConst() && n.isConst())
2570
    {
2571
298
      Rational nrat = n.getConst<Rational>();
2572
298
      cvc5::Rational rMaxInt(cvc5::String::maxSize());
2573
149
      if (nrat > rMaxInt)
2574
      {
2575
        // We know that, due to limitations on the size of string constants
2576
        // in our implementation, that accessing a position greater than
2577
        // rMaxInt is guaranteed to be out of bounds.
2578
        Node negone = nm->mkConst(Rational(-1));
2579
        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
2580
      }
2581
2582
149
      uint32_t start = nrat.getNumerator().toUnsignedInt();
2583
298
      Node rem = nm->mkConst(s.getConst<String>().substr(start));
2584
149
      std::pair<size_t, size_t> match = firstMatch(rem, r);
2585
      Node ret = nm->mkConst(
2586
389
          Rational(match.first == string::npos
2587
                       ? -1
2588
389
                       : static_cast<int64_t>(start + match.first)));
2589
149
      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
2590
    }
2591
2592
478
    if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
2593
    {
2594
668
      String emptyStr("");
2595
338
      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
2596
      {
2597
8
        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
2598
      }
2599
    }
2600
  }
2601
740
  return node;
2602
}
2603
2604
4083
Node SequencesRewriter::rewriteReplace(Node node)
2605
{
2606
4083
  Assert(node.getKind() == kind::STRING_REPLACE);
2607
4083
  NodeManager* nm = NodeManager::currentNM();
2608
2609
4083
  if (node[1].isConst() && Word::isEmpty(node[1]))
2610
  {
2611
858
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2612
429
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2613
  }
2614
  // the string type
2615
7308
  TypeNode stype = node.getType();
2616
2617
7308
  std::vector<Node> children0;
2618
3654
  utils::getConcat(node[0], children0);
2619
2620
3654
  if (node[1].isConst() && children0[0].isConst())
2621
  {
2622
891
    Node s = children0[0];
2623
891
    Node t = node[1];
2624
882
    std::size_t p = Word::find(s, t);
2625
882
    if (p == std::string::npos)
2626
    {
2627
572
      if (children0.size() == 1)
2628
      {
2629
563
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2630
      }
2631
    }
2632
    else
2633
    {
2634
620
      Node s1 = Word::substr(s, 0, p);
2635
620
      Node s3 = Word::substr(s, p + Word::getLength(t));
2636
620
      std::vector<Node> children;
2637
310
      if (!Word::isEmpty(s1))
2638
      {
2639
51
        children.push_back(s1);
2640
      }
2641
310
      children.push_back(node[2]);
2642
310
      if (!Word::isEmpty(s3))
2643
      {
2644
88
        children.push_back(s3);
2645
      }
2646
310
      children.insert(children.end(), children0.begin() + 1, children0.end());
2647
620
      Node ret = utils::mkConcat(children, stype);
2648
310
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2649
    }
2650
  }
2651
2652
  // rewrites that apply to both replace and replaceall
2653
5562
  Node rri = rewriteReplaceInternal(node);
2654
2781
  if (!rri.isNull())
2655
  {
2656
    // printing of the rewrite managed by the call above
2657
169
    return rri;
2658
  }
2659
2660
2612
  if (node[0] == node[2])
2661
  {
2662
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2663
908
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2664
908
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2665
603
    if (ArithEntail::check(l1, l0))
2666
    {
2667
290
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2668
    }
2669
2670
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2671
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2672
313
    if (StringsEntail::checkLengthOne(node[0]))
2673
    {
2674
252
      Node empty = Word::mkEmptyWord(stype);
2675
      Node rn1 = Rewriter::rewrite(
2676
252
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2677
130
      if (rn1 != node[1])
2678
      {
2679
252
        std::vector<Node> emptyNodes;
2680
        bool allEmptyEqs;
2681
130
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2682
2683
130
        if (allEmptyEqs)
2684
        {
2685
188
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2686
98
          if (node[1] != nn1)
2687
          {
2688
16
            Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]);
2689
8
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2690
          }
2691
        }
2692
      }
2693
    }
2694
  }
2695
2696
4628
  std::vector<Node> children1;
2697
2314
  utils::getConcat(node[1], children1);
2698
2699
  // check if contains definitely does (or does not) hold
2700
4628
  Node cmp_con = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1]);
2701
4628
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2702
2314
  if (cmp_conr.isConst())
2703
  {
2704
523
    if (cmp_conr.getConst<bool>())
2705
    {
2706
      // component-wise containment
2707
826
      std::vector<Node> cb;
2708
826
      std::vector<Node> ce;
2709
508
      int cc = d_stringsEntail.componentContains(
2710
508
          children0, children1, cb, ce, true, 1);
2711
508
      if (cc != -1)
2712
      {
2713
414
        if (cc == 0 && children0[0] == children1[0])
2714
        {
2715
          // definitely a prefix, can do the replace
2716
          // for example,
2717
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2718
          //   str.++( y, "b" )
2719
146
          std::vector<Node> cres;
2720
73
          cres.push_back(node[2]);
2721
73
          cres.insert(cres.end(), ce.begin(), ce.end());
2722
146
          Node ret = utils::mkConcat(cres, stype);
2723
73
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2724
        }
2725
341
        else if (!ce.empty())
2726
        {
2727
          // we can pull remainder past first definite containment
2728
          // for example,
2729
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2730
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2731
          // this is independent of whether the second argument may be empty
2732
234
          std::vector<Node> scc;
2733
351
          scc.push_back(NodeManager::currentNM()->mkNode(
2734
              kind::STRING_REPLACE,
2735
234
              utils::mkConcat(children0, stype),
2736
              node[1],
2737
              node[2]));
2738
117
          scc.insert(scc.end(), ce.begin(), ce.end());
2739
234
          Node ret = utils::mkConcat(scc, stype);
2740
117
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2741
        }
2742
      }
2743
    }
2744
    else
2745
    {
2746
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2747
15
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2748
    }
2749
  }
2750
1791
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2751
  {
2752
    // Rewriting the str.contains may return equalities of the form (= x "").
2753
    // In that case, we can substitute the variables appearing in those
2754
    // equalities with the empty string in the third argument of the
2755
    // str.replace. For example:
2756
    //
2757
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2758
    //
2759
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2760
    // but that means that y must be empty in that case. Thus, we can
2761
    // substitute y with "" in the third argument. Note that the third argument
2762
    // does not matter when the str.replace does not apply.
2763
    //
2764
700
    Node empty = Word::mkEmptyWord(stype);
2765
2766
700
    std::vector<Node> emptyNodes;
2767
    bool allEmptyEqs;
2768
364
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2769
2770
364
    if (emptyNodes.size() > 0)
2771
    {
2772
      // Perform the substitutions
2773
416
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2774
      Node nn2 = node[2].substitute(
2775
416
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2776
2777
      // If the contains rewrites to a conjunction of empty-string equalities
2778
      // and we are doing the replacement in an empty string, we can rewrite
2779
      // the string-to-replace with a concatenation of all the terms that must
2780
      // be empty:
2781
      //
2782
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2783
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2784
222
      if (node[0] == empty && allEmptyEqs)
2785
      {
2786
192
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2787
192
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2788
100
        if (nn1 != node[1] || nn2 != node[2])
2789
        {
2790
16
          Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2);
2791
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2792
        }
2793
      }
2794
2795
214
      if (nn2 != node[2])
2796
      {
2797
40
        Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2);
2798
20
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2799
      }
2800
    }
2801
  }
2802
2803
2081
  if (cmp_conr != cmp_con)
2804
  {
2805
781
    if (StringsEntail::checkNonEmpty(node[1]))
2806
    {
2807
      // pull endpoints that can be stripped
2808
      // for example,
2809
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2810
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2811
905
      std::vector<Node> cb;
2812
905
      std::vector<Node> ce;
2813
461
      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
2814
      {
2815
34
        std::vector<Node> cc;
2816
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2817
17
        cc.push_back(
2818
68
            NodeManager::currentNM()->mkNode(kind::STRING_REPLACE,
2819
34
                                             utils::mkConcat(children0, stype),
2820
                                             node[1],
2821
                                             node[2]));
2822
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2823
34
        Node ret = utils::mkConcat(cc, stype);
2824
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2825
      }
2826
    }
2827
  }
2828
2829
2064
  children1.clear();
2830
2064
  utils::getConcat(node[1], children1);
2831
4128
  Node lastChild1 = children1[children1.size() - 1];
2832
2064
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2833
  {
2834
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2835
    // (str.replace x (str.++ t
2836
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2837
    // if j > len(x)
2838
    //
2839
    // Reasoning: If the string to be replaced is longer than x, then it does
2840
    // not matter how much longer it is, the result is always x. Thus, it is
2841
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2842
2843
116
    children1.pop_back();
2844
    // Length of the non-substr components in the second argument
2845
    Node partLen1 =
2846
224
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2847
224
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2848
2849
224
    Node zero = nm->mkConst(Rational(0));
2850
224
    Node one = nm->mkConst(Rational(1));
2851
224
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2852
224
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2853
    // Check len(t) + j > len(x) + 1
2854
116
    if (ArithEntail::check(maxLen1, len0_1, true))
2855
    {
2856
8
      children1.push_back(nm->mkNode(
2857
          kind::STRING_SUBSTR,
2858
          lastChild1[0],
2859
          lastChild1[1],
2860
16
          nm->mkNode(
2861
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2862
      Node res = nm->mkNode(kind::STRING_REPLACE,
2863
                            node[0],
2864
16
                            utils::mkConcat(children1, stype),
2865
32
                            node[2]);
2866
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2867
    }
2868
  }
2869
2870
2056
  if (node[0].getKind() == STRING_REPLACE)
2871
  {
2872
360
    Node x = node[0];
2873
360
    Node y = node[1];
2874
360
    Node z = node[2];
2875
186
    if (x[0] == x[2] && x[0] == y)
2876
    {
2877
      // (str.replace (str.replace y w y) y z) -->
2878
      //   (str.replace (str.replace y w z) y z)
2879
      // if (str.len w) >= (str.len z) and w != z
2880
      //
2881
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2882
      // does appear in y.
2883
      //
2884
      // Case (1): In this case, the reasoning is trivial. The
2885
      // inner replace does not do anything, so we can just replace its third
2886
      // argument with any string.
2887
      //
2888
      // Case (2): After the inner replace, we are guaranteed to have a string
2889
      // that contains y at the index of w in the original string y. The outer
2890
      // replace then replaces that y with z, so we can short-circuit that
2891
      // replace by directly replacing w with z in the inner replace. We can
2892
      // only do that if the result of the new inner replace does not contain
2893
      // y, otherwise we end up doing two replaces that are different from the
2894
      // original expression. We enforce that by requiring that the length of w
2895
      // has to be greater or equal to the length of z and that w and z have to
2896
      // be different. This makes sure that an inner replace changes a string
2897
      // to a string that is shorter than y, making it impossible for the outer
2898
      // replace to match.
2899
128
      Node w = x[1];
2900
2901
      // (str.len w) >= (str.len z)
2902
128
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2903
128
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2904
70
      if (ArithEntail::check(wlen, zlen))
2905
      {
2906
        // w != z
2907
64
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2908
38
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2909
        {
2910
          Node ret = nm->mkNode(kind::STRING_REPLACE,
2911
24
                                nm->mkNode(kind::STRING_REPLACE, y, w, z),
2912
                                y,
2913
48
                                z);
2914
12
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2915
        }
2916
      }
2917
    }
2918
  }
2919
2920
2044
  if (node[1].getKind() == STRING_REPLACE)
2921
  {
2922
244
    if (node[1][0] == node[0])
2923
    {
2924
52
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2925
      {
2926
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2927
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2928
      }
2929
52
      bool dualReplIteSuccess = false;
2930
96
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2931
52
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2932
      {
2933
        // str.contains( x, z ) ---> false
2934
        //   implies
2935
        // str.replace( x, str.replace( x, y, z ), w ) --->
2936
        // ite( str.contains( x, y ), x, w )
2937
        dualReplIteSuccess = true;
2938
      }
2939
      else
2940
      {
2941
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2942
        //   implies
2943
        // str.replace( x, str.replace( x, y, z ), w ) --->
2944
        // ite( str.contains( x, y ), x, w )
2945
52
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2946
52
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2947
        {
2948
16
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2949
16
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2950
          {
2951
8
            dualReplIteSuccess = true;
2952
          }
2953
        }
2954
      }
2955
52
      if (dualReplIteSuccess)
2956
      {
2957
        Node res = nm->mkNode(ITE,
2958
16
                              nm->mkNode(STRING_CONTAINS, node[0], node[1][1]),
2959
                              node[0],
2960
32
                              node[2]);
2961
8
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2962
      }
2963
    }
2964
2965
236
    bool invSuccess = false;
2966
236
    if (node[1][1] == node[0])
2967
    {
2968
52
      if (node[1][0] == node[1][2])
2969
      {
2970
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2971
        invSuccess = true;
2972
      }
2973
52
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2974
      {
2975
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2976
        //   implies
2977
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2978
40
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2979
20
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2980
      }
2981
    }
2982
    else
2983
    {
2984
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2985
      //   implies
2986
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2987
368
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2988
184
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2989
      {
2990
8
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2991
8
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2992
      }
2993
    }
2994
236
    if (invSuccess)
2995
    {
2996
      Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]);
2997
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2998
    }
2999
  }
3000
2036
  if (node[2].getKind() == STRING_REPLACE)
3001
  {
3002
56
    if (node[2][1] == node[0])
3003
    {
3004
      // str.contains( z, w ) ----> false implies
3005
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3006
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
3007
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
3008
      {
3009
        Node res =
3010
            nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]);
3011
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
3012
      }
3013
    }
3014
56
    if (node[2][0] == node[1])
3015
    {
3016
4
      bool success = false;
3017
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
3018
      {
3019
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3020
        success = true;
3021
      }
3022
      else
3023
      {
3024
        // str.contains( x, z ) ----> false implies
3025
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3026
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
3027
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3028
      }
3029
4
      if (success)
3030
      {
3031
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
3032
      }
3033
    }
3034
  }
3035
  // miniscope based on components that do not contribute to contains
3036
  // for example,
3037
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
3038
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
3039
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3040
2036
  if (StringsEntail::checkLengthOne(node[1]))
3041
  {
3042
1360
    Node lastLhs;
3043
684
    unsigned lastCheckIndex = 0;
3044
696
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3045
    {
3046
126
      unsigned checkIndex = children0.size() - i;
3047
138
      std::vector<Node> checkLhs;
3048
126
      checkLhs.insert(
3049
252
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3050
138
      Node lhs = utils::mkConcat(checkLhs, stype);
3051
138
      Node rhs = children0[checkIndex];
3052
138
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
3053
126
      if (!ctn.isNull() && ctn.getConst<bool>())
3054
      {
3055
12
        lastLhs = lhs;
3056
12
        lastCheckIndex = checkIndex;
3057
      }
3058
      else
3059
      {
3060
114
        break;
3061
      }
3062
    }
3063
684
    if (!lastLhs.isNull())
3064
    {
3065
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
3066
32
                             children0.end());
3067
16
      Node rem = utils::mkConcat(remc, stype);
3068
      Node ret =
3069
          nm->mkNode(STRING_CONCAT,
3070
16
                     nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]),
3071
32
                     rem);
3072
      // for example:
3073
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3074
      // Since we know that the first occurrence of "A" cannot be in the
3075
      // second occurrence of x. Notice this is specific to single characters
3076
      // due to complications with finds that span multiple components for
3077
      // non-characters.
3078
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3079
    }
3080
  }
3081
3082
  // TODO (#1180) incorporate these?
3083
  // contains( t, s ) =>
3084
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3085
  // contains( t, s ) =>
3086
  //   contains( replace( t, s, r ), r ) ----> true
3087
3088
2028
  return node;
3089
}
3090
3091
194
Node SequencesRewriter::rewriteReplaceAll(Node node)
3092
{
3093
194
  Assert(node.getKind() == STRING_REPLACE_ALL);
3094
3095
388
  TypeNode stype = node.getType();
3096
3097
194
  if (node[0].isConst() && node[1].isConst())
3098
  {
3099
80
    std::vector<Node> children;
3100
80
    Node s = node[0];
3101
80
    Node t = node[1];
3102
40
    if (Word::isEmpty(s) || Word::isEmpty(t))
3103
    {
3104
10
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3105
    }
3106
30
    std::size_t sizeS = Word::getLength(s);
3107
30
    std::size_t sizeT = Word::getLength(t);
3108
30
    std::size_t index = 0;
3109
30
    std::size_t curr = 0;
3110
65
    do
3111
    {
3112
95
      curr = Word::find(s, t, index);
3113
95
      if (curr != std::string::npos)
3114
      {
3115
65
        if (curr > index)
3116
        {
3117
32
          children.push_back(Word::substr(s, index, curr - index));
3118
        }
3119
65
        children.push_back(node[2]);
3120
65
        index = curr + sizeT;
3121
      }
3122
      else
3123
      {
3124
30
        children.push_back(Word::substr(s, index, sizeS - index));
3125
      }
3126
95
    } while (curr != std::string::npos && curr < sizeS);
3127
    // constant evaluation
3128
60
    Node res = utils::mkConcat(children, stype);
3129
30
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3130
  }
3131
3132
  // rewrites that apply to both replace and replaceall
3133
308
  Node rri = rewriteReplaceInternal(node);
3134
154
  if (!rri.isNull())
3135
  {
3136
    // printing of the rewrite managed by the call above
3137
4
    return rri;
3138
  }
3139
3140
150
  return node;
3141
}
3142
3143
2935
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3144
{
3145
2935
  Kind nk = node.getKind();
3146
2935
  Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL);
3147
3148
2935
  if (node[1] == node[2])
3149
  {
3150
80
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3151
  }
3152
3153
2855
  if (node[0] == node[1])
3154
  {
3155
    // only holds for replaceall if non-empty
3156
117
    if (nk == STRING_REPLACE || StringsEntail::checkNonEmpty(node[1]))
3157
    {
3158
93
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3159
    }
3160
  }
3161
3162
2762
  return Node::null();
3163
}
3164
3165
397
Node SequencesRewriter::rewriteReplaceRe(Node node)
3166
{
3167
397
  Assert(node.getKind() == STRING_REPLACE_RE);
3168
397
  NodeManager* nm = NodeManager::currentNM();
3169
794
  Node x = node[0];
3170
794
  Node y = node[1];
3171
794
  Node z = node[2];
3172
3173
397
  if (RegExpEntail::isConstRegExp(y))
3174
  {
3175
267
    if (x.isConst())
3176
    {
3177
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3178
75
      std::pair<size_t, size_t> match = firstMatch(x, y);
3179
75
      if (match.first != string::npos)
3180
      {
3181
84
        String s = x.getConst<String>();
3182
        Node ret = nm->mkNode(STRING_CONCAT,
3183
84
                              nm->mkConst(s.substr(0, match.first)),
3184
                              z,
3185
168
                              nm->mkConst(s.substr(match.second)));
3186
42
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3187
      }
3188
      else
3189
      {
3190
33
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3191
      }
3192
    }
3193
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3194
380
    String emptyStr("");
3195
192
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3196
    {
3197
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3198
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3199
    }
3200
  }
3201
318
  return node;
3202
}
3203
3204
227
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3205
{
3206
227
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3207
227
  NodeManager* nm = NodeManager::currentNM();
3208
454
  Node x = node[0];
3209
454
  Node y = node[1];
3210
454
  Node z = node[2];
3211
3212
227
  if (RegExpEntail::isConstRegExp(y))
3213
  {
3214
181
    if (x.isConst())
3215
    {
3216
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3217
      //   "Z" ++ y ++ "Z" ++ y
3218
110
      TypeNode t = x.getType();
3219
110
      Node emp = Word::mkEmptyWord(t);
3220
      Node yp = Rewriter::rewrite(
3221
110
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3222
110
      std::vector<Node> res;
3223
110
      String rem = x.getConst<String>();
3224
55
      std::pair<size_t, size_t> match(0, 0);
3225
153
      while (rem.size() != 0)
3226
      {
3227
57
        match = firstMatch(nm->mkConst(rem), yp);
3228
57
        if (match.first == string::npos)
3229
        {
3230
8
          break;
3231
        }
3232
49
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3233
49
        res.push_back(z);
3234
49
        rem = rem.substr(match.second);
3235
      }
3236
55
      res.push_back(nm->mkConst(rem));
3237
110
      Node ret = utils::mkConcat(res, t);
3238
55
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3239
    }
3240
  }
3241
3242
172
  return node;
3243
}
3244
3245
281
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3246
{
3247
281
  Assert(n.isConst() && n.getType().isStringLike());
3248
281
  Assert(r.getType().isRegExp());
3249
281
  NodeManager* nm = NodeManager::currentNM();
3250
3251
562
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3252
562
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3253
562
  String s = n.getConst<String>();
3254
3255
281
  if (s.size() == 0)
3256
  {
3257
61
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3258
    {
3259
6
      return std::make_pair(0, 0);
3260
    }
3261
    else
3262
    {
3263
55
      return std::make_pair(string::npos, string::npos);
3264
    }
3265
  }
3266
3267
335
  for (size_t i = 0, size = s.size(); i < size; i++)
3268
  {
3269
291
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3270
    {
3271
569
      for (size_t j = i; j <= size; j++)
3272
      {
3273
962
        String substr = s.substr(i, j - i);
3274
569
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3275
        {
3276
176
          return std::make_pair(i, j);
3277
        }
3278
      }
3279
    }
3280
  }
3281
3282
44
  return std::make_pair(string::npos, string::npos);
3283
}
3284
3285
241
Node SequencesRewriter::rewriteStrReverse(Node node)
3286
{
3287
241
  Assert(node.getKind() == STRING_REV);
3288
241
  NodeManager* nm = NodeManager::currentNM();
3289
482
  Node x = node[0];
3290
241
  if (x.isConst())
3291
  {
3292
    // reverse the characters in the constant
3293
126
    Node retNode = Word::reverse(x);
3294
63
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3295
  }
3296
178
  else if (x.getKind() == STRING_CONCAT)
3297
  {
3298
78
    std::vector<Node> children;
3299
119
    for (const Node& nc : x)
3300
    {
3301
80
      children.push_back(nm->mkNode(STRING_REV, nc));
3302
    }
3303
39
    std::reverse(children.begin(), children.end());
3304
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3305
78
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3306
39
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3307
  }
3308
139
  else if (x.getKind() == STRING_REV)
3309
  {
3310
    // rev( rev( x ) ) --> x
3311
10
    Node retNode = x[0];
3312
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3313
  }
3314
134
  return node;
3315
}
3316
3317
168
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3318
{
3319
168
  Assert(n.getKind() == kind::STRING_PREFIX
3320
         || n.getKind() == kind::STRING_SUFFIX);
3321
168
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3322
168
  if (n[0] == n[1])
3323
  {
3324
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3325
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3326
  }
3327
160
  if (n[0].isConst())
3328
  {
3329
70
    if (Word::isEmpty(n[0]))
3330
    {
3331
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3332
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3333
    }
3334
  }
3335
156
  if (n[1].isConst())
3336
  {
3337
46
    Node s = n[1];
3338
46
    size_t lenS = Word::getLength(s);
3339
46
    if (n[0].isConst())
3340
    {
3341
      Node ret = NodeManager::currentNM()->mkConst(false);
3342
      Node t = n[0];
3343
      size_t lenT = Word::getLength(t);
3344
      if (lenS >= lenT)
3345
      {
3346
        if ((isPrefix && t == Word::prefix(s, lenT))
3347
            || (!isPrefix && t == Word::suffix(s, lenT)))
3348
        {
3349
          ret = NodeManager::currentNM()->mkConst(true);
3350
        }
3351
      }
3352
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3353
    }
3354
46
    else if (lenS == 0)
3355
    {
3356
8
      Node ret = n[0].eqNode(n[1]);
3357
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3358
    }
3359
42
    else if (lenS == 1)
3360
    {
3361
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3362
      // (str.contains "A" x )
3363
      Node ret =
3364
84
          NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS, n[1], n[0]);
3365
42
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3366
    }
3367
  }
3368
220
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3369
220
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3370
220
  Node val;
3371
110
  if (isPrefix)
3372
  {
3373
83
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3374
  }
3375
  else
3376
  {
3377
27
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3378
  }
3379
3380
  // Check if we can turn the prefix/suffix into equalities by showing that the
3381
  // prefix/suffix is at least as long as the string
3382
220
  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
3383
110
  if (!eqs.isNull())
3384
  {
3385
8
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3386
  }
3387
3388
  // general reduction to equality + substr
3389
  Node retNode = n[0].eqNode(
3390
204
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3391
3392
102
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3393
}
3394
3395
81
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3396
{
3397
81
  NodeManager* nm = NodeManager::currentNM();
3398
162
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3399
162
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3400
162
  return res.isNull() ? n : res;
3401
}
3402
3403
93
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3404
{
3405
93
  NodeManager* nm = NodeManager::currentNM();
3406
3407
186
  Node res;
3408
93
  if (len.getKind() == CONST_RATIONAL)
3409
  {
3410
    // c -> "A" repeated c times
3411
162
    Rational ratLen = len.getConst<Rational>();
3412
81
    Assert(ratLen.getDenominator() == 1);
3413
162
    Integer intLen = ratLen.getNumerator();
3414
81
    uint32_t u = intLen.getUnsignedInt();
3415
81
    if (stype.isString())  // string-only
3416
    {
3417
81
      res = nm->mkConst(String(std::string(u, 'A')));
3418
    }
3419
    // we could do this for sequences, but we need to be careful: some
3420
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3421
    // sorts and arrays).
3422
  }
3423
12
  else if (len.getKind() == PLUS)
3424
  {
3425
    // x + y -> norm(x) + norm(y)
3426
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3427
12
    for (const auto& n : len)
3428
    {
3429
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3430
8
      if (sn.isNull())
3431
      {
3432
        return Node::null();
3433
      }
3434
16
      std::vector<Node> snChildren;
3435
8
      utils::getConcat(sn, snChildren);
3436
8
      concatBuilder.append(snChildren);
3437
    }
3438
4
    res = concatBuilder.constructNode();
3439
  }
3440
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3441
20
           && len[0].isConst())
3442
  {
3443
    // c * x -> norm(x) repeated c times
3444
8
    Rational ratReps = len[0].getConst<Rational>();
3445
4
    Assert(ratReps.getDenominator() == 1);
3446
8
    Integer intReps = ratReps.getNumerator();
3447
3448
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3449
4
    if (nRep.isNull())
3450
    {
3451
      return Node::null();
3452
    }
3453
8
    std::vector<Node> nRepChildren;
3454
4
    utils::getConcat(nRep, nRepChildren);
3455
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3456
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3457
    {
3458
8
      concatBuilder.append(nRepChildren);
3459
    }
3460
4
    res = concatBuilder.constructNode();
3461
  }
3462
4
  else if (len.getKind() == STRING_LENGTH)
3463
  {
3464
    // len(x) -> x
3465
4
    res = len[0];
3466
  }
3467
93
  return res;
3468
}
3469
3470
399
Node SequencesRewriter::rewriteSeqUnit(Node node)
3471
{
3472
399
  NodeManager* nm = NodeManager::currentNM();
3473
399
  if (node[0].isConst())
3474
  {
3475
274
    std::vector<Node> seq;
3476
137
    seq.push_back(node[0]);
3477
274
    TypeNode stype = node[0].getType();
3478
274
    Node ret = nm->mkConst(Sequence(stype, seq));
3479
137
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3480
  }
3481
262
  return node;
3482
}
3483
3484
159242
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3485
{
3486
318484
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3487
159242
                           << "." << std::endl;
3488
159242
  if (d_statistics != nullptr)
3489
  {
3490
158902
    (*d_statistics) << r;
3491
  }
3492
159242
  return ret;
3493
}
3494
3495
91547
Node SequencesRewriter::postProcessRewrite(Node node, Node ret)
3496
{
3497
91547
  NodeManager* nm = NodeManager::currentNM();
3498
  // standard post-processing
3499
  // We rewrite (string) equalities immediately here. This allows us to forego
3500
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3501
  // of { s=t, t=s, true, false } ).
3502
91547
  Kind retk = ret.getKind();
3503
91547
  if (retk == OR || retk == AND)
3504
  {
3505
4922
    std::vector<Node> children;
3506
2461
    bool childChanged = false;
3507
9300
    for (const Node& cret : ret)
3508
    {
3509
13678
      Node creter = cret;
3510
6839
      if (cret.getKind() == EQUAL)
3511
      {
3512
2730
        creter = rewriteEqualityExt(cret);
3513
      }
3514
4109
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3515
      {
3516
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3517
      }
3518
6839
      childChanged = childChanged || cret != creter;
3519
6839
      children.push_back(creter);
3520
    }
3521
2461
    if (childChanged)
3522
    {
3523
358
      ret = nm->mkNode(retk, children);
3524
2461
    }
3525
  }
3526
89086
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3527
  {
3528
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3529
  }
3530
89086
  else if (retk == EQUAL && node.getKind() != EQUAL)
3531
  {
3532
3146
    Trace("strings-rewrite")
3533
1573
        << "Apply extended equality rewrite on " << ret << std::endl;
3534
1573
    ret = rewriteEqualityExt(ret);
3535
  }
3536
91547
  return ret;
3537
}
3538
3539
}  // namespace strings
3540
}  // namespace theory
3541
29322
}  // namespace cvc5