GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1602 1655 96.8 %
Date: 2021-11-07 Branches: 5017 9618 52.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/statistics_registry.h"
31
#include "util/string.h"
32
33
using namespace std;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
15667
SequencesRewriter::SequencesRewriter(Rewriter* r,
41
15667
                                     HistogramStat<Rewrite>* statistics)
42
    : d_statistics(statistics),
43
      d_arithEntail(r),
44
15667
      d_stringsEntail(r, d_arithEntail, *this)
45
{
46
15667
}
47
48
15277
ArithEntail& SequencesRewriter::getArithEntail() { return d_arithEntail; }
49
50
6
StringsEntail& SequencesRewriter::getStringsEntail() { return d_stringsEntail; }
51
52
174339
Node SequencesRewriter::rewriteEquality(Node node)
53
{
54
174339
  Assert(node.getKind() == kind::EQUAL);
55
174339
  if (node[0] == node[1])
56
  {
57
7618
    Node ret = NodeManager::currentNM()->mkConst(true);
58
3809
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
59
  }
60
170530
  else if (node[0].isConst() && node[1].isConst())
61
  {
62
5766
    Node ret = NodeManager::currentNM()->mkConst(false);
63
2883
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
64
  }
65
  // standard ordering
66
167647
  if (node[0] > node[1])
67
  {
68
59458
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
69
29729
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
70
  }
71
137918
  return node;
72
}
73
74
122874
Node SequencesRewriter::rewriteEqualityExt(Node node)
75
{
76
122874
  Assert(node.getKind() == EQUAL);
77
245748
  TypeNode tn = node[0].getType();
78
122874
  if (tn.isInteger())
79
  {
80
30682
    return rewriteArithEqualityExt(node);
81
  }
82
92192
  if (tn.isStringLike())
83
  {
84
92192
    return rewriteStrEqualityExt(node);
85
  }
86
  return node;
87
}
88
89
92601
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
90
{
91
92601
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
92
185202
  TypeNode stype = node[0].getType();
93
94
92601
  bool hasStrTerm = false;
95
209166
  for (size_t r = 0; r < 2; r++)
96
  {
97
546522
    if (!node[r].isConst()
98
546522
        && kindToTheoryId(node[r].getKind()) == THEORY_STRINGS)
99
    {
100
65609
      hasStrTerm = true;
101
65609
      break;
102
    }
103
  }
104
92601
  if (!hasStrTerm)
105
  {
106
    // equality between variables and constants, no rewrites apply
107
26992
    return node;
108
  }
109
110
65609
  NodeManager* nm = NodeManager::currentNM();
111
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
112
196190
  for (unsigned r = 0; r < 2; r++)
113
  {
114
    // must call rewrite contains directly to avoid infinite loop
115
261603
    Node ctn = nm->mkNode(STRING_CONTAINS, node[r], node[1 - r]);
116
131022
    ctn = rewriteContains(ctn);
117
131022
    Assert(!ctn.isNull());
118
131022
    if (ctn.isConst())
119
    {
120
37649
      if (!ctn.getConst<bool>())
121
      {
122
441
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
123
      }
124
      else
125
      {
126
        // definitely contains but not syntactically equal
127
        // We may be able to simplify, e.g.
128
        //  str.++( x, "a" ) == "a"  ----> x = ""
129
      }
130
    }
131
  }
132
133
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
134
  // This covers cases like str.++( x, x ) == "a" ---> false
135
130336
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
136
130336
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
137
130336
  Node len_eq = len0.eqNode(len1);
138
65168
  len_eq = Rewriter::rewrite(len_eq);
139
65168
  if (len_eq.isConst() && !len_eq.getConst<bool>())
140
  {
141
6
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
142
  }
143
144
130324
  std::vector<Node> c[2];
145
195486
  for (unsigned i = 0; i < 2; i++)
146
  {
147
130324
    utils::getConcat(node[i], c[i]);
148
  }
149
150
  // check if the prefix, suffix mismatches
151
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
152
65162
  unsigned minsize = std::min(c[0].size(), c[1].size());
153
195450
  for (unsigned r = 0; r < 2; r++)
154
  {
155
130537
    for (unsigned i = 0; i < minsize; i++)
156
    {
157
130411
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
158
130411
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
159
130637
      Node s = c[0][index1];
160
130637
      Node t = c[1][index2];
161
130411
      if (s.isConst() && t.isConst())
162
      {
163
324
        size_t lenS = Word::getLength(s);
164
324
        size_t lenT = Word::getLength(t);
165
324
        size_t lenShort = lenS <= lenT ? lenS : lenT;
166
1088
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
167
764
                                : Word::strncmp(s, t, lenShort);
168
324
        if (!isSameFix)
169
        {
170
46
          Node ret = NodeManager::currentNM()->mkConst(false);
171
23
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
172
        }
173
      }
174
130388
      if (s != t)
175
      {
176
130162
        break;
177
      }
178
    }
179
  }
180
181
130278
  Node new_ret;
182
  // ------- equality unification
183
65139
  bool changed = false;
184
195417
  for (unsigned i = 0; i < 2; i++)
185
  {
186
130702
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
187
    {
188
212
      c[0].pop_back();
189
212
      c[1].pop_back();
190
212
      changed = true;
191
    }
192
    // splice constants
193
390692
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
194
253908
        && c[1].back().isConst())
195
    {
196
400
      Node cs[2];
197
      size_t csl[2];
198
600
      for (unsigned j = 0; j < 2; j++)
199
      {
200
400
        cs[j] = c[j].back();
201
400
        csl[j] = Word::getLength(cs[j]);
202
      }
203
200
      size_t larger = csl[0] > csl[1] ? 0 : 1;
204
200
      size_t smallerSize = csl[1 - larger];
205
400
      if (cs[1 - larger]
206
400
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
207
                     : Word::prefix(cs[larger], smallerSize)))
208
      {
209
200
        size_t sizeDiff = csl[larger] - smallerSize;
210
200
        c[larger][c[larger].size() - 1] =
211
400
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
212
                   : Word::suffix(cs[larger], sizeDiff);
213
200
        c[1 - larger].pop_back();
214
200
        changed = true;
215
      }
216
    }
217
390834
    for (unsigned j = 0; j < 2; j++)
218
    {
219
260556
      std::reverse(c[j].begin(), c[j].end());
220
    }
221
  }
222
65139
  if (changed)
223
  {
224
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
225
364
    Node s1 = utils::mkConcat(c[0], stype);
226
364
    Node s2 = utils::mkConcat(c[1], stype);
227
364
    if (s1 != node[0] || s2 != node[1])
228
    {
229
364
      new_ret = s1.eqNode(s2);
230
      // We generally don't apply the extended equality rewriter if the
231
      // original node was an equality but we may be able to do additional
232
      // rewriting here, e.g.,
233
      // x++y = "" --> x = "" and y = ""
234
364
      new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
235
364
      return rewriteStrEqualityExt(new_ret);
236
    }
237
  }
238
239
  // ------- homogeneous constants
240
194229
  for (unsigned i = 0; i < 2; i++)
241
  {
242
258960
    Node cn = d_stringsEntail.checkHomogeneousString(node[i]);
243
129506
    if (!cn.isNull() && !Word::isEmpty(cn))
244
    {
245
26845
      Assert(cn.isConst());
246
26845
      Assert(Word::getLength(cn) == 1);
247
248
      // The operands of the concat on each side of the equality without
249
      // constant strings
250
80535
      std::vector<Node> trimmed[2];
251
      // Counts the number of `cn`s on each side
252
26845
      size_t numCns[2] = {0, 0};
253
80522
      for (size_t j = 0; j < 2; j++)
254
      {
255
        // Sort the operands of the concats on both sides of the equality
256
        // (since both sides may only contain one char, the order does not
257
        // matter)
258
53684
        std::sort(c[j].begin(), c[j].end());
259
107485
        for (const Node& cc : c[j])
260
        {
261
53808
          if (cc.isConst())
262
          {
263
            // Count the number of `cn`s in the string constant and make
264
            // sure that all chars are `cn`s
265
53639
            std::vector<Node> veccc = Word::getChars(cc);
266
53025
            for (const Node& cv : veccc)
267
            {
268
26209
              if (cv != cn)
269
              {
270
                // This conflict case should mostly should be taken care of by
271
                // multiset reasoning in the strings rewriter, but we recognize
272
                // this conflict just in case.
273
7
                new_ret = nm->mkConst(false);
274
                return returnRewrite(
275
7
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
276
              }
277
26202
              numCns[j]++;
278
            }
279
          }
280
          else
281
          {
282
26985
            trimmed[j].push_back(cc);
283
          }
284
        }
285
      }
286
287
      // We have to remove the same number of `cn`s from both sides, so the
288
      // side with less `cn`s determines how many we can remove
289
26838
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
290
80514
      for (size_t j = 0; j < 2; j++)
291
      {
292
53676
        size_t diff = numCns[j] - trimmedConst;
293
53676
        if (diff != 0)
294
        {
295
          // Add a constant string to the side with more `cn`s to restore
296
          // the difference in number of `cn`s
297
49004
          std::vector<Node> vec(diff, cn);
298
24502
          trimmed[j].push_back(Word::mkWordFlatten(vec));
299
        }
300
      }
301
302
53631
      Node lhs = utils::mkConcat(trimmed[i], stype);
303
53631
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
304
26838
      if (lhs != node[i] || ss != node[1 - i])
305
      {
306
        // e.g.
307
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
308
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
309
        //
310
        // We generally don't apply the extended equality rewriter if the
311
        // original node was an equality but we may be able to do additional
312
        // rewriting here.
313
45
        new_ret = lhs.eqNode(ss);
314
45
        new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
315
45
        return rewriteStrEqualityExt(new_ret);
316
      }
317
    }
318
  }
319
320
  // ------- rewrites for (= "" _)
321
129446
  Node empty = Word::mkEmptyWord(stype);
322
181094
  for (size_t i = 0; i < 2; i++)
323
  {
324
122942
    if (node[i] == empty)
325
    {
326
63513
      Node ne = node[1 - i];
327
35042
      if (ne.getKind() == STRING_REPLACE)
328
      {
329
        // (= "" (str.replace x y x)) ---> (= x "")
330
1058
        if (ne[0] == ne[2])
331
        {
332
56
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
333
28
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
334
        }
335
336
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
337
1030
        if (d_stringsEntail.checkNonEmpty(ne[2]))
338
        {
339
          Node ret =
340
              nm->mkNode(AND,
341
184
                         nm->mkNode(EQUAL, ne[0], empty),
342
368
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
343
92
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
344
        }
345
346
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
347
938
        if (d_stringsEntail.checkLengthOne(ne[1], true) && ne[2] == empty)
348
        {
349
294
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
350
147
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
351
        }
352
      }
353
33984
      else if (ne.getKind() == STRING_SUBSTR)
354
      {
355
60436
        Node zero = nm->mkConst(Rational(0));
356
357
100110
        if (d_arithEntail.check(ne[1], false)
358
100110
            && d_arithEntail.check(ne[2], true))
359
        {
360
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
361
1993
          if (ne[1] == zero)
362
          {
363
940
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
364
470
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
365
          }
366
367
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
368
          // if n >= 0 and m > 0
369
3046
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
370
1523
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
371
        }
372
373
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
374
31377
        if (d_stringsEntail.checkNonEmpty(ne[0]) && ne[1] == zero)
375
        {
376
8622
          Node ret = nm->mkNode(LEQ, ne[2], zero);
377
4311
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
378
        }
379
      }
380
    }
381
  }
382
383
  // ------- rewrites for (= (str.replace _ _ _) _)
384
174402
  for (size_t i = 0; i < 2; i++)
385
  {
386
116291
    if (node[i].getKind() == STRING_REPLACE)
387
    {
388
5261
      Node repl = node[i];
389
5261
      Node x = node[1 - i];
390
391
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
392
2651
      if (d_stringsEntail.checkNonEmpty(x) && repl[0] == empty)
393
      {
394
        Node ret = nm->mkNode(
395
10
            EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1]));
396
5
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
397
      }
398
399
      // (= x (str.replace y x y)) ---> (= x y)
400
2646
      if (repl[0] == repl[2] && x == repl[1])
401
      {
402
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
403
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
404
      }
405
406
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
407
2644
      if (x == repl[0])
408
      {
409
2668
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
410
1349
        if (eq.isConst() && !eq.getConst<bool>())
411
        {
412
60
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_CONTAINS, x, repl[1]));
413
30
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
414
        }
415
      }
416
417
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
418
      // if (str.len y) = (str.len z)
419
2614
      if (repl[2] == x)
420
      {
421
2504
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
422
2504
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
423
1254
        if (d_arithEntail.checkEq(lenY, lenZ))
424
        {
425
          Node ret = nm->mkNode(OR,
426
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
427
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
428
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
429
        }
430
      }
431
    }
432
  }
433
434
  // Try to rewrite (= x y) into a conjunction of equalities based on length
435
  // entailment.
436
  //
437
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
438
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
439
  //
440
  // where yi' and yi'' correspond to some yj and
441
  //   (<= (str.len x) (str.++ y1' ... ym'))
442
174027
  for (unsigned i = 0; i < 2; i++)
443
  {
444
116087
    if (node[1 - i].getKind() == STRING_CONCAT)
445
    {
446
887
      new_ret = d_stringsEntail.inferEqsFromContains(node[i], node[1 - i]);
447
887
      if (!new_ret.isNull())
448
      {
449
171
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
450
      }
451
    }
452
  }
453
454
57940
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
455
  {
456
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
457
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
458
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
459
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
460
    //
461
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
462
    //
463
    // This rewrite performs length-based equality splitting: If we can show
464
    // that two prefixes have the same length, we can split an equality into
465
    // two equalities, one over the prefixes and another over the suffixes.
466
101
    std::vector<Node> v0, v1;
467
57
    utils::getConcat(node[0], v0);
468
57
    utils::getConcat(node[1], v1);
469
57
    size_t startRhs = 0;
470
268
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
471
    {
472
435
      const std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
473
435
      Node pfx0 = utils::mkConcat(pfxv0, stype);
474
846
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
475
      {
476
696
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
477
        {
478
1116
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
479
1116
          Node pfx1 = utils::mkConcat(pfxv1, stype);
480
1116
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
481
1116
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
482
483
595
          if (d_arithEntail.checkEq(lenPfx0, lenPfx1))
484
          {
485
18
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
486
18
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
487
            Node ret = nm->mkNode(kind::AND,
488
18
                                  pfx0.eqNode(pfx1),
489
18
                                  utils::mkConcat(sfxv0, stype)
490
36
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
491
9
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
492
          }
493
586
          else if (d_arithEntail.check(lenPfx1, lenPfx0, true))
494
          {
495
            // The prefix on the right-hand side is strictly longer than the
496
            // prefix on the left-hand side, so we try to strip the right-hand
497
            // prefix by the length of the left-hand prefix
498
            //
499
            // Example:
500
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
501
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
502
61
            std::vector<Node> rpfxv1;
503
61
            if (d_stringsEntail.stripSymbolicLength(
504
                    pfxv1, rpfxv1, 1, lenPfx0, true))
505
            {
506
              // The rewrite requires the full left-hand prefix length to be
507
              // stripped (otherwise we would have to keep parts of the
508
              // left-hand prefix).
509
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
510
              {
511
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
512
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
513
                Node ret =
514
                    nm->mkNode(kind::AND,
515
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
516
                               utils::mkConcat(sfxv0, stype)
517
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
518
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
519
              }
520
            }
521
522
            // If the prefix of the right-hand side is (strictly) longer than
523
            // the prefix of the left-hand side, we can advance the left-hand
524
            // side (since the length of the right-hand side is only increasing
525
            // in the inner loop)
526
61
            break;
527
          }
528
525
          else if (d_arithEntail.check(lenPfx0, lenPfx1, true))
529
          {
530
            // The prefix on the left-hand side is strictly longer than the
531
            // prefix on the right-hand side, so we try to strip the left-hand
532
            // prefix by the length of the right-hand prefix
533
            //
534
            // Example:
535
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
536
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
537
154
            std::vector<Node> sfxv0 = pfxv0;
538
154
            std::vector<Node> rpfxv0;
539
79
            if (d_stringsEntail.stripSymbolicLength(
540
                    sfxv0, rpfxv0, 1, lenPfx1, true))
541
            {
542
              // The rewrite requires the full right-hand prefix length to be
543
              // stripped (otherwise we would have to keep parts of the
544
              // right-hand prefix).
545
12
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
546
              {
547
4
                sfxv0.insert(sfxv0.end(), v0.begin() + i, v0.end());
548
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
549
                Node ret =
550
                    nm->mkNode(kind::AND,
551
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
552
8
                               utils::mkConcat(sfxv0, stype)
553
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
554
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
555
              }
556
            }
557
558
            // If the prefix of the left-hand side is (strictly) longer than
559
            // the prefix of the right-hand side, then we don't need to check
560
            // that right-hand prefix for future left-hand prefixes anymore
561
            // (since they are increasing in length)
562
75
            startRhs = j + 1;
563
          }
564
        }
565
      }
566
    }
567
  }
568
57927
  return node;
569
}
570
571
30682
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
572
{
573
30682
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
574
575
  // cases where we can solve the equality
576
577
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
578
579
30682
  return node;
580
}
581
582
73044
Node SequencesRewriter::rewriteLength(Node node)
583
{
584
73044
  Assert(node.getKind() == STRING_LENGTH);
585
73044
  NodeManager* nm = NodeManager::currentNM();
586
73044
  Kind nk0 = node[0].getKind();
587
73044
  if (node[0].isConst())
588
  {
589
15476
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
590
7738
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
591
  }
592
65306
  else if (nk0 == kind::STRING_CONCAT)
593
  {
594
5234
    Node tmpNode = node[0];
595
5234
    if (tmpNode.getKind() == kind::STRING_CONCAT)
596
    {
597
10468
      std::vector<Node> node_vec;
598
21182
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
599
      {
600
15948
        if (tmpNode[i].isConst())
601
        {
602
4318
          node_vec.push_back(
603
8636
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
604
        }
605
        else
606
        {
607
11630
          node_vec.push_back(NodeManager::currentNM()->mkNode(
608
              kind::STRING_LENGTH, tmpNode[i]));
609
        }
610
      }
611
10468
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
612
5234
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
613
    }
614
  }
615
60072
  else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL)
616
  {
617
1985
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
618
1985
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
619
1021
    if (len1 == len2)
620
    {
621
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
622
114
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
623
57
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
624
964
    }
625
  }
626
59051
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
627
58959
           || nk0 == STRING_UPDATE)
628
  {
629
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
630
    // len( update( x, n, y ) ) = len( x )
631
220
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
632
110
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
633
  }
634
58941
  else if (nk0 == SEQ_UNIT)
635
  {
636
190
    Node retNode = nm->mkConst(Rational(1));
637
95
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
638
  }
639
59810
  return node;
640
}
641
642
// TODO (#1180) add rewrite
643
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
644
//  str.substr( x, n1, n2+n3 )
645
66432
Node SequencesRewriter::rewriteConcat(Node node)
646
{
647
66432
  Assert(node.getKind() == kind::STRING_CONCAT);
648
132864
  Trace("strings-rewrite-debug")
649
66432
      << "Strings::rewriteConcat start " << node << std::endl;
650
132864
  std::vector<Node> node_vec;
651
132864
  Node preNode = Node::null();
652
291071
  for (Node tmpNode : node)
653
  {
654
224639
    if (tmpNode.getKind() == STRING_CONCAT)
655
    {
656
949
      unsigned j = 0;
657
      // combine the first term with the previous constant if applicable
658
949
      if (!preNode.isNull())
659
      {
660
217
        if (tmpNode[0].isConst())
661
        {
662
148
          std::vector<Node> wvec;
663
74
          wvec.push_back(preNode);
664
74
          wvec.push_back(tmpNode[0]);
665
74
          preNode = Word::mkWordFlatten(wvec);
666
74
          node_vec.push_back(preNode);
667
        }
668
        else
669
        {
670
143
          node_vec.push_back(preNode);
671
143
          node_vec.push_back(tmpNode[0]);
672
        }
673
217
        preNode = Node::null();
674
217
        ++j;
675
      }
676
      // insert the middle terms to node_vec
677
949
      if (j <= tmpNode.getNumChildren() - 1)
678
      {
679
949
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
680
      }
681
      // take the last term as the current
682
949
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
683
    }
684
224639
    if (!tmpNode.isConst())
685
    {
686
119938
      if (!preNode.isNull())
687
      {
688
24867
        if (preNode.isConst() && !Word::isEmpty(preNode))
689
        {
690
23974
          node_vec.push_back(preNode);
691
        }
692
24867
        preNode = Node::null();
693
      }
694
119938
      node_vec.push_back(tmpNode);
695
    }
696
    else
697
    {
698
104701
      if (preNode.isNull())
699
      {
700
55591
        preNode = tmpNode;
701
      }
702
      else
703
      {
704
98220
        std::vector<Node> vec;
705
49110
        vec.push_back(preNode);
706
49110
        vec.push_back(tmpNode);
707
49110
        preNode = Word::mkWordFlatten(vec);
708
      }
709
    }
710
  }
711
66432
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
712
  {
713
29446
    node_vec.push_back(preNode);
714
  }
715
716
  // Sort adjacent operands in str.++ that all result in the same string or the
717
  // empty string.
718
  //
719
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
720
  // (str.++ ... [sort those 3 arguments] ... )
721
66432
  size_t lastIdx = 0;
722
132864
  Node lastX;
723
241738
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
724
  {
725
350612
    Node s = d_stringsEntail.getStringOrEmpty(node_vec[i]);
726
175306
    bool nextX = false;
727
175306
    if (s != lastX)
728
    {
729
170875
      nextX = true;
730
    }
731
732
175306
    if (nextX)
733
    {
734
170875
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
735
170875
      lastX = s;
736
170875
      lastIdx = i;
737
    }
738
  }
739
66432
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
740
741
132864
  TypeNode tn = node.getType();
742
132864
  Node retNode = utils::mkConcat(node_vec, tn);
743
132864
  Trace("strings-rewrite-debug")
744
66432
      << "Strings::rewriteConcat end " << retNode << std::endl;
745
66432
  if (retNode != node)
746
  {
747
18214
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
748
  }
749
48218
  return node;
750
}
751
752
3184
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
753
{
754
3184
  Assert(node.getKind() == kind::REGEXP_CONCAT);
755
3184
  NodeManager* nm = NodeManager::currentNM();
756
6368
  Trace("strings-rewrite-debug")
757
3184
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
758
6368
  Node retNode = node;
759
6368
  std::vector<Node> vec;
760
3184
  bool changed = false;
761
6368
  Node emptyRe;
762
763
  // get the string type that are members of this regular expression
764
6368
  TypeNode rtype = node.getType();
765
6368
  TypeNode stype;
766
3184
  if (rtype.isRegExp())
767
  {
768
    // standard regular expressions are for strings
769
3184
    stype = nm->stringType();
770
  }
771
  else
772
  {
773
    Unimplemented();
774
  }
775
776
12386
  for (const Node& c : node)
777
  {
778
9238
    if (c.getKind() == REGEXP_CONCAT)
779
    {
780
218
      changed = true;
781
698
      for (const Node& cc : c)
782
      {
783
480
        vec.push_back(cc);
784
      }
785
    }
786
21834
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
787
21628
             && Word::isEmpty(c[0]))
788
    {
789
138
      changed = true;
790
138
      emptyRe = c;
791
    }
792
8882
    else if (c.getKind() == REGEXP_EMPTY)
793
    {
794
      // re.++( ..., empty, ... ) ---> empty
795
72
      Node ret = nm->mkNode(REGEXP_EMPTY);
796
36
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
797
    }
798
    else
799
    {
800
8846
      vec.push_back(c);
801
    }
802
  }
803
3148
  if (changed)
804
  {
805
    // flatten
806
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
807
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
808
338
    if (vec.empty())
809
    {
810
2
      Assert(!emptyRe.isNull());
811
2
      retNode = emptyRe;
812
    }
813
    else
814
    {
815
336
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
816
    }
817
338
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
818
  }
819
5620
  Trace("strings-rewrite-debug")
820
2810
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
821
5620
  std::vector<Node> cvec;
822
  // the current accumulation of constant strings
823
5620
  std::vector<Node> preReStr;
824
  // whether the last component was (_)*
825
2810
  bool lastAllStar = false;
826
5620
  String emptyStr = String("");
827
  // this loop checks to see if components can be combined or dropped
828
14042
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
829
  {
830
22464
    Node curr;
831
11232
    if (i < size)
832
    {
833
8422
      curr = vec[i];
834
8422
      Assert(curr.getKind() != REGEXP_CONCAT);
835
    }
836
    // update preReStr
837
11232
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
838
    {
839
3422
      lastAllStar = false;
840
3422
      preReStr.push_back(curr[0]);
841
3422
      curr = Node::null();
842
    }
843
7810
    else if (!preReStr.empty())
844
    {
845
2964
      Assert(!lastAllStar);
846
      // this groups consecutive strings a++b ---> ab
847
5928
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
848
2964
      cvec.push_back(acc);
849
2964
      preReStr.clear();
850
    }
851
4846
    else if (!curr.isNull() && lastAllStar)
852
    {
853
      // if empty, drop it
854
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
855
324
      if (RegExpEntail::isConstRegExp(curr)
856
324
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
857
      {
858
44
        curr = Node::null();
859
      }
860
    }
861
11232
    if (!curr.isNull())
862
    {
863
4956
      lastAllStar = false;
864
4956
      if (curr.getKind() == REGEXP_STAR)
865
      {
866
        // we can group stars (a)* ++ (a)* ---> (a)*
867
2514
        if (!cvec.empty() && cvec.back() == curr)
868
        {
869
8
          curr = Node::null();
870
        }
871
2506
        else if (curr[0].getKind() == REGEXP_SIGMA)
872
        {
873
1128
          Assert(!lastAllStar);
874
1128
          lastAllStar = true;
875
          // go back and remove empty ones from back of cvec
876
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
877
3242
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
878
4348
                 && RegExpEntail::testConstStringInRegExp(
879
940
                     emptyStr, 0, cvec.back()))
880
          {
881
8
            cvec.pop_back();
882
          }
883
        }
884
      }
885
    }
886
11232
    if (!curr.isNull())
887
    {
888
4948
      cvec.push_back(curr);
889
    }
890
  }
891
2810
  Assert(!cvec.empty());
892
2810
  retNode = utils::mkConcat(cvec, rtype);
893
2810
  if (retNode != node)
894
  {
895
    // handles all cases where consecutive re constants are combined or
896
    // dropped as described in the loop above.
897
200
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
898
  }
899
900
  // flipping adjacent star arguments
901
2610
  changed = false;
902
7450
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
903
  {
904
4840
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
905
    {
906
      // by convention, flip the order (a*)++a ---> a++(a*)
907
64
      std::swap(cvec[i], cvec[i + 1]);
908
64
      changed = true;
909
    }
910
  }
911
2610
  if (changed)
912
  {
913
48
    retNode = utils::mkConcat(cvec, rtype);
914
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
915
  }
916
2562
  return node;
917
}
918
919
1254
Node SequencesRewriter::rewriteStarRegExp(TNode node)
920
{
921
1254
  Assert(node.getKind() == REGEXP_STAR);
922
1254
  NodeManager* nm = NodeManager::currentNM();
923
2508
  Node retNode = node;
924
1254
  if (node[0].getKind() == REGEXP_STAR)
925
  {
926
    // ((R)*)* ---> R*
927
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
928
  }
929
4242
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
930
4182
           && Word::isEmpty(node[0][0]))
931
  {
932
    // ("")* ---> ""
933
18
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
934
  }
935
1232
  else if (node[0].getKind() == REGEXP_EMPTY)
936
  {
937
    // (empty)* ---> ""
938
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
939
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
940
  }
941
1224
  else if (node[0].getKind() == REGEXP_UNION)
942
  {
943
    // simplification of unions under star
944
272
    if (RegExpEntail::hasEpsilonNode(node[0]))
945
    {
946
38
      bool changed = false;
947
38
      std::vector<Node> node_vec;
948
120
      for (const Node& nc : node[0])
949
      {
950
240
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
951
236
            && Word::isEmpty(nc[0]))
952
        {
953
          // can be removed
954
38
          changed = true;
955
        }
956
        else
957
        {
958
44
          node_vec.push_back(nc);
959
        }
960
      }
961
38
      if (changed)
962
      {
963
38
        retNode = node_vec.size() == 1 ? node_vec[0]
964
                                       : nm->mkNode(REGEXP_UNION, node_vec);
965
38
        retNode = nm->mkNode(REGEXP_STAR, retNode);
966
        // simplification of union beneath star based on loop above
967
        // for example, ( "" | "a" )* ---> ("a")*
968
38
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
969
      }
970
    }
971
  }
972
1186
  return node;
973
}
974
975
1235
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
976
{
977
1235
  Kind nk = node.getKind();
978
1235
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
979
2470
  Trace("strings-rewrite-debug")
980
1235
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
981
2470
  std::vector<Node> node_vec;
982
2470
  std::vector<Node> polRegExp[2];
983
3952
  for (const Node& ni : node)
984
  {
985
2727
    if (ni.getKind() == nk)
986
    {
987
42
      for (const Node& nic : ni)
988
      {
989
28
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
990
        {
991
26
          node_vec.push_back(nic);
992
        }
993
      }
994
    }
995
2713
    else if (ni.getKind() == REGEXP_EMPTY)
996
    {
997
69
      if (nk == REGEXP_INTER)
998
      {
999
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
1000
      }
1001
      // otherwise, can ignore
1002
    }
1003
2644
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
1004
    {
1005
16
      if (nk == REGEXP_UNION)
1006
      {
1007
8
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
1008
      }
1009
      // otherwise, can ignore
1010
    }
1011
2628
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
1012
    {
1013
2590
      node_vec.push_back(ni);
1014
2590
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
1015
5180
      Node nia = pindex == 1 ? ni[0] : ni;
1016
2590
      polRegExp[pindex].push_back(nia);
1017
    }
1018
  }
1019
1225
  NodeManager* nm = NodeManager::currentNM();
1020
  // use inclusion tests
1021
1462
  for (const Node& negMem : polRegExp[1])
1022
  {
1023
397
    for (const Node& posMem : polRegExp[0])
1024
    {
1025
304
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
1026
304
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
1027
      // inclusion test for conflicting case m1 contains m2
1028
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
1029
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
1030
160
      if (RegExpEntail::regExpIncludes(m1, m2))
1031
      {
1032
32
        Node retNode;
1033
16
        if (nk == REGEXP_INTER)
1034
        {
1035
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1036
        }
1037
        else
1038
        {
1039
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1040
        }
1041
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1042
      }
1043
    }
1044
  }
1045
2418
  Node retNode;
1046
1209
  if (node_vec.empty())
1047
  {
1048
    if (nk == REGEXP_INTER)
1049
    {
1050
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1051
    }
1052
    else
1053
    {
1054
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1055
    }
1056
  }
1057
  else
1058
  {
1059
1209
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1060
  }
1061
1209
  if (retNode != node)
1062
  {
1063
    // flattening and removing children, based on loop above
1064
107
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1065
  }
1066
1102
  return node;
1067
}
1068
1069
32
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1070
{
1071
32
  Assert(node.getKind() == REGEXP_LOOP);
1072
64
  Node retNode = node;
1073
64
  Node r = node[0];
1074
32
  if (r.getKind() == REGEXP_STAR)
1075
  {
1076
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1077
  }
1078
32
  NodeManager* nm = NodeManager::currentNM();
1079
64
  cvc5::Rational rMaxInt(String::maxSize());
1080
32
  uint32_t l = utils::getLoopMinOccurrences(node);
1081
64
  std::vector<Node> vec_nodes;
1082
122
  for (unsigned i = 0; i < l; i++)
1083
  {
1084
90
    vec_nodes.push_back(r);
1085
  }
1086
  Node n =
1087
32
      vec_nodes.size() == 0
1088
36
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1089
68
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1090
32
  uint32_t u = utils::getLoopMaxOccurrences(node);
1091
32
  if (u < l)
1092
  {
1093
8
    std::vector<Node> nvec;
1094
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1095
  }
1096
28
  else if (u == l)
1097
  {
1098
10
    retNode = n;
1099
  }
1100
  else
1101
  {
1102
36
    std::vector<Node> vec2;
1103
18
    vec2.push_back(n);
1104
36
    TypeNode rtype = nm->regExpType();
1105
80
    for (uint32_t j = l; j < u; j++)
1106
    {
1107
62
      vec_nodes.push_back(r);
1108
62
      n = utils::mkConcat(vec_nodes, rtype);
1109
62
      vec2.push_back(n);
1110
    }
1111
18
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1112
  }
1113
64
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1114
32
                      << std::endl;
1115
32
  if (retNode != node)
1116
  {
1117
32
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1118
  }
1119
  return node;
1120
}
1121
1122
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1123
{
1124
4
  Assert(node.getKind() == REGEXP_REPEAT);
1125
4
  NodeManager* nm = NodeManager::currentNM();
1126
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1127
4
  unsigned r = utils::getRepeatAmount(node);
1128
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1129
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1130
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1131
}
1132
1133
36
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1134
{
1135
36
  Assert(node.getKind() == REGEXP_OPT);
1136
36
  NodeManager* nm = NodeManager::currentNM();
1137
  Node retNode =
1138
      nm->mkNode(REGEXP_UNION,
1139
72
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1140
144
                 node[0]);
1141
72
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1142
}
1143
1144
55
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1145
{
1146
55
  Assert(node.getKind() == REGEXP_PLUS);
1147
55
  NodeManager* nm = NodeManager::currentNM();
1148
  Node retNode =
1149
110
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1150
110
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1151
}
1152
1153
66
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1154
{
1155
66
  Assert(node.getKind() == REGEXP_DIFF);
1156
66
  NodeManager* nm = NodeManager::currentNM();
1157
  Node retNode =
1158
132
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1159
132
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1160
}
1161
1162
406
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1163
{
1164
406
  Assert(node.getKind() == REGEXP_RANGE);
1165
  unsigned ch[2];
1166
1218
  for (size_t i = 0; i < 2; ++i)
1167
  {
1168
812
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1169
    {
1170
      // not applied to characters, it is not handled
1171
      return node;
1172
    }
1173
812
    ch[i] = node[i].getConst<String>().front();
1174
  }
1175
1176
406
  NodeManager* nm = NodeManager::currentNM();
1177
406
  if (node[0] == node[1])
1178
  {
1179
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1180
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1181
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1182
  }
1183
1184
406
  if (ch[0] > ch[1])
1185
  {
1186
    // re.range( "B", "A" ) ---> re.none
1187
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1188
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1189
  }
1190
404
  return node;
1191
}
1192
1193
13270
Node SequencesRewriter::rewriteMembership(TNode node)
1194
{
1195
13270
  NodeManager* nm = NodeManager::currentNM();
1196
26540
  Node x = node[0];
1197
26540
  Node r = node[1];
1198
1199
26540
  TypeNode stype = x.getType();
1200
26540
  TypeNode rtype = r.getType();
1201
1202
13270
  if(r.getKind() == kind::REGEXP_EMPTY)
1203
  {
1204
148
    Node retNode = NodeManager::currentNM()->mkConst(false);
1205
74
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1206
  }
1207
13196
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1208
  {
1209
    // test whether x in node[1]
1210
3690
    cvc5::String s = x.getConst<String>();
1211
1845
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1212
3690
    Node retNode = NodeManager::currentNM()->mkConst(test);
1213
1845
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1214
  }
1215
11351
  else if (r.getKind() == kind::REGEXP_SIGMA)
1216
  {
1217
188
    Node one = nm->mkConst(Rational(1));
1218
188
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1219
94
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1220
  }
1221
11257
  else if (r.getKind() == kind::REGEXP_STAR)
1222
  {
1223
3738
    if (x.isConst())
1224
    {
1225
38
      size_t xlen = Word::getLength(x);
1226
38
      if (xlen == 0)
1227
      {
1228
32
        Node retNode = nm->mkConst(true);
1229
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1230
16
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1231
      }
1232
22
      else if (xlen == 1)
1233
      {
1234
22
        if (r[0].getKind() == STRING_TO_REGEXP)
1235
        {
1236
4
          Node retNode = r[0][0].eqNode(x);
1237
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1238
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1239
        }
1240
      }
1241
    }
1242
3700
    else if (x.getKind() == STRING_CONCAT)
1243
    {
1244
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1245
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1246
      //     if the length of all strings in R is one.
1247
1366
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1248
926
      if (!flr.isNull())
1249
      {
1250
554
        Node one = nm->mkConst(Rational(1));
1251
520
        if (flr == one)
1252
        {
1253
972
          NodeBuilder nb(AND);
1254
2035
          for (const Node& xc : x)
1255
          {
1256
1549
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1257
          }
1258
          return returnRewrite(
1259
486
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1260
        }
1261
      }
1262
    }
1263
3234
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1264
    {
1265
312
      Node retNode = NodeManager::currentNM()->mkConst(true);
1266
156
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1267
    }
1268
  }
1269
7519
  else if (r.getKind() == kind::REGEXP_CONCAT)
1270
  {
1271
5325
    bool allSigma = true;
1272
5325
    bool allSigmaStrict = true;
1273
5325
    unsigned allSigmaMinSize = 0;
1274
10507
    Node constStr;
1275
5325
    size_t constIdx = 0;
1276
5325
    size_t nchildren = r.getNumChildren();
1277
10968
    for (size_t i = 0; i < nchildren; i++)
1278
    {
1279
15894
      Node rc = r[i];
1280
10251
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1281
10251
      if (rc.getKind() == kind::REGEXP_SIGMA)
1282
      {
1283
1001
        allSigmaMinSize++;
1284
      }
1285
9250
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1286
      {
1287
2015
        allSigmaStrict = false;
1288
      }
1289
7235
      else if (rc.getKind() == STRING_TO_REGEXP)
1290
      {
1291
3559
        if (constStr.isNull())
1292
        {
1293
2627
          constStr = rc[0];
1294
2627
          constIdx = i;
1295
        }
1296
        else
1297
        {
1298
932
          allSigma = false;
1299
932
          break;
1300
        }
1301
      }
1302
      else
1303
      {
1304
3676
        allSigma = false;
1305
3676
        break;
1306
      }
1307
    }
1308
5325
    if (allSigma)
1309
    {
1310
717
      if (constStr.isNull())
1311
      {
1312
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1313
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1314
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1315
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1316
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1317
      }
1318
697
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1319
123
               && constIdx != nchildren - 1)
1320
      {
1321
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1322
246
        Node retNode = nm->mkNode(STRING_CONTAINS, x, constStr);
1323
123
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1324
      }
1325
    }
1326
  }
1327
4388
  else if (r.getKind() == kind::REGEXP_INTER
1328
2194
           || r.getKind() == kind::REGEXP_UNION)
1329
  {
1330
1066
    std::vector<Node> mvec;
1331
1737
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1332
    {
1333
1204
      mvec.push_back(
1334
2408
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1335
    }
1336
    Node retNode = NodeManager::currentNM()->mkNode(
1337
1066
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1338
533
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1339
  }
1340
1661
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1341
  {
1342
1766
    Node retNode = x.eqNode(r[0]);
1343
883
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1344
  }
1345
778
  else if (r.getKind() == REGEXP_RANGE)
1346
  {
1347
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1348
    // we do not do this if the arguments are not constant
1349
521
    if (RegExpEntail::isConstRegExp(r))
1350
    {
1351
1042
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1352
      Node retNode =
1353
          nm->mkNode(AND,
1354
1042
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1355
2084
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1356
521
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1357
    }
1358
  }
1359
257
  else if (r.getKind() == REGEXP_COMPLEMENT)
1360
  {
1361
386
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1362
193
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1363
  }
1364
1365
  // do simple consumes
1366
16648
  Node retNode = node;
1367
8324
  if (r.getKind() == kind::REGEXP_STAR)
1368
  {
1369
9118
    for (unsigned dir = 0; dir <= 1; dir++)
1370
    {
1371
12188
      std::vector<Node> mchildren;
1372
6148
      utils::getConcat(x, mchildren);
1373
6148
      bool success = true;
1374
18616
      while (success)
1375
      {
1376
6246
        success = false;
1377
12480
        std::vector<Node> children;
1378
6246
        utils::getConcat(r[0], children);
1379
12480
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1380
6246
        if (!scn.isNull())
1381
        {
1382
24
          Trace("regexp-ext-rewrite")
1383
12
              << "Regexp star : const conflict : " << node << std::endl;
1384
12
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1385
        }
1386
6234
        else if (children.empty())
1387
        {
1388
          // fully consumed one copy of the STAR
1389
106
          if (mchildren.empty())
1390
          {
1391
            Trace("regexp-ext-rewrite")
1392
                << "Regexp star : full consume : " << node << std::endl;
1393
            Node ret = NodeManager::currentNM()->mkConst(true);
1394
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1395
          }
1396
          else
1397
          {
1398
212
            Node prev = retNode;
1399
318
            retNode = nm->mkNode(
1400
212
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1401
            // Iterate again if the node changed. It may not have changed if
1402
            // nothing was consumed from mchildren (e.g. if the body of the
1403
            // re.* accepts the empty string.
1404
106
            success = (retNode != prev);
1405
          }
1406
        }
1407
      }
1408
6136
      if (retNode != node)
1409
      {
1410
192
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1411
96
                                    << " -> " << retNode << std::endl;
1412
96
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1413
      }
1414
    }
1415
  }
1416
  else
1417
  {
1418
9488
    std::vector<Node> children;
1419
5246
    utils::getConcat(r, children);
1420
9488
    std::vector<Node> mchildren;
1421
5246
    utils::getConcat(x, mchildren);
1422
5246
    unsigned prevSize = children.size() + mchildren.size();
1423
9488
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1424
5246
    if (!scn.isNull())
1425
    {
1426
208
      Trace("regexp-ext-rewrite")
1427
104
          << "Regexp : const conflict : " << node << std::endl;
1428
104
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1429
    }
1430
5142
    else if ((children.size() + mchildren.size()) != prevSize)
1431
    {
1432
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1433
      // above, we strip components to construct an equivalent membership:
1434
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1435
1800
      Node xn = utils::mkConcat(mchildren, stype);
1436
1800
      Node emptyStr = Word::mkEmptyWord(stype);
1437
900
      if (children.empty())
1438
      {
1439
        // If we stripped all components on the right, then the left is
1440
        // equal to the empty string.
1441
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1442
62
        retNode = xn.eqNode(emptyStr);
1443
      }
1444
      else
1445
      {
1446
        // otherwise, construct the updated regular expression
1447
838
        retNode =
1448
1676
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1449
      }
1450
1800
      Trace("regexp-ext-rewrite")
1451
900
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1452
900
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1453
    }
1454
  }
1455
7212
  return node;
1456
}
1457
1458
470407
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1459
{
1460
940814
  Trace("sequences-postrewrite")
1461
470407
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1462
940814
  Node retNode = node;
1463
470407
  Kind nk = node.getKind();
1464
470407
  if (nk == kind::STRING_CONCAT)
1465
  {
1466
66432
    retNode = rewriteConcat(node);
1467
  }
1468
403975
  else if (nk == kind::EQUAL)
1469
  {
1470
174339
    retNode = rewriteEquality(node);
1471
  }
1472
229636
  else if (nk == kind::STRING_LENGTH)
1473
  {
1474
73044
    retNode = rewriteLength(node);
1475
  }
1476
156592
  else if (nk == kind::STRING_CHARAT)
1477
  {
1478
113
    retNode = rewriteCharAt(node);
1479
  }
1480
156479
  else if (nk == kind::STRING_SUBSTR)
1481
  {
1482
54264
    retNode = rewriteSubstr(node);
1483
  }
1484
102215
  else if (nk == kind::STRING_UPDATE)
1485
  {
1486
185
    retNode = rewriteUpdate(node);
1487
  }
1488
102030
  else if (nk == kind::STRING_CONTAINS)
1489
  {
1490
37935
    retNode = rewriteContains(node);
1491
  }
1492
64095
  else if (nk == kind::STRING_INDEXOF)
1493
  {
1494
3165
    retNode = rewriteIndexof(node);
1495
  }
1496
60930
  else if (nk == kind::STRING_INDEXOF_RE)
1497
  {
1498
890
    retNode = rewriteIndexofRe(node);
1499
  }
1500
60040
  else if (nk == kind::STRING_REPLACE)
1501
  {
1502
4024
    retNode = rewriteReplace(node);
1503
  }
1504
56016
  else if (nk == kind::STRING_REPLACE_ALL)
1505
  {
1506
203
    retNode = rewriteReplaceAll(node);
1507
  }
1508
55813
  else if (nk == kind::STRING_REPLACE_RE)
1509
  {
1510
436
    retNode = rewriteReplaceRe(node);
1511
  }
1512
55377
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1513
  {
1514
213
    retNode = rewriteReplaceReAll(node);
1515
  }
1516
55164
  else if (nk == STRING_REV)
1517
  {
1518
241
    retNode = rewriteStrReverse(node);
1519
  }
1520
54923
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1521
  {
1522
185
    retNode = rewritePrefixSuffix(node);
1523
  }
1524
54738
  else if (nk == kind::STRING_IN_REGEXP)
1525
  {
1526
13270
    retNode = rewriteMembership(node);
1527
  }
1528
41468
  else if (nk == REGEXP_CONCAT)
1529
  {
1530
3184
    retNode = rewriteConcatRegExp(node);
1531
  }
1532
38284
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1533
  {
1534
1235
    retNode = rewriteAndOrRegExp(node);
1535
  }
1536
37049
  else if (nk == REGEXP_DIFF)
1537
  {
1538
66
    retNode = rewriteDifferenceRegExp(node);
1539
  }
1540
36983
  else if (nk == REGEXP_STAR)
1541
  {
1542
1254
    retNode = rewriteStarRegExp(node);
1543
  }
1544
35729
  else if (nk == REGEXP_PLUS)
1545
  {
1546
55
    retNode = rewritePlusRegExp(node);
1547
  }
1548
35674
  else if (nk == REGEXP_OPT)
1549
  {
1550
36
    retNode = rewriteOptionRegExp(node);
1551
  }
1552
35638
  else if (nk == REGEXP_RANGE)
1553
  {
1554
406
    retNode = rewriteRangeRegExp(node);
1555
  }
1556
35232
  else if (nk == REGEXP_LOOP)
1557
  {
1558
32
    retNode = rewriteLoopRegExp(node);
1559
  }
1560
35200
  else if (nk == REGEXP_REPEAT)
1561
  {
1562
4
    retNode = rewriteRepeatRegExp(node);
1563
  }
1564
35196
  else if (nk == SEQ_UNIT)
1565
  {
1566
389
    retNode = rewriteSeqUnit(node);
1567
  }
1568
34807
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1569
  {
1570
399
    retNode = rewriteSeqNth(node);
1571
  }
1572
1573
940814
  Trace("sequences-postrewrite")
1574
470407
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1575
470407
      << std::endl;
1576
470407
  if (node != retNode)
1577
  {
1578
    // also post process the rewrite, which may apply extended rewriting to
1579
    // equalities, if we rewrite to an equality from a non-equality
1580
107023
    retNode = postProcessRewrite(node, retNode);
1581
214046
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1582
107023
                                   << node << " to " << retNode << std::endl;
1583
107023
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1584
  }
1585
363384
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1586
363384
  return RewriteResponse(REWRITE_DONE, retNode);
1587
}
1588
1589
298775
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1590
{
1591
298775
  return RewriteResponse(REWRITE_DONE, node);
1592
}
1593
1594
876
TrustNode SequencesRewriter::expandDefinition(Node node)
1595
{
1596
1752
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1597
876
                           << std::endl;
1598
1599
876
  if (node.getKind() == kind::SEQ_NTH)
1600
  {
1601
2
    NodeManager* nm = NodeManager::currentNM();
1602
4
    Node s = node[0];
1603
4
    Node n = node[1];
1604
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1605
    Node cond = nm->mkNode(AND,
1606
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1607
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1608
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1609
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1610
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1611
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1612
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1613
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1614
  }
1615
874
  return TrustNode::null();
1616
}
1617
1618
399
Node SequencesRewriter::rewriteSeqNth(Node node)
1619
{
1620
399
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1621
798
  Node s = node[0];
1622
798
  Node i = node[1];
1623
399
  if (s.isConst() && i.isConst())
1624
  {
1625
29
    size_t len = Word::getLength(s);
1626
29
    if (i.getConst<Rational>().sgn() != -1)
1627
    {
1628
27
      size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1629
27
      if (pos < len)
1630
      {
1631
18
        std::vector<Node> elements = s.getConst<Sequence>().getVec();
1632
9
        const Node& ret = elements[pos];
1633
9
        return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1634
      }
1635
    }
1636
20
    if (node.getKind() == SEQ_NTH_TOTAL)
1637
    {
1638
      // return arbitrary term
1639
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1640
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1641
    }
1642
    else
1643
    {
1644
18
      return node;
1645
    }
1646
  }
1647
  else
1648
  {
1649
370
    return node;
1650
  }
1651
}
1652
1653
113
Node SequencesRewriter::rewriteCharAt(Node node)
1654
{
1655
113
  Assert(node.getKind() == STRING_CHARAT);
1656
113
  NodeManager* nm = NodeManager::currentNM();
1657
226
  Node one = nm->mkConst(Rational(1));
1658
226
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1659
226
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1660
}
1661
1662
54270
Node SequencesRewriter::rewriteSubstr(Node node)
1663
{
1664
54270
  Assert(node.getKind() == kind::STRING_SUBSTR);
1665
1666
54270
  NodeManager* nm = NodeManager::currentNM();
1667
54270
  if (node[0].isConst())
1668
  {
1669
16956
    if (Word::isEmpty(node[0]))
1670
    {
1671
2320
      Node ret = node[0];
1672
1160
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1673
    }
1674
    // rewriting for constant arguments
1675
15796
    if (node[1].isConst() && node[2].isConst())
1676
    {
1677
16064
      Node s = node[0];
1678
16064
      cvc5::Rational rMaxInt(String::maxSize());
1679
      uint32_t start;
1680
8032
      if (node[1].getConst<Rational>() > rMaxInt)
1681
      {
1682
        // start beyond the maximum size of strings
1683
        // thus, it must be beyond the end point of this string
1684
8
        Node ret = Word::mkEmptyWord(node.getType());
1685
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1686
      }
1687
8028
      else if (node[1].getConst<Rational>().sgn() < 0)
1688
      {
1689
        // start before the beginning of the string
1690
1464
        Node ret = Word::mkEmptyWord(node.getType());
1691
732
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1692
      }
1693
      else
1694
      {
1695
7296
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1696
7296
        if (start >= Word::getLength(node[0]))
1697
        {
1698
          // start beyond the end of the string
1699
804
          Node ret = Word::mkEmptyWord(node.getType());
1700
402
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1701
        }
1702
      }
1703
6894
      if (node[2].getConst<Rational>() > rMaxInt)
1704
      {
1705
        // take up to the end of the string
1706
1
        size_t lenS = Word::getLength(s);
1707
2
        Node ret = Word::suffix(s, lenS - start);
1708
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1709
      }
1710
6893
      else if (node[2].getConst<Rational>().sgn() <= 0)
1711
      {
1712
734
        Node ret = Word::mkEmptyWord(node.getType());
1713
367
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1714
      }
1715
      else
1716
      {
1717
        uint32_t len =
1718
6526
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1719
6526
        if (start + len > Word::getLength(node[0]))
1720
        {
1721
          // take up to the end of the string
1722
556
          size_t lenS = Word::getLength(s);
1723
1112
          Node ret = Word::suffix(s, lenS - start);
1724
556
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1725
        }
1726
        else
1727
        {
1728
          // compute the substr using the constant string
1729
11940
          Node ret = Word::substr(s, start, len);
1730
5970
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1731
        }
1732
      }
1733
    }
1734
  }
1735
90156
  Node zero = nm->mkConst(cvc5::Rational(0));
1736
1737
  // if entailed non-positive length or negative start point
1738
45078
  if (d_arithEntail.check(zero, node[1], true))
1739
  {
1740
44
    Node ret = Word::mkEmptyWord(node.getType());
1741
22
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1742
  }
1743
45056
  else if (d_arithEntail.check(zero, node[2]))
1744
  {
1745
1720
    Node ret = Word::mkEmptyWord(node.getType());
1746
860
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1747
  }
1748
1749
44196
  if (node[0].getKind() == STRING_SUBSTR)
1750
  {
1751
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1752
    //
1753
    // Note that this rewrite can be generalized to:
1754
    //
1755
    // (str.substr x a b) ---> "" if a >= (str.len x)
1756
    //
1757
    // This can be done when we generalize our entailment methods to
1758
    // accept an optional context. Then we could conjecture that
1759
    // (str.substr x a b) rewrites to "" and do a case analysis:
1760
    //
1761
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1762
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1763
    //
1764
    // For example, for (str.substr (str.substr x a a) a a), we could
1765
    // then deduce that under those assumptions, "a" is an
1766
    // over-approximation of the length of (str.substr x a a), which
1767
    // then allows us to reason that the result of the whole term must
1768
    // be empty.
1769
3418
    if (d_arithEntail.check(node[1], node[0][2]))
1770
    {
1771
102
      Node ret = Word::mkEmptyWord(node.getType());
1772
51
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1773
    }
1774
  }
1775
40778
  else if (node[0].getKind() == STRING_REPLACE)
1776
  {
1777
    // (str.substr (str.replace x y z) 0 n)
1778
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1779
    // if (str.len y) = 1 and (str.len z) = 1
1780
510
    if (node[1] == zero)
1781
    {
1782
780
      if (d_stringsEntail.checkLengthOne(node[0][1], true)
1783
780
          && d_stringsEntail.checkLengthOne(node[0][2], true))
1784
      {
1785
        Node ret = nm->mkNode(
1786
            kind::STRING_REPLACE,
1787
24
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1788
            node[0][1],
1789
48
            node[0][2]);
1790
12
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1791
      }
1792
    }
1793
  }
1794
1795
88266
  std::vector<Node> n1;
1796
44133
  utils::getConcat(node[0], n1);
1797
88266
  TypeNode stype = node.getType();
1798
1799
  // definite inclusion
1800
44133
  if (node[1] == zero)
1801
  {
1802
30641
    Node curr = node[2];
1803
30641
    std::vector<Node> childrenr;
1804
15871
    if (d_stringsEntail.stripSymbolicLength(n1, childrenr, 1, curr))
1805
    {
1806
1101
      if (curr != zero && !n1.empty())
1807
      {
1808
1302
        childrenr.push_back(nm->mkNode(
1809
868
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1810
      }
1811
2202
      Node ret = utils::mkConcat(childrenr, stype);
1812
1101
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1813
    }
1814
  }
1815
1816
  // (str.substr s x x) ---> "" if (str.len s) <= 1
1817
43032
  if (node[1] == node[2] && d_stringsEntail.checkLengthOne(node[0]))
1818
  {
1819
28
    Node ret = Word::mkEmptyWord(node.getType());
1820
14
    return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1821
  }
1822
1823
  // symbolic length analysis
1824
120902
  for (unsigned r = 0; r < 2; r++)
1825
  {
1826
    // the amount of characters we can strip
1827
160100
    Node curr;
1828
82216
    if (r == 0)
1829
    {
1830
43018
      if (node[1] != zero)
1831
      {
1832
        // strip up to start point off the start of the string
1833
28248
        curr = node[1];
1834
      }
1835
    }
1836
39198
    else if (r == 1)
1837
    {
1838
      Node tot_len =
1839
78280
          d_arithEntail.rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1840
      Node end_pt =
1841
78280
          d_arithEntail.rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1842
39198
      if (node[2] != tot_len)
1843
      {
1844
37358
        if (d_arithEntail.check(node[2], tot_len))
1845
        {
1846
          // end point beyond end point of string, map to tot_len
1847
232
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1848
116
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1849
        }
1850
        else
1851
        {
1852
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1853
37242
          curr =
1854
74484
              d_arithEntail.rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1855
        }
1856
      }
1857
    }
1858
82100
    if (!curr.isNull())
1859
    {
1860
      // strip off components while quantity is entailed positive
1861
65490
      int dir = r == 0 ? 1 : -1;
1862
126764
      std::vector<Node> childrenr;
1863
65490
      if (d_stringsEntail.stripSymbolicLength(n1, childrenr, dir, curr))
1864
      {
1865
4216
        if (r == 0)
1866
        {
1867
          Node ret = nm->mkNode(
1868
7640
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1869
3820
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1870
        }
1871
        else
1872
        {
1873
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1874
792
                                utils::mkConcat(n1, stype),
1875
                                node[1],
1876
1584
                                node[2]);
1877
396
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1878
        }
1879
      }
1880
    }
1881
  }
1882
  // combine substr
1883
38686
  if (node[0].getKind() == kind::STRING_SUBSTR)
1884
  {
1885
6016
    Node start_inner = node[0][1];
1886
6016
    Node start_outer = node[1];
1887
3192
    if (d_arithEntail.check(start_outer) && d_arithEntail.check(start_inner))
1888
    {
1889
      // both are positive
1890
      // thus, start point is definitely start_inner+start_outer.
1891
      // We can rewrite if it for certain what the length is
1892
1893
      // the length of a string from the inner substr subtracts the start point
1894
      // of the outer substr
1895
      Node len_from_inner = d_arithEntail.rewrite(
1896
3828
          nm->mkNode(kind::MINUS, node[0][2], start_outer));
1897
3828
      Node len_from_outer = node[2];
1898
3828
      Node new_len;
1899
      // take quantity that is for sure smaller than the other
1900
2098
      if (len_from_inner == len_from_outer)
1901
      {
1902
19
        new_len = len_from_inner;
1903
      }
1904
2079
      else if (d_arithEntail.check(len_from_inner, len_from_outer))
1905
      {
1906
208
        new_len = len_from_outer;
1907
      }
1908
1871
      else if (d_arithEntail.check(len_from_outer, len_from_inner))
1909
      {
1910
141
        new_len = len_from_inner;
1911
      }
1912
2098
      if (!new_len.isNull())
1913
      {
1914
736
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1915
        Node ret =
1916
736
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1917
368
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1918
      }
1919
    }
1920
  }
1921
38318
  return node;
1922
}
1923
1924
185
Node SequencesRewriter::rewriteUpdate(Node node)
1925
{
1926
185
  Assert(node.getKind() == kind::STRING_UPDATE);
1927
370
  Node s = node[0];
1928
185
  if (s.isConst())
1929
  {
1930
165
    if (Word::isEmpty(s))
1931
    {
1932
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1933
    }
1934
    // rewriting for constant arguments
1935
165
    if (node[1].isConst())
1936
    {
1937
145
      cvc5::Rational rMaxInt(String::maxSize());
1938
125
      if (node[1].getConst<Rational>() > rMaxInt)
1939
      {
1940
        // start beyond the maximum size of strings
1941
        // thus, it must be beyond the end point of this string
1942
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1943
      }
1944
125
      else if (node[1].getConst<Rational>().sgn() < 0)
1945
      {
1946
        // start before the beginning of the string
1947
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1948
      }
1949
      uint32_t start =
1950
125
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1951
125
      size_t len = Word::getLength(s);
1952
125
      if (start >= len)
1953
      {
1954
        // start beyond the end of the string
1955
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1956
      }
1957
125
      if (node[2].isConst())
1958
      {
1959
210
        Node ret = Word::update(s, start, node[2]);
1960
105
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1961
      }
1962
    }
1963
  }
1964
1965
80
  return node;
1966
}
1967
1968
168957
Node SequencesRewriter::rewriteContains(Node node)
1969
{
1970
168957
  Assert(node.getKind() == kind::STRING_CONTAINS);
1971
168957
  NodeManager* nm = NodeManager::currentNM();
1972
1973
168957
  if (node[0] == node[1])
1974
  {
1975
1290
    Node ret = NodeManager::currentNM()->mkConst(true);
1976
645
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1977
  }
1978
168312
  if (node[0].isConst())
1979
  {
1980
92367
    if (node[1].isConst())
1981
    {
1982
16370
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1983
8185
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1984
    }
1985
    else
1986
    {
1987
153013
      Node t = node[1];
1988
84182
      if (Word::isEmpty(node[0]))
1989
      {
1990
        Node len1 =
1991
70672
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1992
35460
        if (d_arithEntail.check(len1, true))
1993
        {
1994
          // we handle the false case here since the rewrite for equality
1995
          // uses this function, hence we want to conclude false if possible.
1996
          // len(x)>0 => contains( "", x ) ---> false
1997
496
          Node ret = NodeManager::currentNM()->mkConst(false);
1998
248
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
1999
        }
2000
      }
2001
48722
      else if (d_stringsEntail.checkLengthOne(t))
2002
      {
2003
29996
        std::vector<Node> vec = Word::getChars(node[0]);
2004
29996
        Node emp = Word::mkEmptyWord(t.getType());
2005
29996
        NodeBuilder nb(OR);
2006
14998
        nb << emp.eqNode(t);
2007
32560
        for (const Node& c : vec)
2008
        {
2009
17562
          Assert(c.getType() == t.getType());
2010
17562
          nb << c.eqNode(t);
2011
        }
2012
2013
        // str.contains("ABCabc", t) --->
2014
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2015
        // if len(t) <= 1
2016
29996
        Node ret = nb;
2017
14998
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2018
      }
2019
33724
      else if (node[1].getKind() == kind::STRING_CONCAT)
2020
      {
2021
        int firstc, lastc;
2022
791
        if (!d_stringsEntail.canConstantContainConcat(
2023
                node[0], node[1], firstc, lastc))
2024
        {
2025
210
          Node ret = NodeManager::currentNM()->mkConst(false);
2026
105
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2027
        }
2028
      }
2029
    }
2030
  }
2031
144776
  if (node[1].isConst())
2032
  {
2033
69901
    size_t len = Word::getLength(node[1]);
2034
69901
    if (len == 0)
2035
    {
2036
      // contains( x, "" ) ---> true
2037
70396
      Node ret = NodeManager::currentNM()->mkConst(true);
2038
35198
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2039
    }
2040
34703
    else if (len == 1)
2041
    {
2042
      // The following rewrites are specific to a single character second
2043
      // argument of contains, where we can reason that this character is
2044
      // not split over multiple components in the first argument.
2045
25566
      if (node[0].getKind() == STRING_CONCAT)
2046
      {
2047
1742
        std::vector<Node> nc1;
2048
871
        utils::getConcat(node[0], nc1);
2049
1742
        NodeBuilder nb(OR);
2050
3118
        for (const Node& ncc : nc1)
2051
        {
2052
2247
          nb << nm->mkNode(STRING_CONTAINS, ncc, node[1]);
2053
        }
2054
1742
        Node ret = nb.constructNode();
2055
        // str.contains( x ++ y, "A" ) --->
2056
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2057
871
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2058
      }
2059
24695
      else if (node[0].getKind() == STRING_REPLACE)
2060
      {
2061
3055
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2062
1622
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2063
        {
2064
378
          Node d1 = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2065
          Node d2 =
2066
              nm->mkNode(AND,
2067
378
                         nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2068
756
                         nm->mkNode(STRING_CONTAINS, node[0][2], node[1]));
2069
378
          Node ret = nm->mkNode(OR, d1, d2);
2070
          // If str.contains( y, "A" ) ---> false, then:
2071
          // str.contains( str.replace( x, y, z ), "A" ) --->
2072
          //   str.contains( x, "A" ) OR
2073
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2074
189
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2075
        }
2076
      }
2077
    }
2078
  }
2079
217036
  std::vector<Node> nc1;
2080
108518
  utils::getConcat(node[0], nc1);
2081
217036
  std::vector<Node> nc2;
2082
108518
  utils::getConcat(node[1], nc2);
2083
2084
  // component-wise containment
2085
217036
  std::vector<Node> nc1rb;
2086
217036
  std::vector<Node> nc1re;
2087
108518
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2088
  {
2089
5318
    Node ret = NodeManager::currentNM()->mkConst(true);
2090
2659
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2091
  }
2092
211718
  TypeNode stype = node[0].getType();
2093
2094
  // strip endpoints
2095
211718
  std::vector<Node> nb;
2096
211718
  std::vector<Node> ne;
2097
105859
  if (d_stringsEntail.stripConstantEndpoints(nc1, nc2, nb, ne))
2098
  {
2099
    Node ret = NodeManager::currentNM()->mkNode(
2100
396
        kind::STRING_CONTAINS, utils::mkConcat(nc1, stype), node[1]);
2101
198
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2102
  }
2103
2104
213658
  for (const Node& n : nc2)
2105
  {
2106
108859
    if (n.getKind() == kind::STRING_REPLACE)
2107
    {
2108
      // (str.contains x (str.replace y z w)) --> false
2109
      // if (str.contains x y) = false and (str.contains x w) = false
2110
      //
2111
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2112
      // replacement does not change y. (str.contains x w) checks that if the
2113
      // replacement changes anything in y, the w makes it impossible for it to
2114
      // occur in x.
2115
6698
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2116
3780
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2117
      {
2118
1390
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2119
720
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2120
        {
2121
100
          Node res = nm->mkConst(false);
2122
50
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2123
        }
2124
      }
2125
2126
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2127
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2128
      //
2129
      // TODO: Remove with under-/over-approximation
2130
3730
      if (node[0] == n[0] && node[0] == n[2])
2131
      {
2132
1624
        Node ret;
2133
812
        if (nc2.size() > 1)
2134
        {
2135
50
          Node emp = Word::mkEmptyWord(stype);
2136
50
          NodeBuilder nb2(kind::AND);
2137
75
          for (const Node& n2 : nc2)
2138
          {
2139
50
            if (n2 == n)
2140
            {
2141
25
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2142
            }
2143
            else
2144
            {
2145
25
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2146
            }
2147
          }
2148
25
          ret = nb2.constructNode();
2149
        }
2150
        else
2151
        {
2152
787
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2153
        }
2154
812
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2155
      }
2156
    }
2157
  }
2158
2159
  // length entailment
2160
209598
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2161
209598
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2162
104799
  if (d_arithEntail.check(len_n2, len_n1, true))
2163
  {
2164
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2165
326
    Node ret = NodeManager::currentNM()->mkConst(false);
2166
163
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2167
  }
2168
2169
  // multi-set reasoning
2170
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2171
  //   since the number of a's in the second argument is greater than the number
2172
  //   of a's in the first argument
2173
104636
  if (d_stringsEntail.checkMultisetSubset(node[0], node[1]))
2174
  {
2175
40
    Node ret = nm->mkConst(false);
2176
20
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2177
  }
2178
2179
104616
  if (d_arithEntail.check(len_n2, len_n1, false))
2180
  {
2181
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2182
104066
    Node ret = node[0].eqNode(node[1]);
2183
52033
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2184
  }
2185
2186
  // splitting
2187
52583
  if (node[0].getKind() == kind::STRING_CONCAT)
2188
  {
2189
2070
    if (node[1].isConst())
2190
    {
2191
1047
      Node t = node[1];
2192
      // Below, we are looking for a constant component of node[0]
2193
      // has no overlap with node[1], which means we can split.
2194
      // Notice that if the first or last components had no
2195
      // overlap, these would have been removed by strip
2196
      // constant endpoints above.
2197
      // Hence, we consider only the inner children.
2198
1253
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2199
      {
2200
        // constant contains
2201
768
        if (node[0][i].isConst())
2202
        {
2203
          // if no overlap, we can split into disjunction
2204
83
          if (Word::noOverlapWith(node[0][i], node[1]))
2205
          {
2206
154
            std::vector<Node> nc0;
2207
77
            utils::getConcat(node[0], nc0);
2208
154
            std::vector<Node> spl[2];
2209
77
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2210
77
            Assert(i < nc0.size() - 1);
2211
77
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2212
            Node ret = NodeManager::currentNM()->mkNode(
2213
                kind::OR,
2214
308
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2215
154
                                                 utils::mkConcat(spl[0], stype),
2216
                                                 node[1]),
2217
308
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2218
154
                                                 utils::mkConcat(spl[1], stype),
2219
385
                                                 node[1]));
2220
77
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2221
          }
2222
        }
2223
      }
2224
    }
2225
  }
2226
50513
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2227
  {
2228
    // (str.contains (str.substr x n (str.len y)) y) --->
2229
    //   (= (str.substr x n (str.len y)) y)
2230
    //
2231
    // TODO: Remove with under-/over-approximation
2232
11425
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2233
    {
2234
32
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2235
16
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2236
    }
2237
  }
2238
39088
  else if (node[0].getKind() == kind::STRING_REPLACE)
2239
  {
2240
922
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2241
    {
2242
440
      if (Word::noOverlapWith(node[1], node[0][1])
2243
440
          && Word::noOverlapWith(node[1], node[0][2]))
2244
      {
2245
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2246
        // if there is no overlap between c1 and c3 and none between c2 and c3
2247
4
        Node ret = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2248
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2249
      }
2250
    }
2251
2252
920
    if (node[0][0] == node[0][2])
2253
    {
2254
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2255
100
      if (node[0][1] == node[1])
2256
      {
2257
16
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2258
8
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2259
      }
2260
2261
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2262
      // if (str.len z) <= 1
2263
92
      if (d_stringsEntail.checkLengthOne(node[1]))
2264
      {
2265
12
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2266
6
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2267
      }
2268
    }
2269
2270
    // (str.contains (str.replace x y z) z) --->
2271
    //   (or (str.contains x y) (str.contains x z))
2272
906
    if (node[0][2] == node[1])
2273
    {
2274
      Node ret =
2275
          nm->mkNode(OR,
2276
364
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2277
728
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][2]));
2278
182
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2279
    }
2280
2281
    // (str.contains (str.replace x y z) w) --->
2282
    //   (str.contains (str.replace x y "") w)
2283
    // if (str.contains z w) ---> false and (str.len w) = 1
2284
724
    if (d_stringsEntail.checkLengthOne(node[1]))
2285
    {
2286
416
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2287
230
      if (!ctn.isNull() && !ctn.getConst<bool>())
2288
      {
2289
88
        Node empty = Word::mkEmptyWord(stype);
2290
        Node ret = nm->mkNode(
2291
            kind::STRING_CONTAINS,
2292
88
            nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty),
2293
176
            node[1]);
2294
44
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2295
      }
2296
    }
2297
  }
2298
2299
52248
  if (node[1].getKind() == kind::STRING_REPLACE)
2300
  {
2301
    // (str.contains x (str.replace y x y)) --->
2302
    //   (str.contains x y)
2303
1095
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2304
    {
2305
8
      Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1][0]);
2306
4
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2307
    }
2308
2309
    // (str.contains x (str.replace "" x y)) --->
2310
    //   (= "" (str.replace "" x y))
2311
    //
2312
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2313
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2314
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2315
2180
    Node emp = Word::mkEmptyWord(stype);
2316
1091
    if (node[0] == node[1][1] && node[1][0] == emp)
2317
    {
2318
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2319
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2320
    }
2321
  }
2322
2323
52242
  return node;
2324
}
2325
2326
3165
Node SequencesRewriter::rewriteIndexof(Node node)
2327
{
2328
3165
  Assert(node.getKind() == kind::STRING_INDEXOF);
2329
3165
  NodeManager* nm = NodeManager::currentNM();
2330
2331
3165
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2332
  {
2333
    // z<0  implies  str.indexof( x, y, z ) --> -1
2334
344
    Node negone = nm->mkConst(Rational(-1));
2335
172
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2336
  }
2337
2338
  // the string type
2339
5986
  TypeNode stype = node[0].getType();
2340
2341
  // evaluation and simple cases
2342
5986
  std::vector<Node> children0;
2343
2993
  utils::getConcat(node[0], children0);
2344
2993
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2345
  {
2346
1005
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2347
844
    if (node[2].getConst<Rational>() > rMaxInt)
2348
    {
2349
      // We know that, due to limitations on the size of string constants
2350
      // in our implementation, that accessing a position greater than
2351
      // rMaxInt is guaranteed to be out of bounds.
2352
      Node negone = nm->mkConst(Rational(-1));
2353
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2354
    }
2355
844
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2356
1005
    Node s = children0[0];
2357
1005
    Node t = node[1];
2358
    uint32_t start =
2359
844
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2360
844
    std::size_t ret = Word::find(s, t, start);
2361
844
    if (ret != std::string::npos)
2362
    {
2363
596
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2364
298
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2365
    }
2366
546
    else if (children0.size() == 1)
2367
    {
2368
770
      Node negone = nm->mkConst(Rational(-1));
2369
385
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2370
    }
2371
  }
2372
2373
2310
  if (node[0] == node[1])
2374
  {
2375
76
    if (node[2].isConst())
2376
    {
2377
10
      if (node[2].getConst<Rational>().sgn() == 0)
2378
      {
2379
        // indexof( x, x, 0 ) --> 0
2380
20
        Node zero = nm->mkConst(Rational(0));
2381
10
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2382
      }
2383
    }
2384
66
    if (d_arithEntail.check(node[2], true))
2385
    {
2386
      // y>0  implies  indexof( x, x, y ) --> -1
2387
      Node negone = nm->mkConst(Rational(-1));
2388
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2389
    }
2390
114
    Node emp = Word::mkEmptyWord(stype);
2391
66
    if (node[0] != emp)
2392
    {
2393
      // indexof( x, x, z ) ---> indexof( "", "", z )
2394
36
      Node ret = nm->mkNode(STRING_INDEXOF, emp, emp, node[2]);
2395
18
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2396
    }
2397
  }
2398
2399
4564
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2400
4564
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2401
4564
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2402
2403
2282
  if (node[1].isConst())
2404
  {
2405
1708
    if (Word::isEmpty(node[1]))
2406
    {
2407
73
      if (d_arithEntail.check(len0, node[2]) && d_arithEntail.check(node[2]))
2408
      {
2409
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2410
9
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2411
      }
2412
    }
2413
  }
2414
2415
2273
  if (d_arithEntail.check(len1, len0m2, true))
2416
  {
2417
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2418
16
    Node negone = nm->mkConst(Rational(-1));
2419
8
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2420
  }
2421
2422
4530
  Node fstr = node[0];
2423
2265
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2424
  {
2425
1082
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2426
1082
    fstr = Rewriter::rewrite(fstr);
2427
  }
2428
2429
4530
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2430
4530
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2431
2265
                                 << fstr << ", " << node[1] << ")" << std::endl;
2432
2265
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2433
4530
  std::vector<Node> children1;
2434
2265
  utils::getConcat(node[1], children1);
2435
2265
  if (!cmp_conr.isNull())
2436
  {
2437
331
    if (cmp_conr.getConst<bool>())
2438
    {
2439
326
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2440
      {
2441
        // past the first position in node[0] that contains node[1], we can drop
2442
395
        std::vector<Node> nb;
2443
395
        std::vector<Node> ne;
2444
239
        int cc = d_stringsEntail.componentContains(
2445
239
            children0, children1, nb, ne, true, 1);
2446
239
        if (cc != -1 && !ne.empty())
2447
        {
2448
          // For example:
2449
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2450
130
          Node nn = utils::mkConcat(children0, stype);
2451
130
          Node ret = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2452
65
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2453
        }
2454
2455
        // Strip components from the beginning that are guaranteed not to match
2456
174
        if (d_stringsEntail.stripConstantEndpoints(
2457
                children0, children1, nb, ne, 1))
2458
        {
2459
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2460
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2461
          Node ret = nm->mkNode(
2462
              kind::PLUS,
2463
36
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2464
72
              nm->mkNode(kind::STRING_INDEXOF,
2465
36
                         utils::mkConcat(children0, stype),
2466
                         node[1],
2467
72
                         node[2]));
2468
18
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2469
        }
2470
      }
2471
      // To show that the first argument definitely contains the second, the
2472
      // index must be a valid index in the first argument. This ensures that
2473
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2474
      // is beyond the length of t. This is not required for the above rewrites,
2475
      // which only apply when n=0.
2476
243
      if (d_arithEntail.check(node[2]) && d_arithEntail.check(len0, node[2]))
2477
      {
2478
        // strip symbolic length
2479
335
        Node new_len = node[2];
2480
335
        std::vector<Node> nr;
2481
179
        if (d_stringsEntail.stripSymbolicLength(children0, nr, 1, new_len))
2482
        {
2483
          // For example:
2484
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2485
          // implies
2486
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2487
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2488
46
          Node nn = utils::mkConcat(children0, stype);
2489
          Node ret =
2490
              nm->mkNode(PLUS,
2491
46
                         nm->mkNode(MINUS, node[2], new_len),
2492
92
                         nm->mkNode(STRING_INDEXOF, nn, node[1], new_len));
2493
23
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2494
        }
2495
      }
2496
    }
2497
    else
2498
    {
2499
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2500
10
      Node negone = nm->mkConst(Rational(-1));
2501
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2502
    }
2503
  }
2504
  else
2505
  {
2506
3844
    Node new_len = node[2];
2507
3844
    std::vector<Node> nr;
2508
1934
    if (d_stringsEntail.stripSymbolicLength(children0, nr, 1, new_len))
2509
    {
2510
      // Normalize the string before the start index.
2511
      //
2512
      // For example:
2513
      // str.indexof(str.++("ABCD", x), y, 3) --->
2514
      // str.indexof(str.++("AAAD", x), y, 3)
2515
124
      Node nodeNr = utils::mkConcat(nr, stype);
2516
124
      Node normNr = lengthPreserveRewrite(nodeNr);
2517
74
      if (normNr != nodeNr)
2518
      {
2519
48
        std::vector<Node> normNrChildren;
2520
24
        utils::getConcat(normNr, normNrChildren);
2521
48
        std::vector<Node> children(normNrChildren);
2522
24
        children.insert(children.end(), children0.begin(), children0.end());
2523
48
        Node nn = utils::mkConcat(children, stype);
2524
48
        Node res = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2525
24
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2526
      }
2527
    }
2528
  }
2529
2530
2130
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2531
  {
2532
2192
    std::vector<Node> cb;
2533
2192
    std::vector<Node> ce;
2534
1100
    if (d_stringsEntail.stripConstantEndpoints(
2535
            children0, children1, cb, ce, -1))
2536
    {
2537
16
      Node ret = utils::mkConcat(children0, stype);
2538
8
      ret = nm->mkNode(STRING_INDEXOF, ret, node[1], node[2]);
2539
      // For example:
2540
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2541
8
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2542
    }
2543
  }
2544
2545
2122
  return node;
2546
}
2547
2548
890
Node SequencesRewriter::rewriteIndexofRe(Node node)
2549
{
2550
890
  Assert(node.getKind() == STRING_INDEXOF_RE);
2551
890
  NodeManager* nm = NodeManager::currentNM();
2552
1780
  Node s = node[0];
2553
1780
  Node r = node[1];
2554
1780
  Node n = node[2];
2555
1780
  Node zero = nm->mkConst(Rational(0));
2556
1780
  Node slen = nm->mkNode(STRING_LENGTH, s);
2557
2558
890
  if (d_arithEntail.check(zero, n, true) || d_arithEntail.check(n, slen, true))
2559
  {
2560
56
    Node ret = nm->mkConst(Rational(-1));
2561
28
    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
2562
  }
2563
2564
862
  if (RegExpEntail::isConstRegExp(r))
2565
  {
2566
634
    if (s.isConst() && n.isConst())
2567
    {
2568
264
      Rational nrat = n.getConst<Rational>();
2569
264
      cvc5::Rational rMaxInt(cvc5::String::maxSize());
2570
132
      if (nrat > rMaxInt)
2571
      {
2572
        // We know that, due to limitations on the size of string constants
2573
        // in our implementation, that accessing a position greater than
2574
        // rMaxInt is guaranteed to be out of bounds.
2575
        Node negone = nm->mkConst(Rational(-1));
2576
        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
2577
      }
2578
2579
132
      uint32_t start = nrat.getNumerator().toUnsignedInt();
2580
264
      Node rem = nm->mkConst(s.getConst<String>().substr(start));
2581
132
      std::pair<size_t, size_t> match = firstMatch(rem, r);
2582
      Node ret = nm->mkConst(
2583
345
          Rational(match.first == string::npos
2584
                       ? -1
2585
345
                       : static_cast<int64_t>(start + match.first)));
2586
132
      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
2587
    }
2588
2589
502
    if (d_arithEntail.check(n, zero) && d_arithEntail.check(slen, n))
2590
    {
2591
768
      String emptyStr("");
2592
388
      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
2593
      {
2594
8
        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
2595
      }
2596
    }
2597
  }
2598
722
  return node;
2599
}
2600
2601
4024
Node SequencesRewriter::rewriteReplace(Node node)
2602
{
2603
4024
  Assert(node.getKind() == kind::STRING_REPLACE);
2604
4024
  NodeManager* nm = NodeManager::currentNM();
2605
2606
4024
  if (node[1].isConst() && Word::isEmpty(node[1]))
2607
  {
2608
850
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2609
425
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2610
  }
2611
  // the string type
2612
7198
  TypeNode stype = node.getType();
2613
2614
7198
  std::vector<Node> children0;
2615
3599
  utils::getConcat(node[0], children0);
2616
2617
3599
  if (node[1].isConst() && children0[0].isConst())
2618
  {
2619
903
    Node s = children0[0];
2620
903
    Node t = node[1];
2621
894
    std::size_t p = Word::find(s, t);
2622
894
    if (p == std::string::npos)
2623
    {
2624
583
      if (children0.size() == 1)
2625
      {
2626
574
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2627
      }
2628
    }
2629
    else
2630
    {
2631
622
      Node s1 = Word::substr(s, 0, p);
2632
622
      Node s3 = Word::substr(s, p + Word::getLength(t));
2633
622
      std::vector<Node> children;
2634
311
      if (!Word::isEmpty(s1))
2635
      {
2636
49
        children.push_back(s1);
2637
      }
2638
311
      children.push_back(node[2]);
2639
311
      if (!Word::isEmpty(s3))
2640
      {
2641
88
        children.push_back(s3);
2642
      }
2643
311
      children.insert(children.end(), children0.begin() + 1, children0.end());
2644
622
      Node ret = utils::mkConcat(children, stype);
2645
311
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2646
    }
2647
  }
2648
2649
  // rewrites that apply to both replace and replaceall
2650
5428
  Node rri = rewriteReplaceInternal(node);
2651
2714
  if (!rri.isNull())
2652
  {
2653
    // printing of the rewrite managed by the call above
2654
166
    return rri;
2655
  }
2656
2657
2548
  if (node[0] == node[2])
2658
  {
2659
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2660
831
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2661
831
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2662
532
    if (d_arithEntail.check(l1, l0))
2663
    {
2664
227
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2665
    }
2666
2667
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2668
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2669
305
    if (d_stringsEntail.checkLengthOne(node[0]))
2670
    {
2671
242
      Node empty = Word::mkEmptyWord(stype);
2672
      Node rn1 = Rewriter::rewrite(
2673
242
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2674
124
      if (rn1 != node[1])
2675
      {
2676
242
        std::vector<Node> emptyNodes;
2677
        bool allEmptyEqs;
2678
124
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2679
2680
124
        if (allEmptyEqs)
2681
        {
2682
182
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2683
94
          if (node[1] != nn1)
2684
          {
2685
12
            Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]);
2686
6
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2687
          }
2688
        }
2689
      }
2690
    }
2691
  }
2692
2693
4630
  std::vector<Node> children1;
2694
2315
  utils::getConcat(node[1], children1);
2695
2696
  // check if contains definitely does (or does not) hold
2697
4630
  Node cmp_con = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1]);
2698
4630
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2699
2315
  if (cmp_conr.isConst())
2700
  {
2701
538
    if (cmp_conr.getConst<bool>())
2702
    {
2703
      // component-wise containment
2704
872
      std::vector<Node> cb;
2705
872
      std::vector<Node> ce;
2706
524
      int cc = d_stringsEntail.componentContains(
2707
524
          children0, children1, cb, ce, true, 1);
2708
524
      if (cc != -1)
2709
      {
2710
436
        if (cc == 0 && children0[0] == children1[0])
2711
        {
2712
          // definitely a prefix, can do the replace
2713
          // for example,
2714
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2715
          //   str.++( y, "b" )
2716
134
          std::vector<Node> cres;
2717
67
          cres.push_back(node[2]);
2718
67
          cres.insert(cres.end(), ce.begin(), ce.end());
2719
134
          Node ret = utils::mkConcat(cres, stype);
2720
67
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2721
        }
2722
369
        else if (!ce.empty())
2723
        {
2724
          // we can pull remainder past first definite containment
2725
          // for example,
2726
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2727
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2728
          // this is independent of whether the second argument may be empty
2729
218
          std::vector<Node> scc;
2730
327
          scc.push_back(NodeManager::currentNM()->mkNode(
2731
              kind::STRING_REPLACE,
2732
218
              utils::mkConcat(children0, stype),
2733
              node[1],
2734
              node[2]));
2735
109
          scc.insert(scc.end(), ce.begin(), ce.end());
2736
218
          Node ret = utils::mkConcat(scc, stype);
2737
109
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2738
        }
2739
      }
2740
    }
2741
    else
2742
    {
2743
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2744
14
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2745
    }
2746
  }
2747
1777
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2748
  {
2749
    // Rewriting the str.contains may return equalities of the form (= x "").
2750
    // In that case, we can substitute the variables appearing in those
2751
    // equalities with the empty string in the third argument of the
2752
    // str.replace. For example:
2753
    //
2754
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2755
    //
2756
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2757
    // but that means that y must be empty in that case. Thus, we can
2758
    // substitute y with "" in the third argument. Note that the third argument
2759
    // does not matter when the str.replace does not apply.
2760
    //
2761
512
    Node empty = Word::mkEmptyWord(stype);
2762
2763
512
    std::vector<Node> emptyNodes;
2764
    bool allEmptyEqs;
2765
272
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2766
2767
272
    if (emptyNodes.size() > 0)
2768
    {
2769
      // Perform the substitutions
2770
420
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2771
      Node nn2 = node[2].substitute(
2772
420
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2773
2774
      // If the contains rewrites to a conjunction of empty-string equalities
2775
      // and we are doing the replacement in an empty string, we can rewrite
2776
      // the string-to-replace with a concatenation of all the terms that must
2777
      // be empty:
2778
      //
2779
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2780
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2781
226
      if (node[0] == empty && allEmptyEqs)
2782
      {
2783
188
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2784
188
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2785
98
        if (nn1 != node[1] || nn2 != node[2])
2786
        {
2787
16
          Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2);
2788
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2789
        }
2790
      }
2791
2792
218
      if (nn2 != node[2])
2793
      {
2794
48
        Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2);
2795
24
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2796
      }
2797
    }
2798
  }
2799
2800
2093
  if (cmp_conr != cmp_con)
2801
  {
2802
733
    if (d_stringsEntail.checkNonEmpty(node[1]))
2803
    {
2804
      // pull endpoints that can be stripped
2805
      // for example,
2806
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2807
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2808
789
      std::vector<Node> cb;
2809
789
      std::vector<Node> ce;
2810
403
      if (d_stringsEntail.stripConstantEndpoints(children0, children1, cb, ce))
2811
      {
2812
34
        std::vector<Node> cc;
2813
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2814
17
        cc.push_back(
2815
68
            NodeManager::currentNM()->mkNode(kind::STRING_REPLACE,
2816
34
                                             utils::mkConcat(children0, stype),
2817
                                             node[1],
2818
                                             node[2]));
2819
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2820
34
        Node ret = utils::mkConcat(cc, stype);
2821
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2822
      }
2823
    }
2824
  }
2825
2826
2076
  children1.clear();
2827
2076
  utils::getConcat(node[1], children1);
2828
4152
  Node lastChild1 = children1[children1.size() - 1];
2829
2076
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2830
  {
2831
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2832
    // (str.replace x (str.++ t
2833
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2834
    // if j > len(x)
2835
    //
2836
    // Reasoning: If the string to be replaced is longer than x, then it does
2837
    // not matter how much longer it is, the result is always x. Thus, it is
2838
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2839
2840
100
    children1.pop_back();
2841
    // Length of the non-substr components in the second argument
2842
    Node partLen1 =
2843
192
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2844
192
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2845
2846
192
    Node zero = nm->mkConst(Rational(0));
2847
192
    Node one = nm->mkConst(Rational(1));
2848
192
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2849
192
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2850
    // Check len(t) + j > len(x) + 1
2851
100
    if (d_arithEntail.check(maxLen1, len0_1, true))
2852
    {
2853
8
      children1.push_back(nm->mkNode(
2854
          kind::STRING_SUBSTR,
2855
          lastChild1[0],
2856
          lastChild1[1],
2857
16
          nm->mkNode(
2858
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2859
      Node res = nm->mkNode(kind::STRING_REPLACE,
2860
                            node[0],
2861
16
                            utils::mkConcat(children1, stype),
2862
32
                            node[2]);
2863
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2864
    }
2865
  }
2866
2867
2068
  if (node[0].getKind() == STRING_REPLACE)
2868
  {
2869
364
    Node x = node[0];
2870
364
    Node y = node[1];
2871
364
    Node z = node[2];
2872
188
    if (x[0] == x[2] && x[0] == y)
2873
    {
2874
      // (str.replace (str.replace y w y) y z) -->
2875
      //   (str.replace (str.replace y w z) y z)
2876
      // if (str.len w) >= (str.len z) and w != z
2877
      //
2878
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2879
      // does appear in y.
2880
      //
2881
      // Case (1): In this case, the reasoning is trivial. The
2882
      // inner replace does not do anything, so we can just replace its third
2883
      // argument with any string.
2884
      //
2885
      // Case (2): After the inner replace, we are guaranteed to have a string
2886
      // that contains y at the index of w in the original string y. The outer
2887
      // replace then replaces that y with z, so we can short-circuit that
2888
      // replace by directly replacing w with z in the inner replace. We can
2889
      // only do that if the result of the new inner replace does not contain
2890
      // y, otherwise we end up doing two replaces that are different from the
2891
      // original expression. We enforce that by requiring that the length of w
2892
      // has to be greater or equal to the length of z and that w and z have to
2893
      // be different. This makes sure that an inner replace changes a string
2894
      // to a string that is shorter than y, making it impossible for the outer
2895
      // replace to match.
2896
120
      Node w = x[1];
2897
2898
      // (str.len w) >= (str.len z)
2899
120
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2900
120
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2901
66
      if (d_arithEntail.check(wlen, zlen))
2902
      {
2903
        // w != z
2904
56
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2905
34
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2906
        {
2907
          Node ret = nm->mkNode(kind::STRING_REPLACE,
2908
24
                                nm->mkNode(kind::STRING_REPLACE, y, w, z),
2909
                                y,
2910
48
                                z);
2911
12
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2912
        }
2913
      }
2914
    }
2915
  }
2916
2917
2056
  if (node[1].getKind() == STRING_REPLACE)
2918
  {
2919
246
    if (node[1][0] == node[0])
2920
    {
2921
48
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2922
      {
2923
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2924
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2925
      }
2926
48
      bool dualReplIteSuccess = false;
2927
88
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2928
48
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2929
      {
2930
        // str.contains( x, z ) ---> false
2931
        //   implies
2932
        // str.replace( x, str.replace( x, y, z ), w ) --->
2933
        // ite( str.contains( x, y ), x, w )
2934
        dualReplIteSuccess = true;
2935
      }
2936
      else
2937
      {
2938
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2939
        //   implies
2940
        // str.replace( x, str.replace( x, y, z ), w ) --->
2941
        // ite( str.contains( x, y ), x, w )
2942
48
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2943
48
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2944
        {
2945
16
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2946
16
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2947
          {
2948
8
            dualReplIteSuccess = true;
2949
          }
2950
        }
2951
      }
2952
48
      if (dualReplIteSuccess)
2953
      {
2954
        Node res = nm->mkNode(ITE,
2955
16
                              nm->mkNode(STRING_CONTAINS, node[0], node[1][1]),
2956
                              node[0],
2957
32
                              node[2]);
2958
8
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2959
      }
2960
    }
2961
2962
238
    bool invSuccess = false;
2963
238
    if (node[1][1] == node[0])
2964
    {
2965
50
      if (node[1][0] == node[1][2])
2966
      {
2967
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2968
        invSuccess = true;
2969
      }
2970
50
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2971
      {
2972
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2973
        //   implies
2974
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2975
40
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2976
20
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2977
      }
2978
    }
2979
    else
2980
    {
2981
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2982
      //   implies
2983
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2984
376
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2985
188
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2986
      {
2987
16
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2988
16
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2989
      }
2990
    }
2991
238
    if (invSuccess)
2992
    {
2993
      Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]);
2994
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2995
    }
2996
  }
2997
2048
  if (node[2].getKind() == STRING_REPLACE)
2998
  {
2999
54
    if (node[2][1] == node[0])
3000
    {
3001
      // str.contains( z, w ) ----> false implies
3002
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3003
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
3004
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
3005
      {
3006
        Node res =
3007
            nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]);
3008
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
3009
      }
3010
    }
3011
54
    if (node[2][0] == node[1])
3012
    {
3013
4
      bool success = false;
3014
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
3015
      {
3016
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3017
        success = true;
3018
      }
3019
      else
3020
      {
3021
        // str.contains( x, z ) ----> false implies
3022
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3023
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
3024
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3025
      }
3026
4
      if (success)
3027
      {
3028
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
3029
      }
3030
    }
3031
  }
3032
  // miniscope based on components that do not contribute to contains
3033
  // for example,
3034
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
3035
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
3036
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3037
2048
  if (d_stringsEntail.checkLengthOne(node[1]))
3038
  {
3039
1260
    Node lastLhs;
3040
634
    unsigned lastCheckIndex = 0;
3041
646
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3042
    {
3043
142
      unsigned checkIndex = children0.size() - i;
3044
154
      std::vector<Node> checkLhs;
3045
142
      checkLhs.insert(
3046
284
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3047
154
      Node lhs = utils::mkConcat(checkLhs, stype);
3048
154
      Node rhs = children0[checkIndex];
3049
154
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
3050
142
      if (!ctn.isNull() && ctn.getConst<bool>())
3051
      {
3052
12
        lastLhs = lhs;
3053
12
        lastCheckIndex = checkIndex;
3054
      }
3055
      else
3056
      {
3057
130
        break;
3058
      }
3059
    }
3060
634
    if (!lastLhs.isNull())
3061
    {
3062
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
3063
32
                             children0.end());
3064
16
      Node rem = utils::mkConcat(remc, stype);
3065
      Node ret =
3066
          nm->mkNode(STRING_CONCAT,
3067
16
                     nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]),
3068
32
                     rem);
3069
      // for example:
3070
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3071
      // Since we know that the first occurrence of "A" cannot be in the
3072
      // second occurrence of x. Notice this is specific to single characters
3073
      // due to complications with finds that span multiple components for
3074
      // non-characters.
3075
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3076
    }
3077
  }
3078
3079
  // TODO (#1180) incorporate these?
3080
  // contains( t, s ) =>
3081
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3082
  // contains( t, s ) =>
3083
  //   contains( replace( t, s, r ), r ) ----> true
3084
3085
2040
  return node;
3086
}
3087
3088
203
Node SequencesRewriter::rewriteReplaceAll(Node node)
3089
{
3090
203
  Assert(node.getKind() == STRING_REPLACE_ALL);
3091
3092
406
  TypeNode stype = node.getType();
3093
3094
203
  if (node[0].isConst() && node[1].isConst())
3095
  {
3096
82
    std::vector<Node> children;
3097
82
    Node s = node[0];
3098
82
    Node t = node[1];
3099
41
    if (Word::isEmpty(s) || Word::isEmpty(t))
3100
    {
3101
12
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3102
    }
3103
29
    std::size_t sizeS = Word::getLength(s);
3104
29
    std::size_t sizeT = Word::getLength(t);
3105
29
    std::size_t index = 0;
3106
29
    std::size_t curr = 0;
3107
62
    do
3108
    {
3109
91
      curr = Word::find(s, t, index);
3110
91
      if (curr != std::string::npos)
3111
      {
3112
62
        if (curr > index)
3113
        {
3114
26
          children.push_back(Word::substr(s, index, curr - index));
3115
        }
3116
62
        children.push_back(node[2]);
3117
62
        index = curr + sizeT;
3118
      }
3119
      else
3120
      {
3121
29
        children.push_back(Word::substr(s, index, sizeS - index));
3122
      }
3123
91
    } while (curr != std::string::npos && curr < sizeS);
3124
    // constant evaluation
3125
58
    Node res = utils::mkConcat(children, stype);
3126
29
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3127
  }
3128
3129
  // rewrites that apply to both replace and replaceall
3130
324
  Node rri = rewriteReplaceInternal(node);
3131
162
  if (!rri.isNull())
3132
  {
3133
    // printing of the rewrite managed by the call above
3134
4
    return rri;
3135
  }
3136
3137
158
  return node;
3138
}
3139
3140
2876
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3141
{
3142
2876
  Kind nk = node.getKind();
3143
2876
  Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL);
3144
3145
2876
  if (node[1] == node[2])
3146
  {
3147
75
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3148
  }
3149
3150
2801
  if (node[0] == node[1])
3151
  {
3152
    // only holds for replaceall if non-empty
3153
115
    if (nk == STRING_REPLACE || d_stringsEntail.checkNonEmpty(node[1]))
3154
    {
3155
95
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3156
    }
3157
  }
3158
3159
2706
  return Node::null();
3160
}
3161
3162
436
Node SequencesRewriter::rewriteReplaceRe(Node node)
3163
{
3164
436
  Assert(node.getKind() == STRING_REPLACE_RE);
3165
436
  NodeManager* nm = NodeManager::currentNM();
3166
872
  Node x = node[0];
3167
872
  Node y = node[1];
3168
872
  Node z = node[2];
3169
3170
436
  if (RegExpEntail::isConstRegExp(y))
3171
  {
3172
322
    if (x.isConst())
3173
    {
3174
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3175
68
      std::pair<size_t, size_t> match = firstMatch(x, y);
3176
68
      if (match.first != string::npos)
3177
      {
3178
76
        String s = x.getConst<String>();
3179
        Node ret = nm->mkNode(STRING_CONCAT,
3180
76
                              nm->mkConst(s.substr(0, match.first)),
3181
                              z,
3182
152
                              nm->mkConst(s.substr(match.second)));
3183
38
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3184
      }
3185
      else
3186
      {
3187
30
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3188
      }
3189
    }
3190
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3191
504
    String emptyStr("");
3192
254
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3193
    {
3194
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3195
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3196
    }
3197
  }
3198
364
  return node;
3199
}
3200
3201
213
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3202
{
3203
213
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3204
213
  NodeManager* nm = NodeManager::currentNM();
3205
426
  Node x = node[0];
3206
426
  Node y = node[1];
3207
426
  Node z = node[2];
3208
3209
213
  if (RegExpEntail::isConstRegExp(y))
3210
  {
3211
167
    if (x.isConst())
3212
    {
3213
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3214
      //   "Z" ++ y ++ "Z" ++ y
3215
70
      TypeNode t = x.getType();
3216
70
      Node emp = Word::mkEmptyWord(t);
3217
      Node yp = Rewriter::rewrite(
3218
70
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3219
70
      std::vector<Node> res;
3220
70
      String rem = x.getConst<String>();
3221
35
      std::pair<size_t, size_t> match(0, 0);
3222
103
      while (rem.size() != 0)
3223
      {
3224
42
        match = firstMatch(nm->mkConst(rem), yp);
3225
42
        if (match.first == string::npos)
3226
        {
3227
8
          break;
3228
        }
3229
34
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3230
34
        res.push_back(z);
3231
34
        rem = rem.substr(match.second);
3232
      }
3233
35
      res.push_back(nm->mkConst(rem));
3234
70
      Node ret = utils::mkConcat(res, t);
3235
35
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3236
    }
3237
  }
3238
3239
178
  return node;
3240
}
3241
3242
242
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3243
{
3244
242
  Assert(n.isConst() && n.getType().isStringLike());
3245
242
  Assert(r.getType().isRegExp());
3246
242
  NodeManager* nm = NodeManager::currentNM();
3247
3248
484
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3249
484
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3250
484
  String s = n.getConst<String>();
3251
3252
242
  if (s.size() == 0)
3253
  {
3254
52
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3255
    {
3256
6
      return std::make_pair(0, 0);
3257
    }
3258
    else
3259
    {
3260
46
      return std::make_pair(string::npos, string::npos);
3261
    }
3262
  }
3263
3264
286
  for (size_t i = 0, size = s.size(); i < size; i++)
3265
  {
3266
243
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3267
    {
3268
477
      for (size_t j = i; j <= size; j++)
3269
      {
3270
807
        String substr = s.substr(i, j - i);
3271
477
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3272
        {
3273
147
          return std::make_pair(i, j);
3274
        }
3275
      }
3276
    }
3277
  }
3278
3279
43
  return std::make_pair(string::npos, string::npos);
3280
}
3281
3282
241
Node SequencesRewriter::rewriteStrReverse(Node node)
3283
{
3284
241
  Assert(node.getKind() == STRING_REV);
3285
241
  NodeManager* nm = NodeManager::currentNM();
3286
482
  Node x = node[0];
3287
241
  if (x.isConst())
3288
  {
3289
    // reverse the characters in the constant
3290
126
    Node retNode = Word::reverse(x);
3291
63
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3292
  }
3293
178
  else if (x.getKind() == STRING_CONCAT)
3294
  {
3295
78
    std::vector<Node> children;
3296
119
    for (const Node& nc : x)
3297
    {
3298
80
      children.push_back(nm->mkNode(STRING_REV, nc));
3299
    }
3300
39
    std::reverse(children.begin(), children.end());
3301
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3302
78
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3303
39
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3304
  }
3305
139
  else if (x.getKind() == STRING_REV)
3306
  {
3307
    // rev( rev( x ) ) --> x
3308
10
    Node retNode = x[0];
3309
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3310
  }
3311
134
  return node;
3312
}
3313
3314
185
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3315
{
3316
185
  Assert(n.getKind() == kind::STRING_PREFIX
3317
         || n.getKind() == kind::STRING_SUFFIX);
3318
185
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3319
185
  if (n[0] == n[1])
3320
  {
3321
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3322
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3323
  }
3324
177
  if (n[0].isConst())
3325
  {
3326
94
    if (Word::isEmpty(n[0]))
3327
    {
3328
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3329
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3330
    }
3331
  }
3332
173
  if (n[1].isConst())
3333
  {
3334
37
    Node s = n[1];
3335
37
    size_t lenS = Word::getLength(s);
3336
37
    if (n[0].isConst())
3337
    {
3338
      Node ret = NodeManager::currentNM()->mkConst(false);
3339
      Node t = n[0];
3340
      size_t lenT = Word::getLength(t);
3341
      if (lenS >= lenT)
3342
      {
3343
        if ((isPrefix && t == Word::prefix(s, lenT))
3344
            || (!isPrefix && t == Word::suffix(s, lenT)))
3345
        {
3346
          ret = NodeManager::currentNM()->mkConst(true);
3347
        }
3348
      }
3349
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3350
    }
3351
37
    else if (lenS == 0)
3352
    {
3353
8
      Node ret = n[0].eqNode(n[1]);
3354
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3355
    }
3356
33
    else if (lenS == 1)
3357
    {
3358
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3359
      // (str.contains "A" x )
3360
      Node ret =
3361
66
          NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS, n[1], n[0]);
3362
33
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3363
    }
3364
  }
3365
272
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3366
272
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3367
272
  Node val;
3368
136
  if (isPrefix)
3369
  {
3370
100
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3371
  }
3372
  else
3373
  {
3374
36
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3375
  }
3376
3377
  // Check if we can turn the prefix/suffix into equalities by showing that the
3378
  // prefix/suffix is at least as long as the string
3379
272
  Node eqs = d_stringsEntail.inferEqsFromContains(n[1], n[0]);
3380
136
  if (!eqs.isNull())
3381
  {
3382
24
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3383
  }
3384
3385
  // general reduction to equality + substr
3386
  Node retNode = n[0].eqNode(
3387
224
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3388
3389
112
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3390
}
3391
3392
78
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3393
{
3394
78
  NodeManager* nm = NodeManager::currentNM();
3395
156
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3396
156
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3397
156
  return res.isNull() ? n : res;
3398
}
3399
3400
90
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3401
{
3402
90
  NodeManager* nm = NodeManager::currentNM();
3403
3404
180
  Node res;
3405
90
  if (len.getKind() == CONST_RATIONAL)
3406
  {
3407
    // c -> "A" repeated c times
3408
156
    Rational ratLen = len.getConst<Rational>();
3409
78
    Assert(ratLen.getDenominator() == 1);
3410
156
    Integer intLen = ratLen.getNumerator();
3411
78
    uint32_t u = intLen.getUnsignedInt();
3412
78
    if (stype.isString())  // string-only
3413
    {
3414
78
      res = nm->mkConst(String(std::string(u, 'A')));
3415
    }
3416
    // we could do this for sequences, but we need to be careful: some
3417
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3418
    // sorts and arrays).
3419
  }
3420
12
  else if (len.getKind() == PLUS)
3421
  {
3422
    // x + y -> norm(x) + norm(y)
3423
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3424
12
    for (const auto& n : len)
3425
    {
3426
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3427
8
      if (sn.isNull())
3428
      {
3429
        return Node::null();
3430
      }
3431
16
      std::vector<Node> snChildren;
3432
8
      utils::getConcat(sn, snChildren);
3433
8
      concatBuilder.append(snChildren);
3434
    }
3435
4
    res = concatBuilder.constructNode();
3436
  }
3437
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3438
20
           && len[0].isConst())
3439
  {
3440
    // c * x -> norm(x) repeated c times
3441
8
    Rational ratReps = len[0].getConst<Rational>();
3442
4
    Assert(ratReps.getDenominator() == 1);
3443
8
    Integer intReps = ratReps.getNumerator();
3444
3445
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3446
4
    if (nRep.isNull())
3447
    {
3448
      return Node::null();
3449
    }
3450
8
    std::vector<Node> nRepChildren;
3451
4
    utils::getConcat(nRep, nRepChildren);
3452
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3453
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3454
    {
3455
8
      concatBuilder.append(nRepChildren);
3456
    }
3457
4
    res = concatBuilder.constructNode();
3458
  }
3459
4
  else if (len.getKind() == STRING_LENGTH)
3460
  {
3461
    // len(x) -> x
3462
4
    res = len[0];
3463
  }
3464
90
  return res;
3465
}
3466
3467
389
Node SequencesRewriter::rewriteSeqUnit(Node node)
3468
{
3469
389
  NodeManager* nm = NodeManager::currentNM();
3470
389
  if (node[0].isConst())
3471
  {
3472
278
    std::vector<Node> seq;
3473
139
    seq.push_back(node[0]);
3474
278
    TypeNode stype = node[0].getType();
3475
278
    Node ret = nm->mkConst(Sequence(stype, seq));
3476
139
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3477
  }
3478
250
  return node;
3479
}
3480
3481
221629
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3482
{
3483
443258
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3484
221629
                           << "." << std::endl;
3485
221629
  if (d_statistics != nullptr)
3486
  {
3487
221146
    (*d_statistics) << r;
3488
  }
3489
221629
  return ret;
3490
}
3491
3492
107023
Node SequencesRewriter::postProcessRewrite(Node node, Node ret)
3493
{
3494
107023
  NodeManager* nm = NodeManager::currentNM();
3495
  // standard post-processing
3496
  // We rewrite (string) equalities immediately here. This allows us to forego
3497
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3498
  // of { s=t, t=s, true, false } ).
3499
107023
  Kind retk = ret.getKind();
3500
107023
  if (retk == OR || retk == AND)
3501
  {
3502
7394
    std::vector<Node> children;
3503
3697
    bool childChanged = false;
3504
14761
    for (const Node& cret : ret)
3505
    {
3506
22128
      Node creter = cret;
3507
11064
      if (cret.getKind() == EQUAL)
3508
      {
3509
5102
        creter = rewriteEqualityExt(cret);
3510
      }
3511
5962
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3512
      {
3513
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3514
      }
3515
11064
      childChanged = childChanged || cret != creter;
3516
11064
      children.push_back(creter);
3517
    }
3518
3697
    if (childChanged)
3519
    {
3520
661
      ret = nm->mkNode(retk, children);
3521
3697
    }
3522
  }
3523
103326
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3524
  {
3525
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3526
  }
3527
103326
  else if (retk == EQUAL && node.getKind() != EQUAL)
3528
  {
3529
3212
    Trace("strings-rewrite")
3530
1606
        << "Apply extended equality rewrite on " << ret << std::endl;
3531
1606
    ret = rewriteEqualityExt(ret);
3532
  }
3533
107023
  return ret;
3534
}
3535
3536
}  // namespace strings
3537
}  // namespace theory
3538
31137
}  // namespace cvc5