GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1600 1654 96.7 %
Date: 2021-09-16 Branches: 5061 9696 52.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/statistics_registry.h"
31
#include "util/string.h"
32
33
using namespace std;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
10264
SequencesRewriter::SequencesRewriter(HistogramStat<Rewrite>* statistics)
41
10264
    : d_statistics(statistics), d_stringsEntail(*this)
42
{
43
10264
}
44
45
136952
Node SequencesRewriter::rewriteEquality(Node node)
46
{
47
136952
  Assert(node.getKind() == kind::EQUAL);
48
136952
  if (node[0] == node[1])
49
  {
50
6992
    Node ret = NodeManager::currentNM()->mkConst(true);
51
3496
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
52
  }
53
133456
  else if (node[0].isConst() && node[1].isConst())
54
  {
55
4730
    Node ret = NodeManager::currentNM()->mkConst(false);
56
2365
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
57
  }
58
  // standard ordering
59
131091
  if (node[0] > node[1])
60
  {
61
59770
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
62
29885
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
63
  }
64
101206
  return node;
65
}
66
67
95329
Node SequencesRewriter::rewriteEqualityExt(Node node)
68
{
69
95329
  Assert(node.getKind() == EQUAL);
70
190658
  TypeNode tn = node[0].getType();
71
95329
  if (tn.isInteger())
72
  {
73
30578
    return rewriteArithEqualityExt(node);
74
  }
75
64751
  if (tn.isStringLike())
76
  {
77
64751
    return rewriteStrEqualityExt(node);
78
  }
79
  return node;
80
}
81
82
65182
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
83
{
84
65182
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
85
130364
  TypeNode stype = node[0].getType();
86
87
65182
  NodeManager* nm = NodeManager::currentNM();
88
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
89
194996
  for (unsigned r = 0; r < 2; r++)
90
  {
91
    // must call rewrite contains directly to avoid infinite loop
92
259986
    Node ctn = nm->mkNode(STRING_CONTAINS, node[r], node[1 - r]);
93
130172
    ctn = rewriteContains(ctn);
94
130172
    Assert(!ctn.isNull());
95
130172
    if (ctn.isConst())
96
    {
97
37805
      if (!ctn.getConst<bool>())
98
      {
99
358
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
100
      }
101
      else
102
      {
103
        // definitely contains but not syntactically equal
104
        // We may be able to simplify, e.g.
105
        //  str.++( x, "a" ) == "a"  ----> x = ""
106
      }
107
    }
108
  }
109
110
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
111
  // This covers cases like str.++( x, x ) == "a" ---> false
112
129648
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
113
129648
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
114
129648
  Node len_eq = len0.eqNode(len1);
115
64824
  len_eq = Rewriter::rewrite(len_eq);
116
64824
  if (len_eq.isConst() && !len_eq.getConst<bool>())
117
  {
118
7
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
119
  }
120
121
129634
  std::vector<Node> c[2];
122
194451
  for (unsigned i = 0; i < 2; i++)
123
  {
124
129634
    utils::getConcat(node[i], c[i]);
125
  }
126
127
  // check if the prefix, suffix mismatches
128
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
129
64817
  unsigned minsize = std::min(c[0].size(), c[1].size());
130
194439
  for (unsigned r = 0; r < 2; r++)
131
  {
132
129914
    for (unsigned i = 0; i < minsize; i++)
133
    {
134
129743
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
135
129743
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
136
130028
      Node s = c[0][index1];
137
130028
      Node t = c[1][index2];
138
129743
      if (s.isConst() && t.isConst())
139
      {
140
342
        size_t lenS = Word::getLength(s);
141
342
        size_t lenT = Word::getLength(t);
142
342
        size_t lenShort = lenS <= lenT ? lenS : lenT;
143
1144
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
144
802
                                : Word::strncmp(s, t, lenShort);
145
342
        if (!isSameFix)
146
        {
147
14
          Node ret = NodeManager::currentNM()->mkConst(false);
148
7
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
149
        }
150
      }
151
129736
      if (s != t)
152
      {
153
129451
        break;
154
      }
155
    }
156
  }
157
158
129620
  Node new_ret;
159
  // ------- equality unification
160
64810
  bool changed = false;
161
194430
  for (unsigned i = 0; i < 2; i++)
162
  {
163
130134
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
164
    {
165
257
      c[0].pop_back();
166
257
      c[1].pop_back();
167
257
      changed = true;
168
    }
169
    // splice constants
170
388669
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
171
249342
        && c[1].back().isConst())
172
    {
173
396
      Node cs[2];
174
      size_t csl[2];
175
594
      for (unsigned j = 0; j < 2; j++)
176
      {
177
396
        cs[j] = c[j].back();
178
396
        csl[j] = Word::getLength(cs[j]);
179
      }
180
198
      size_t larger = csl[0] > csl[1] ? 0 : 1;
181
198
      size_t smallerSize = csl[1 - larger];
182
396
      if (cs[1 - larger]
183
396
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
184
                     : Word::prefix(cs[larger], smallerSize)))
185
      {
186
198
        size_t sizeDiff = csl[larger] - smallerSize;
187
198
        c[larger][c[larger].size() - 1] =
188
396
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
189
                   : Word::suffix(cs[larger], sizeDiff);
190
198
        c[1 - larger].pop_back();
191
198
        changed = true;
192
      }
193
    }
194
388860
    for (unsigned j = 0; j < 2; j++)
195
    {
196
259240
      std::reverse(c[j].begin(), c[j].end());
197
    }
198
  }
199
64810
  if (changed)
200
  {
201
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
202
396
    Node s1 = utils::mkConcat(c[0], stype);
203
396
    Node s2 = utils::mkConcat(c[1], stype);
204
387
    if (s1 != node[0] || s2 != node[1])
205
    {
206
378
      new_ret = s1.eqNode(s2);
207
      // We generally don't apply the extended equality rewriter if the
208
      // original node was an equality but we may be able to do additional
209
      // rewriting here, e.g.,
210
      // x++y = "" --> x = "" and y = ""
211
378
      new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
212
378
      return rewriteStrEqualityExt(new_ret);
213
    }
214
  }
215
216
  // ------- homogeneous constants
217
193186
  for (unsigned i = 0; i < 2; i++)
218
  {
219
257566
    Node cn = StringsEntail::checkHomogeneousString(node[i]);
220
128812
    if (!cn.isNull() && !Word::isEmpty(cn))
221
    {
222
25591
      Assert(cn.isConst());
223
25591
      Assert(Word::getLength(cn) == 1);
224
225
      // The operands of the concat on each side of the equality without
226
      // constant strings
227
76773
      std::vector<Node> trimmed[2];
228
      // Counts the number of `cn`s on each side
229
25591
      size_t numCns[2] = {0, 0};
230
76764
      for (size_t j = 0; j < 2; j++)
231
      {
232
        // Sort the operands of the concats on both sides of the equality
233
        // (since both sides may only contain one char, the order does not
234
        // matter)
235
51178
        std::sort(c[j].begin(), c[j].end());
236
102495
        for (const Node& cc : c[j])
237
        {
238
51322
          if (cc.isConst())
239
          {
240
            // Count the number of `cn`s in the string constant and make
241
            // sure that all chars are `cn`s
242
51149
            std::vector<Node> veccc = Word::getChars(cc);
243
51243
            for (const Node& cv : veccc)
244
            {
245
25671
              if (cv != cn)
246
              {
247
                // This conflict case should mostly should be taken care of by
248
                // multiset reasoning in the strings rewriter, but we recognize
249
                // this conflict just in case.
250
5
                new_ret = nm->mkConst(false);
251
                return returnRewrite(
252
5
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
253
              }
254
25666
              numCns[j]++;
255
            }
256
          }
257
          else
258
          {
259
25745
            trimmed[j].push_back(cc);
260
          }
261
        }
262
      }
263
264
      // We have to remove the same number of `cn`s from both sides, so the
265
      // side with less `cn`s determines how many we can remove
266
25586
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
267
76758
      for (size_t j = 0; j < 2; j++)
268
      {
269
51172
        size_t diff = numCns[j] - trimmedConst;
270
51172
        if (diff != 0)
271
        {
272
          // Add a constant string to the side with more `cn`s to restore
273
          // the difference in number of `cn`s
274
46490
          std::vector<Node> vec(diff, cn);
275
23245
          trimmed[j].push_back(Word::mkWordFlatten(vec));
276
        }
277
      }
278
279
51119
      Node lhs = utils::mkConcat(trimmed[i], stype);
280
51119
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
281
25586
      if (lhs != node[i] || ss != node[1 - i])
282
      {
283
        // e.g.
284
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
285
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
286
        //
287
        // We generally don't apply the extended equality rewriter if the
288
        // original node was an equality but we may be able to do additional
289
        // rewriting here.
290
53
        new_ret = lhs.eqNode(ss);
291
53
        new_ret = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
292
53
        return rewriteStrEqualityExt(new_ret);
293
      }
294
    }
295
  }
296
297
  // ------- rewrites for (= "" _)
298
128748
  Node empty = Word::mkEmptyWord(stype);
299
180609
  for (size_t i = 0; i < 2; i++)
300
  {
301
122525
    if (node[i] == empty)
302
    {
303
64268
      Node ne = node[1 - i];
304
35279
      if (ne.getKind() == STRING_REPLACE)
305
      {
306
        // (= "" (str.replace x y x)) ---> (= x "")
307
1062
        if (ne[0] == ne[2])
308
        {
309
56
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
310
28
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
311
        }
312
313
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
314
1034
        if (StringsEntail::checkNonEmpty(ne[2]))
315
        {
316
          Node ret =
317
              nm->mkNode(AND,
318
186
                         nm->mkNode(EQUAL, ne[0], empty),
319
372
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
320
93
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
321
        }
322
323
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
324
941
        if (StringsEntail::checkLengthOne(ne[1], true) && ne[2] == empty)
325
        {
326
296
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
327
148
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
328
        }
329
      }
330
34217
      else if (ne.getKind() == STRING_SUBSTR)
331
      {
332
59701
        Node zero = nm->mkConst(Rational(0));
333
334
32861
        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
335
        {
336
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
337
1708
          if (ne[1] == zero)
338
          {
339
772
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
340
386
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
341
          }
342
343
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
344
          // if n >= 0 and m > 0
345
2644
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
346
1322
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
347
        }
348
349
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
350
31153
        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
351
        {
352
8626
          Node ret = nm->mkNode(LEQ, ne[2], zero);
353
4313
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
354
        }
355
      }
356
    }
357
  }
358
359
  // ------- rewrites for (= (str.replace _ _ _) _)
360
174196
  for (size_t i = 0; i < 2; i++)
361
  {
362
116154
    if (node[i].getKind() == STRING_REPLACE)
363
    {
364
5160
      Node repl = node[i];
365
5160
      Node x = node[1 - i];
366
367
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
368
2601
      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
369
      {
370
        Node ret = nm->mkNode(
371
12
            EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1]));
372
6
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
373
      }
374
375
      // (= x (str.replace y x y)) ---> (= x y)
376
2595
      if (repl[0] == repl[2] && x == repl[1])
377
      {
378
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
379
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
380
      }
381
382
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
383
2593
      if (x == repl[0])
384
      {
385
2670
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
386
1350
        if (eq.isConst() && !eq.getConst<bool>())
387
        {
388
60
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_CONTAINS, x, repl[1]));
389
30
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
390
        }
391
      }
392
393
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
394
      // if (str.len y) = (str.len z)
395
2563
      if (repl[2] == x)
396
      {
397
2512
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
398
2512
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
399
1258
        if (ArithEntail::checkEq(lenY, lenZ))
400
        {
401
          Node ret = nm->mkNode(OR,
402
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
403
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
404
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
405
        }
406
      }
407
    }
408
  }
409
410
  // Try to rewrite (= x y) into a conjunction of equalities based on length
411
  // entailment.
412
  //
413
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
414
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
415
  //
416
  // where yi' and yi'' correspond to some yj and
417
  //   (<= (str.len x) (str.++ y1' ... ym'))
418
173830
  for (unsigned i = 0; i < 2; i++)
419
  {
420
115953
    if (node[1 - i].getKind() == STRING_CONCAT)
421
    {
422
881
      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
423
881
      if (!new_ret.isNull())
424
      {
425
165
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
426
      }
427
    }
428
  }
429
430
57877
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
431
  {
432
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
433
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
434
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
435
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
436
    //
437
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
438
    //
439
    // This rewrite performs length-based equality splitting: If we can show
440
    // that two prefixes have the same length, we can split an equality into
441
    // two equalities, one over the prefixes and another over the suffixes.
442
97
    std::vector<Node> v0, v1;
443
55
    utils::getConcat(node[0], v0);
444
55
    utils::getConcat(node[1], v1);
445
55
    size_t startRhs = 0;
446
260
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
447
    {
448
423
      const std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
449
423
      Node pfx0 = utils::mkConcat(pfxv0, stype);
450
826
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
451
      {
452
680
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
453
        {
454
1094
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
455
1094
          Node pfx1 = utils::mkConcat(pfxv1, stype);
456
1094
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
457
1094
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
458
459
583
          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
460
          {
461
18
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
462
18
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
463
            Node ret = nm->mkNode(kind::AND,
464
18
                                  pfx0.eqNode(pfx1),
465
18
                                  utils::mkConcat(sfxv0, stype)
466
36
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
467
9
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
468
          }
469
574
          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
470
          {
471
            // The prefix on the right-hand side is strictly longer than the
472
            // prefix on the left-hand side, so we try to strip the right-hand
473
            // prefix by the length of the left-hand prefix
474
            //
475
            // Example:
476
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
477
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
478
59
            std::vector<Node> rpfxv1;
479
59
            if (StringsEntail::stripSymbolicLength(
480
                    pfxv1, rpfxv1, 1, lenPfx0, true))
481
            {
482
              // The rewrite requires the full left-hand prefix length to be
483
              // stripped (otherwise we would have to keep parts of the
484
              // left-hand prefix).
485
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
486
              {
487
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
488
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
489
                Node ret =
490
                    nm->mkNode(kind::AND,
491
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
492
                               utils::mkConcat(sfxv0, stype)
493
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
494
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
495
              }
496
            }
497
498
            // If the prefix of the right-hand side is (strictly) longer than
499
            // the prefix of the left-hand side, we can advance the left-hand
500
            // side (since the length of the right-hand side is only increasing
501
            // in the inner loop)
502
59
            break;
503
          }
504
515
          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
505
          {
506
            // The prefix on the left-hand side is strictly longer than the
507
            // prefix on the right-hand side, so we try to strip the left-hand
508
            // prefix by the length of the right-hand prefix
509
            //
510
            // Example:
511
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
512
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
513
154
            std::vector<Node> sfxv0 = pfxv0;
514
154
            std::vector<Node> rpfxv0;
515
79
            if (StringsEntail::stripSymbolicLength(
516
                    sfxv0, rpfxv0, 1, lenPfx1, true))
517
            {
518
              // The rewrite requires the full right-hand prefix length to be
519
              // stripped (otherwise we would have to keep parts of the
520
              // right-hand prefix).
521
12
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
522
              {
523
4
                sfxv0.insert(sfxv0.end(), v0.begin() + i, v0.end());
524
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
525
                Node ret =
526
                    nm->mkNode(kind::AND,
527
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
528
8
                               utils::mkConcat(sfxv0, stype)
529
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
530
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
531
              }
532
            }
533
534
            // If the prefix of the left-hand side is (strictly) longer than
535
            // the prefix of the right-hand side, then we don't need to check
536
            // that right-hand prefix for future left-hand prefixes anymore
537
            // (since they are increasing in length)
538
75
            startRhs = j + 1;
539
          }
540
        }
541
      }
542
    }
543
  }
544
57864
  return node;
545
}
546
547
30578
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
548
{
549
30578
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
550
551
  // cases where we can solve the equality
552
553
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
554
555
30578
  return node;
556
}
557
558
253864
Node SequencesRewriter::rewriteLength(Node node)
559
{
560
253864
  Assert(node.getKind() == STRING_LENGTH);
561
253864
  NodeManager* nm = NodeManager::currentNM();
562
253864
  Kind nk0 = node[0].getKind();
563
253864
  if (node[0].isConst())
564
  {
565
15132
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
566
7566
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
567
  }
568
246298
  else if (nk0 == kind::STRING_CONCAT)
569
  {
570
5196
    Node tmpNode = node[0];
571
5196
    if (tmpNode.getKind() == kind::STRING_CONCAT)
572
    {
573
10392
      std::vector<Node> node_vec;
574
21184
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
575
      {
576
15988
        if (tmpNode[i].isConst())
577
        {
578
4355
          node_vec.push_back(
579
8710
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
580
        }
581
        else
582
        {
583
11633
          node_vec.push_back(NodeManager::currentNM()->mkNode(
584
              kind::STRING_LENGTH, tmpNode[i]));
585
        }
586
      }
587
10392
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
588
5196
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
589
    }
590
  }
591
241102
  else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL)
592
  {
593
1991
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
594
1991
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
595
1019
    if (len1 == len2)
596
    {
597
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
598
94
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
599
47
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
600
972
    }
601
  }
602
240083
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
603
239991
           || nk0 == STRING_UPDATE)
604
  {
605
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
606
    // len( update( x, n, y ) ) = len( x )
607
212
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
608
106
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
609
  }
610
239977
  else if (nk0 == SEQ_UNIT)
611
  {
612
218
    Node retNode = nm->mkConst(Rational(1));
613
109
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
614
  }
615
240840
  return node;
616
}
617
618
// TODO (#1180) add rewrite
619
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
620
//  str.substr( x, n1, n2+n3 )
621
70336
Node SequencesRewriter::rewriteConcat(Node node)
622
{
623
70336
  Assert(node.getKind() == kind::STRING_CONCAT);
624
140672
  Trace("strings-rewrite-debug")
625
70336
      << "Strings::rewriteConcat start " << node << std::endl;
626
140672
  std::vector<Node> node_vec;
627
140672
  Node preNode = Node::null();
628
310731
  for (Node tmpNode : node)
629
  {
630
240395
    if (tmpNode.getKind() == STRING_CONCAT)
631
    {
632
994
      unsigned j = 0;
633
      // combine the first term with the previous constant if applicable
634
994
      if (!preNode.isNull())
635
      {
636
223
        if (tmpNode[0].isConst())
637
        {
638
140
          std::vector<Node> wvec;
639
70
          wvec.push_back(preNode);
640
70
          wvec.push_back(tmpNode[0]);
641
70
          preNode = Word::mkWordFlatten(wvec);
642
70
          node_vec.push_back(preNode);
643
        }
644
        else
645
        {
646
153
          node_vec.push_back(preNode);
647
153
          node_vec.push_back(tmpNode[0]);
648
        }
649
223
        preNode = Node::null();
650
223
        ++j;
651
      }
652
      // insert the middle terms to node_vec
653
994
      if (j <= tmpNode.getNumChildren() - 1)
654
      {
655
994
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
656
      }
657
      // take the last term as the current
658
994
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
659
    }
660
240395
    if (!tmpNode.isConst())
661
    {
662
125992
      if (!preNode.isNull())
663
      {
664
27438
        if (preNode.isConst() && !Word::isEmpty(preNode))
665
        {
666
26592
          node_vec.push_back(preNode);
667
        }
668
27438
        preNode = Node::null();
669
      }
670
125992
      node_vec.push_back(tmpNode);
671
    }
672
    else
673
    {
674
114403
      if (preNode.isNull())
675
      {
676
60658
        preNode = tmpNode;
677
      }
678
      else
679
      {
680
107490
        std::vector<Node> vec;
681
53745
        vec.push_back(preNode);
682
53745
        vec.push_back(tmpNode);
683
53745
        preNode = Word::mkWordFlatten(vec);
684
      }
685
    }
686
  }
687
70336
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
688
  {
689
31812
    node_vec.push_back(preNode);
690
  }
691
692
  // Sort adjacent operands in str.++ that all result in the same string or the
693
  // empty string.
694
  //
695
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
696
  // (str.++ ... [sort those 3 arguments] ... )
697
70336
  size_t lastIdx = 0;
698
140672
  Node lastX;
699
256809
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
700
  {
701
372946
    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
702
186473
    bool nextX = false;
703
186473
    if (s != lastX)
704
    {
705
181723
      nextX = true;
706
    }
707
708
186473
    if (nextX)
709
    {
710
181723
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
711
181723
      lastX = s;
712
181723
      lastIdx = i;
713
    }
714
  }
715
70336
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
716
717
140672
  TypeNode tn = node.getType();
718
140672
  Node retNode = utils::mkConcat(node_vec, tn);
719
140672
  Trace("strings-rewrite-debug")
720
70336
      << "Strings::rewriteConcat end " << retNode << std::endl;
721
70336
  if (retNode != node)
722
  {
723
20586
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
724
  }
725
49750
  return node;
726
}
727
728
2964
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
729
{
730
2964
  Assert(node.getKind() == kind::REGEXP_CONCAT);
731
2964
  NodeManager* nm = NodeManager::currentNM();
732
5928
  Trace("strings-rewrite-debug")
733
2964
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
734
5928
  Node retNode = node;
735
5928
  std::vector<Node> vec;
736
2964
  bool changed = false;
737
5928
  Node emptyRe;
738
739
  // get the string type that are members of this regular expression
740
5928
  TypeNode rtype = node.getType();
741
5928
  TypeNode stype;
742
2964
  if (rtype.isRegExp())
743
  {
744
    // standard regular expressions are for strings
745
2964
    stype = nm->stringType();
746
  }
747
  else
748
  {
749
    Unimplemented();
750
  }
751
752
11312
  for (const Node& c : node)
753
  {
754
8384
    if (c.getKind() == REGEXP_CONCAT)
755
    {
756
218
      changed = true;
757
698
      for (const Node& cc : c)
758
      {
759
480
        vec.push_back(cc);
760
      }
761
    }
762
19726
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
763
19646
             && Word::isEmpty(c[0]))
764
    {
765
128
      changed = true;
766
128
      emptyRe = c;
767
    }
768
8038
    else if (c.getKind() == REGEXP_EMPTY)
769
    {
770
      // re.++( ..., empty, ... ) ---> empty
771
72
      Node ret = nm->mkNode(REGEXP_EMPTY);
772
36
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
773
    }
774
    else
775
    {
776
8002
      vec.push_back(c);
777
    }
778
  }
779
2928
  if (changed)
780
  {
781
    // flatten
782
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
783
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
784
328
    if (vec.empty())
785
    {
786
2
      Assert(!emptyRe.isNull());
787
2
      retNode = emptyRe;
788
    }
789
    else
790
    {
791
326
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
792
    }
793
328
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
794
  }
795
5200
  Trace("strings-rewrite-debug")
796
2600
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
797
5200
  std::vector<Node> cvec;
798
  // the current accumulation of constant strings
799
5200
  std::vector<Node> preReStr;
800
  // whether the last component was (_)*
801
2600
  bool lastAllStar = false;
802
5200
  String emptyStr = String("");
803
  // this loop checks to see if components can be combined or dropped
804
12788
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
805
  {
806
20376
    Node curr;
807
10188
    if (i < size)
808
    {
809
7588
      curr = vec[i];
810
7588
      Assert(curr.getKind() != REGEXP_CONCAT);
811
    }
812
    // update preReStr
813
10188
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
814
    {
815
3032
      lastAllStar = false;
816
3032
      preReStr.push_back(curr[0]);
817
3032
      curr = Node::null();
818
    }
819
7156
    else if (!preReStr.empty())
820
    {
821
2576
      Assert(!lastAllStar);
822
      // this groups consecutive strings a++b ---> ab
823
5152
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
824
2576
      cvec.push_back(acc);
825
2576
      preReStr.clear();
826
    }
827
4580
    else if (!curr.isNull() && lastAllStar)
828
    {
829
      // if empty, drop it
830
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
831
324
      if (RegExpEntail::isConstRegExp(curr)
832
324
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
833
      {
834
44
        curr = Node::null();
835
      }
836
    }
837
10188
    if (!curr.isNull())
838
    {
839
4512
      lastAllStar = false;
840
4512
      if (curr.getKind() == REGEXP_STAR)
841
      {
842
        // we can group stars (a)* ++ (a)* ---> (a)*
843
2262
        if (!cvec.empty() && cvec.back() == curr)
844
        {
845
8
          curr = Node::null();
846
        }
847
2254
        else if (curr[0].getKind() == REGEXP_SIGMA)
848
        {
849
902
          Assert(!lastAllStar);
850
902
          lastAllStar = true;
851
          // go back and remove empty ones from back of cvec
852
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
853
2572
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
854
3452
                 && RegExpEntail::testConstStringInRegExp(
855
722
                     emptyStr, 0, cvec.back()))
856
          {
857
8
            cvec.pop_back();
858
          }
859
        }
860
      }
861
    }
862
10188
    if (!curr.isNull())
863
    {
864
4504
      cvec.push_back(curr);
865
    }
866
  }
867
2600
  Assert(!cvec.empty());
868
2600
  retNode = utils::mkConcat(cvec, rtype);
869
2600
  if (retNode != node)
870
  {
871
    // handles all cases where consecutive re constants are combined or
872
    // dropped as described in the loop above.
873
198
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
874
  }
875
876
  // flipping adjacent star arguments
877
2402
  changed = false;
878
6620
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
879
  {
880
4218
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
881
    {
882
      // by convention, flip the order (a*)++a ---> a++(a*)
883
64
      std::swap(cvec[i], cvec[i + 1]);
884
64
      changed = true;
885
    }
886
  }
887
2402
  if (changed)
888
  {
889
48
    retNode = utils::mkConcat(cvec, rtype);
890
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
891
  }
892
2354
  return node;
893
}
894
895
1180
Node SequencesRewriter::rewriteStarRegExp(TNode node)
896
{
897
1180
  Assert(node.getKind() == REGEXP_STAR);
898
1180
  NodeManager* nm = NodeManager::currentNM();
899
2360
  Node retNode = node;
900
1180
  if (node[0].getKind() == REGEXP_STAR)
901
  {
902
    // ((R)*)* ---> R*
903
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
904
  }
905
3992
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
906
3932
           && Word::isEmpty(node[0][0]))
907
  {
908
    // ("")* ---> ""
909
18
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
910
  }
911
1158
  else if (node[0].getKind() == REGEXP_EMPTY)
912
  {
913
    // (empty)* ---> ""
914
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
915
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
916
  }
917
1150
  else if (node[0].getKind() == REGEXP_UNION)
918
  {
919
    // simplification of unions under star
920
256
    if (RegExpEntail::hasEpsilonNode(node[0]))
921
    {
922
38
      bool changed = false;
923
38
      std::vector<Node> node_vec;
924
120
      for (const Node& nc : node[0])
925
      {
926
240
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
927
236
            && Word::isEmpty(nc[0]))
928
        {
929
          // can be removed
930
38
          changed = true;
931
        }
932
        else
933
        {
934
44
          node_vec.push_back(nc);
935
        }
936
      }
937
38
      if (changed)
938
      {
939
38
        retNode = node_vec.size() == 1 ? node_vec[0]
940
                                       : nm->mkNode(REGEXP_UNION, node_vec);
941
38
        retNode = nm->mkNode(REGEXP_STAR, retNode);
942
        // simplification of union beneath star based on loop above
943
        // for example, ( "" | "a" )* ---> ("a")*
944
38
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
945
      }
946
    }
947
  }
948
1112
  return node;
949
}
950
951
1108
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
952
{
953
1108
  Kind nk = node.getKind();
954
1108
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
955
2216
  Trace("strings-rewrite-debug")
956
1108
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
957
2216
  std::vector<Node> node_vec;
958
2216
  std::vector<Node> polRegExp[2];
959
3579
  for (const Node& ni : node)
960
  {
961
2473
    if (ni.getKind() == nk)
962
    {
963
42
      for (const Node& nic : ni)
964
      {
965
28
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
966
        {
967
26
          node_vec.push_back(nic);
968
        }
969
      }
970
    }
971
2459
    else if (ni.getKind() == REGEXP_EMPTY)
972
    {
973
65
      if (nk == REGEXP_INTER)
974
      {
975
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
976
      }
977
      // otherwise, can ignore
978
    }
979
2394
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
980
    {
981
8
      if (nk == REGEXP_UNION)
982
      {
983
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
984
      }
985
      // otherwise, can ignore
986
    }
987
2386
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
988
    {
989
2347
      node_vec.push_back(ni);
990
2347
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
991
4694
      Node nia = pindex == 1 ? ni[0] : ni;
992
2347
      polRegExp[pindex].push_back(nia);
993
    }
994
  }
995
1106
  NodeManager* nm = NodeManager::currentNM();
996
  // use inclusion tests
997
1302
  for (const Node& negMem : polRegExp[1])
998
  {
999
346
    for (const Node& posMem : polRegExp[0])
1000
    {
1001
284
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
1002
284
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
1003
      // inclusion test for conflicting case m1 contains m2
1004
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
1005
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
1006
150
      if (RegExpEntail::regExpIncludes(m1, m2))
1007
      {
1008
32
        Node retNode;
1009
16
        if (nk == REGEXP_INTER)
1010
        {
1011
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1012
        }
1013
        else
1014
        {
1015
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1016
        }
1017
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1018
      }
1019
    }
1020
  }
1021
2180
  Node retNode;
1022
1090
  if (node_vec.empty())
1023
  {
1024
    if (nk == REGEXP_INTER)
1025
    {
1026
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1027
    }
1028
    else
1029
    {
1030
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1031
    }
1032
  }
1033
  else
1034
  {
1035
1090
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1036
  }
1037
1090
  if (retNode != node)
1038
  {
1039
    // flattening and removing children, based on loop above
1040
104
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1041
  }
1042
986
  return node;
1043
}
1044
1045
32
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1046
{
1047
32
  Assert(node.getKind() == REGEXP_LOOP);
1048
64
  Node retNode = node;
1049
64
  Node r = node[0];
1050
32
  if (r.getKind() == REGEXP_STAR)
1051
  {
1052
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1053
  }
1054
32
  NodeManager* nm = NodeManager::currentNM();
1055
64
  cvc5::Rational rMaxInt(String::maxSize());
1056
32
  uint32_t l = utils::getLoopMinOccurrences(node);
1057
64
  std::vector<Node> vec_nodes;
1058
122
  for (unsigned i = 0; i < l; i++)
1059
  {
1060
90
    vec_nodes.push_back(r);
1061
  }
1062
  Node n =
1063
32
      vec_nodes.size() == 0
1064
36
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1065
68
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1066
32
  uint32_t u = utils::getLoopMaxOccurrences(node);
1067
32
  if (u < l)
1068
  {
1069
8
    std::vector<Node> nvec;
1070
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1071
  }
1072
28
  else if (u == l)
1073
  {
1074
10
    retNode = n;
1075
  }
1076
  else
1077
  {
1078
36
    std::vector<Node> vec2;
1079
18
    vec2.push_back(n);
1080
36
    TypeNode rtype = nm->regExpType();
1081
80
    for (uint32_t j = l; j < u; j++)
1082
    {
1083
62
      vec_nodes.push_back(r);
1084
62
      n = utils::mkConcat(vec_nodes, rtype);
1085
62
      vec2.push_back(n);
1086
    }
1087
18
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1088
  }
1089
64
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1090
32
                      << std::endl;
1091
32
  if (retNode != node)
1092
  {
1093
32
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1094
  }
1095
  return node;
1096
}
1097
1098
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1099
{
1100
4
  Assert(node.getKind() == REGEXP_REPEAT);
1101
4
  NodeManager* nm = NodeManager::currentNM();
1102
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1103
4
  unsigned r = utils::getRepeatAmount(node);
1104
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1105
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1106
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1107
}
1108
1109
31
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1110
{
1111
31
  Assert(node.getKind() == REGEXP_OPT);
1112
31
  NodeManager* nm = NodeManager::currentNM();
1113
  Node retNode =
1114
      nm->mkNode(REGEXP_UNION,
1115
62
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1116
124
                 node[0]);
1117
62
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1118
}
1119
1120
50
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1121
{
1122
50
  Assert(node.getKind() == REGEXP_PLUS);
1123
50
  NodeManager* nm = NodeManager::currentNM();
1124
  Node retNode =
1125
100
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1126
100
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1127
}
1128
1129
55
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1130
{
1131
55
  Assert(node.getKind() == REGEXP_DIFF);
1132
55
  NodeManager* nm = NodeManager::currentNM();
1133
  Node retNode =
1134
110
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1135
110
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1136
}
1137
1138
406
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1139
{
1140
406
  Assert(node.getKind() == REGEXP_RANGE);
1141
  unsigned ch[2];
1142
1218
  for (size_t i = 0; i < 2; ++i)
1143
  {
1144
812
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1145
    {
1146
      // not applied to characters, it is not handled
1147
      return node;
1148
    }
1149
812
    ch[i] = node[i].getConst<String>().front();
1150
  }
1151
1152
406
  NodeManager* nm = NodeManager::currentNM();
1153
406
  if (node[0] == node[1])
1154
  {
1155
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1156
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1157
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1158
  }
1159
1160
406
  if (ch[0] > ch[1])
1161
  {
1162
    // re.range( "B", "A" ) ---> re.none
1163
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1164
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1165
  }
1166
404
  return node;
1167
}
1168
1169
13554
Node SequencesRewriter::rewriteMembership(TNode node)
1170
{
1171
13554
  NodeManager* nm = NodeManager::currentNM();
1172
27108
  Node x = node[0];
1173
27108
  Node r = node[1];
1174
1175
27108
  TypeNode stype = x.getType();
1176
27108
  TypeNode rtype = r.getType();
1177
1178
13554
  if(r.getKind() == kind::REGEXP_EMPTY)
1179
  {
1180
124
    Node retNode = NodeManager::currentNM()->mkConst(false);
1181
62
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1182
  }
1183
13492
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1184
  {
1185
    // test whether x in node[1]
1186
4166
    cvc5::String s = x.getConst<String>();
1187
2083
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1188
4166
    Node retNode = NodeManager::currentNM()->mkConst(test);
1189
2083
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1190
  }
1191
11409
  else if (r.getKind() == kind::REGEXP_SIGMA)
1192
  {
1193
134
    Node one = nm->mkConst(Rational(1));
1194
134
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1195
67
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1196
  }
1197
11342
  else if (r.getKind() == kind::REGEXP_STAR)
1198
  {
1199
3847
    if (x.isConst())
1200
    {
1201
40
      size_t xlen = Word::getLength(x);
1202
40
      if (xlen == 0)
1203
      {
1204
36
        Node retNode = nm->mkConst(true);
1205
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1206
18
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1207
      }
1208
22
      else if (xlen == 1)
1209
      {
1210
22
        if (r[0].getKind() == STRING_TO_REGEXP)
1211
        {
1212
4
          Node retNode = r[0][0].eqNode(x);
1213
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1214
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1215
        }
1216
      }
1217
    }
1218
3807
    else if (x.getKind() == STRING_CONCAT)
1219
    {
1220
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1221
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1222
      //     if the length of all strings in R is one.
1223
1630
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1224
1132
      if (!flr.isNull())
1225
      {
1226
710
        Node one = nm->mkConst(Rational(1));
1227
672
        if (flr == one)
1228
        {
1229
1268
          NodeBuilder nb(AND);
1230
2604
          for (const Node& xc : x)
1231
          {
1232
1970
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1233
          }
1234
          return returnRewrite(
1235
634
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1236
        }
1237
      }
1238
    }
1239
3193
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1240
    {
1241
202
      Node retNode = NodeManager::currentNM()->mkConst(true);
1242
101
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1243
    }
1244
  }
1245
7495
  else if (r.getKind() == kind::REGEXP_CONCAT)
1246
  {
1247
5331
    bool allSigma = true;
1248
5331
    bool allSigmaStrict = true;
1249
5331
    unsigned allSigmaMinSize = 0;
1250
10498
    Node constStr;
1251
5331
    size_t constIdx = 0;
1252
5331
    size_t nchildren = r.getNumChildren();
1253
10926
    for (size_t i = 0; i < nchildren; i++)
1254
    {
1255
15820
      Node rc = r[i];
1256
10225
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1257
10225
      if (rc.getKind() == kind::REGEXP_SIGMA)
1258
      {
1259
960
        allSigmaMinSize++;
1260
      }
1261
9265
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1262
      {
1263
2051
        allSigmaStrict = false;
1264
      }
1265
7214
      else if (rc.getKind() == STRING_TO_REGEXP)
1266
      {
1267
3518
        if (constStr.isNull())
1268
        {
1269
2584
          constStr = rc[0];
1270
2584
          constIdx = i;
1271
        }
1272
        else
1273
        {
1274
934
          allSigma = false;
1275
934
          break;
1276
        }
1277
      }
1278
      else
1279
      {
1280
3696
        allSigma = false;
1281
3696
        break;
1282
      }
1283
    }
1284
5331
    if (allSigma)
1285
    {
1286
701
      if (constStr.isNull())
1287
      {
1288
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1289
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1290
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1291
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1292
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1293
      }
1294
681
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1295
144
               && constIdx != nchildren - 1)
1296
      {
1297
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1298
288
        Node retNode = nm->mkNode(STRING_CONTAINS, x, constStr);
1299
144
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1300
      }
1301
    }
1302
  }
1303
4328
  else if (r.getKind() == kind::REGEXP_INTER
1304
2164
           || r.getKind() == kind::REGEXP_UNION)
1305
  {
1306
1058
    std::vector<Node> mvec;
1307
1736
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1308
    {
1309
1207
      mvec.push_back(
1310
2414
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1311
    }
1312
    Node retNode = NodeManager::currentNM()->mkNode(
1313
1058
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1314
529
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1315
  }
1316
1635
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1317
  {
1318
1758
    Node retNode = x.eqNode(r[0]);
1319
879
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1320
  }
1321
756
  else if (r.getKind() == REGEXP_RANGE)
1322
  {
1323
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1324
    // we do not do this if the arguments are not constant
1325
522
    if (RegExpEntail::isConstRegExp(r))
1326
    {
1327
1044
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1328
      Node retNode =
1329
          nm->mkNode(AND,
1330
1044
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1331
2088
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1332
522
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1333
    }
1334
  }
1335
234
  else if (r.getKind() == REGEXP_COMPLEMENT)
1336
  {
1337
408
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1338
204
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1339
  }
1340
1341
  // do simple consumes
1342
16578
  Node retNode = node;
1343
8289
  if (r.getKind() == kind::REGEXP_STAR)
1344
  {
1345
9118
    for (unsigned dir = 0; dir <= 1; dir++)
1346
    {
1347
12198
      std::vector<Node> mchildren;
1348
6172
      utils::getConcat(x, mchildren);
1349
6172
      bool success = true;
1350
18732
      while (success)
1351
      {
1352
6300
        success = false;
1353
12580
        std::vector<Node> children;
1354
6300
        utils::getConcat(r[0], children);
1355
12580
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1356
6300
        if (!scn.isNull())
1357
        {
1358
40
          Trace("regexp-ext-rewrite")
1359
20
              << "Regexp star : const conflict : " << node << std::endl;
1360
20
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1361
        }
1362
6280
        else if (children.empty())
1363
        {
1364
          // fully consumed one copy of the STAR
1365
136
          if (mchildren.empty())
1366
          {
1367
            Trace("regexp-ext-rewrite")
1368
                << "Regexp star : full consume : " << node << std::endl;
1369
            Node ret = NodeManager::currentNM()->mkConst(true);
1370
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1371
          }
1372
          else
1373
          {
1374
272
            Node prev = retNode;
1375
408
            retNode = nm->mkNode(
1376
272
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1377
            // Iterate again if the node changed. It may not have changed if
1378
            // nothing was consumed from mchildren (e.g. if the body of the
1379
            // re.* accepts the empty string.
1380
136
            success = (retNode != prev);
1381
          }
1382
        }
1383
      }
1384
6152
      if (retNode != node)
1385
      {
1386
252
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1387
126
                                    << " -> " << retNode << std::endl;
1388
126
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1389
      }
1390
    }
1391
  }
1392
  else
1393
  {
1394
9345
    std::vector<Node> children;
1395
5197
    utils::getConcat(r, children);
1396
9345
    std::vector<Node> mchildren;
1397
5197
    utils::getConcat(x, mchildren);
1398
5197
    unsigned prevSize = children.size() + mchildren.size();
1399
9345
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1400
5197
    if (!scn.isNull())
1401
    {
1402
260
      Trace("regexp-ext-rewrite")
1403
130
          << "Regexp : const conflict : " << node << std::endl;
1404
130
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1405
    }
1406
5067
    else if ((children.size() + mchildren.size()) != prevSize)
1407
    {
1408
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1409
      // above, we strip components to construct an equivalent membership:
1410
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1411
1838
      Node xn = utils::mkConcat(mchildren, stype);
1412
1838
      Node emptyStr = Word::mkEmptyWord(stype);
1413
919
      if (children.empty())
1414
      {
1415
        // If we stripped all components on the right, then the left is
1416
        // equal to the empty string.
1417
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1418
73
        retNode = xn.eqNode(emptyStr);
1419
      }
1420
      else
1421
      {
1422
        // otherwise, construct the updated regular expression
1423
846
        retNode =
1424
1692
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1425
      }
1426
1838
      Trace("regexp-ext-rewrite")
1427
919
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1428
919
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1429
    }
1430
  }
1431
7094
  return node;
1432
}
1433
1434
608497
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1435
{
1436
1216994
  Trace("sequences-postrewrite")
1437
608497
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1438
1216994
  Node retNode = node;
1439
608497
  Kind nk = node.getKind();
1440
608497
  if (nk == kind::STRING_CONCAT)
1441
  {
1442
70336
    retNode = rewriteConcat(node);
1443
  }
1444
538161
  else if (nk == kind::EQUAL)
1445
  {
1446
136952
    retNode = rewriteEquality(node);
1447
  }
1448
401209
  else if (nk == kind::STRING_LENGTH)
1449
  {
1450
253864
    retNode = rewriteLength(node);
1451
  }
1452
147345
  else if (nk == kind::STRING_CHARAT)
1453
  {
1454
111
    retNode = rewriteCharAt(node);
1455
  }
1456
147234
  else if (nk == kind::STRING_SUBSTR)
1457
  {
1458
58416
    retNode = rewriteSubstr(node);
1459
  }
1460
88818
  else if (nk == kind::STRING_UPDATE)
1461
  {
1462
174
    retNode = rewriteUpdate(node);
1463
  }
1464
88644
  else if (nk == kind::STRING_CONTAINS)
1465
  {
1466
26960
    retNode = rewriteContains(node);
1467
  }
1468
61684
  else if (nk == kind::STRING_INDEXOF)
1469
  {
1470
5547
    retNode = rewriteIndexof(node);
1471
  }
1472
56137
  else if (nk == kind::STRING_INDEXOF_RE)
1473
  {
1474
975
    retNode = rewriteIndexofRe(node);
1475
  }
1476
55162
  else if (nk == kind::STRING_REPLACE)
1477
  {
1478
4207
    retNode = rewriteReplace(node);
1479
  }
1480
50955
  else if (nk == kind::STRING_REPLACE_ALL)
1481
  {
1482
202
    retNode = rewriteReplaceAll(node);
1483
  }
1484
50753
  else if (nk == kind::STRING_REPLACE_RE)
1485
  {
1486
443
    retNode = rewriteReplaceRe(node);
1487
  }
1488
50310
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1489
  {
1490
245
    retNode = rewriteReplaceReAll(node);
1491
  }
1492
50065
  else if (nk == STRING_REV)
1493
  {
1494
241
    retNode = rewriteStrReverse(node);
1495
  }
1496
49824
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1497
  {
1498
185
    retNode = rewritePrefixSuffix(node);
1499
  }
1500
49639
  else if (nk == kind::STRING_IN_REGEXP)
1501
  {
1502
13554
    retNode = rewriteMembership(node);
1503
  }
1504
36085
  else if (nk == REGEXP_CONCAT)
1505
  {
1506
2964
    retNode = rewriteConcatRegExp(node);
1507
  }
1508
33121
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1509
  {
1510
1108
    retNode = rewriteAndOrRegExp(node);
1511
  }
1512
32013
  else if (nk == REGEXP_DIFF)
1513
  {
1514
55
    retNode = rewriteDifferenceRegExp(node);
1515
  }
1516
31958
  else if (nk == REGEXP_STAR)
1517
  {
1518
1180
    retNode = rewriteStarRegExp(node);
1519
  }
1520
30778
  else if (nk == REGEXP_PLUS)
1521
  {
1522
50
    retNode = rewritePlusRegExp(node);
1523
  }
1524
30728
  else if (nk == REGEXP_OPT)
1525
  {
1526
31
    retNode = rewriteOptionRegExp(node);
1527
  }
1528
30697
  else if (nk == REGEXP_RANGE)
1529
  {
1530
406
    retNode = rewriteRangeRegExp(node);
1531
  }
1532
30291
  else if (nk == REGEXP_LOOP)
1533
  {
1534
32
    retNode = rewriteLoopRegExp(node);
1535
  }
1536
30259
  else if (nk == REGEXP_REPEAT)
1537
  {
1538
4
    retNode = rewriteRepeatRegExp(node);
1539
  }
1540
30255
  else if (nk == SEQ_UNIT)
1541
  {
1542
393
    retNode = rewriteSeqUnit(node);
1543
  }
1544
29862
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1545
  {
1546
386
    retNode = rewriteSeqNth(node);
1547
  }
1548
1549
1216994
  Trace("sequences-postrewrite")
1550
608497
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1551
608497
      << std::endl;
1552
608497
  if (node != retNode)
1553
  {
1554
    // also post process the rewrite, which may apply extended rewriting to
1555
    // equalities, if we rewrite to an equality from a non-equality
1556
103631
    retNode = postProcessRewrite(node, retNode);
1557
207262
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1558
103631
                                   << node << " to " << retNode << std::endl;
1559
103631
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1560
  }
1561
504866
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1562
504866
  return RewriteResponse(REWRITE_DONE, retNode);
1563
}
1564
1565
373763
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1566
{
1567
373763
  return RewriteResponse(REWRITE_DONE, node);
1568
}
1569
1570
717
TrustNode SequencesRewriter::expandDefinition(Node node)
1571
{
1572
1434
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1573
717
                           << std::endl;
1574
1575
717
  if (node.getKind() == kind::SEQ_NTH)
1576
  {
1577
2
    NodeManager* nm = NodeManager::currentNM();
1578
4
    Node s = node[0];
1579
4
    Node n = node[1];
1580
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1581
    Node cond = nm->mkNode(AND,
1582
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1583
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1584
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1585
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1586
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1587
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1588
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1589
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1590
  }
1591
715
  return TrustNode::null();
1592
}
1593
1594
386
Node SequencesRewriter::rewriteSeqNth(Node node)
1595
{
1596
386
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1597
772
  Node s = node[0];
1598
772
  Node i = node[1];
1599
386
  if (s.isConst() && i.isConst())
1600
  {
1601
26
    size_t len = Word::getLength(s);
1602
26
    if (i.getConst<Rational>().sgn() != -1)
1603
    {
1604
26
      size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1605
26
      if (pos < len)
1606
      {
1607
16
        std::vector<Node> elements = s.getConst<Sequence>().getVec();
1608
8
        const Node& ret = elements[pos];
1609
8
        return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1610
      }
1611
    }
1612
18
    if (node.getKind() == SEQ_NTH_TOTAL)
1613
    {
1614
      // return arbitrary term
1615
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1616
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1617
    }
1618
    else
1619
    {
1620
16
      return node;
1621
    }
1622
  }
1623
  else
1624
  {
1625
360
    return node;
1626
  }
1627
}
1628
1629
111
Node SequencesRewriter::rewriteCharAt(Node node)
1630
{
1631
111
  Assert(node.getKind() == STRING_CHARAT);
1632
111
  NodeManager* nm = NodeManager::currentNM();
1633
222
  Node one = nm->mkConst(Rational(1));
1634
222
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1635
222
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1636
}
1637
1638
58428
Node SequencesRewriter::rewriteSubstr(Node node)
1639
{
1640
58428
  Assert(node.getKind() == kind::STRING_SUBSTR);
1641
1642
58428
  NodeManager* nm = NodeManager::currentNM();
1643
58428
  if (node[0].isConst())
1644
  {
1645
11589
    if (Word::isEmpty(node[0]))
1646
    {
1647
2082
      Node ret = node[0];
1648
1041
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1649
    }
1650
    // rewriting for constant arguments
1651
10548
    if (node[1].isConst() && node[2].isConst())
1652
    {
1653
10614
      Node s = node[0];
1654
10614
      cvc5::Rational rMaxInt(String::maxSize());
1655
      uint32_t start;
1656
5307
      if (node[1].getConst<Rational>() > rMaxInt)
1657
      {
1658
        // start beyond the maximum size of strings
1659
        // thus, it must be beyond the end point of this string
1660
8
        Node ret = Word::mkEmptyWord(node.getType());
1661
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1662
      }
1663
5303
      else if (node[1].getConst<Rational>().sgn() < 0)
1664
      {
1665
        // start before the beginning of the string
1666
1050
        Node ret = Word::mkEmptyWord(node.getType());
1667
525
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1668
      }
1669
      else
1670
      {
1671
4778
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1672
4778
        if (start >= Word::getLength(node[0]))
1673
        {
1674
          // start beyond the end of the string
1675
782
          Node ret = Word::mkEmptyWord(node.getType());
1676
391
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1677
        }
1678
      }
1679
4387
      if (node[2].getConst<Rational>() > rMaxInt)
1680
      {
1681
        // take up to the end of the string
1682
1
        size_t lenS = Word::getLength(s);
1683
2
        Node ret = Word::suffix(s, lenS - start);
1684
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1685
      }
1686
4386
      else if (node[2].getConst<Rational>().sgn() <= 0)
1687
      {
1688
732
        Node ret = Word::mkEmptyWord(node.getType());
1689
366
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1690
      }
1691
      else
1692
      {
1693
        uint32_t len =
1694
4020
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1695
4020
        if (start + len > Word::getLength(node[0]))
1696
        {
1697
          // take up to the end of the string
1698
338
          size_t lenS = Word::getLength(s);
1699
676
          Node ret = Word::suffix(s, lenS - start);
1700
338
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1701
        }
1702
        else
1703
        {
1704
          // compute the substr using the constant string
1705
7364
          Node ret = Word::substr(s, start, len);
1706
3682
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1707
        }
1708
      }
1709
    }
1710
  }
1711
104160
  Node zero = nm->mkConst(cvc5::Rational(0));
1712
1713
  // if entailed non-positive length or negative start point
1714
52080
  if (ArithEntail::check(zero, node[1], true))
1715
  {
1716
64
    Node ret = Word::mkEmptyWord(node.getType());
1717
32
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1718
  }
1719
52048
  else if (ArithEntail::check(zero, node[2]))
1720
  {
1721
1760
    Node ret = Word::mkEmptyWord(node.getType());
1722
880
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1723
  }
1724
1725
51168
  if (node[0].getKind() == STRING_SUBSTR)
1726
  {
1727
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1728
    //
1729
    // Note that this rewrite can be generalized to:
1730
    //
1731
    // (str.substr x a b) ---> "" if a >= (str.len x)
1732
    //
1733
    // This can be done when we generalize our entailment methods to
1734
    // accept an optional context. Then we could conjecture that
1735
    // (str.substr x a b) rewrites to "" and do a case analysis:
1736
    //
1737
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1738
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1739
    //
1740
    // For example, for (str.substr (str.substr x a a) a a), we could
1741
    // then deduce that under those assumptions, "a" is an
1742
    // over-approximation of the length of (str.substr x a a), which
1743
    // then allows us to reason that the result of the whole term must
1744
    // be empty.
1745
5013
    if (ArithEntail::check(node[1], node[0][2]))
1746
    {
1747
86
      Node ret = Word::mkEmptyWord(node.getType());
1748
43
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1749
    }
1750
  }
1751
46155
  else if (node[0].getKind() == STRING_REPLACE)
1752
  {
1753
    // (str.substr (str.replace x y z) 0 n)
1754
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1755
    // if (str.len y) = 1 and (str.len z) = 1
1756
576
    if (node[1] == zero)
1757
    {
1758
954
      if (StringsEntail::checkLengthOne(node[0][1], true)
1759
954
          && StringsEntail::checkLengthOne(node[0][2], true))
1760
      {
1761
        Node ret = nm->mkNode(
1762
            kind::STRING_REPLACE,
1763
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1764
            node[0][1],
1765
16
            node[0][2]);
1766
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1767
      }
1768
    }
1769
  }
1770
1771
102242
  std::vector<Node> n1;
1772
51121
  utils::getConcat(node[0], n1);
1773
102242
  TypeNode stype = node.getType();
1774
1775
  // definite inclusion
1776
51121
  if (node[1] == zero)
1777
  {
1778
35027
    Node curr = node[2];
1779
35027
    std::vector<Node> childrenr;
1780
18196
    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
1781
    {
1782
1365
      if (curr != zero && !n1.empty())
1783
      {
1784
1632
        childrenr.push_back(nm->mkNode(
1785
1088
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1786
      }
1787
2730
      Node ret = utils::mkConcat(childrenr, stype);
1788
1365
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1789
    }
1790
  }
1791
1792
  // symbolic length analysis
1793
141105
  for (unsigned r = 0; r < 2; r++)
1794
  {
1795
    // the amount of characters we can strip
1796
187094
    Node curr;
1797
95745
    if (r == 0)
1798
    {
1799
49756
      if (node[1] != zero)
1800
      {
1801
        // strip up to start point off the start of the string
1802
32925
        curr = node[1];
1803
      }
1804
    }
1805
45989
    else if (r == 1)
1806
    {
1807
      Node tot_len =
1808
91814
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1809
91814
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1810
45989
      if (node[2] != tot_len)
1811
      {
1812
43999
        if (ArithEntail::check(node[2], tot_len))
1813
        {
1814
          // end point beyond end point of string, map to tot_len
1815
270
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1816
135
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1817
        }
1818
        else
1819
        {
1820
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1821
43864
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1822
        }
1823
      }
1824
1825
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1826
      Node n1_lt_tot_len =
1827
91679
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1828
45854
      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
1829
      {
1830
40
        Node ret = Word::mkEmptyWord(node.getType());
1831
20
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1832
      }
1833
1834
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1835
      Node non_zero_len =
1836
91659
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1837
45834
      if (ArithEntail::checkWithAssumption(
1838
              non_zero_len, node[1], tot_len, false))
1839
      {
1840
4
        Node ret = Word::mkEmptyWord(node.getType());
1841
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1842
      }
1843
1844
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1845
      Node geq_zero_start =
1846
91657
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1847
45832
      if (ArithEntail::checkWithAssumption(
1848
              geq_zero_start, zero, tot_len, false))
1849
      {
1850
4
        Node ret = Word::mkEmptyWord(node.getType());
1851
        return returnRewrite(
1852
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1853
      }
1854
1855
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1856
45830
      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
1857
      {
1858
10
        Node ret = Word::mkEmptyWord(node.getType());
1859
5
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1860
      }
1861
    }
1862
95581
    if (!curr.isNull())
1863
    {
1864
      // strip off components while quantity is entailed positive
1865
76760
      int dir = r == 0 ? 1 : -1;
1866
149288
      std::vector<Node> childrenr;
1867
76760
      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
1868
      {
1869
4232
        if (r == 0)
1870
        {
1871
          Node ret = nm->mkNode(
1872
7534
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1873
3767
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1874
        }
1875
        else
1876
        {
1877
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1878
930
                                utils::mkConcat(n1, stype),
1879
                                node[1],
1880
1860
                                node[2]);
1881
465
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1882
        }
1883
      }
1884
    }
1885
  }
1886
  // combine substr
1887
45360
  if (node[0].getKind() == kind::STRING_SUBSTR)
1888
  {
1889
8738
    Node start_inner = node[0][1];
1890
8738
    Node start_outer = node[1];
1891
4758
    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
1892
    {
1893
      // both are positive
1894
      // thus, start point is definitely start_inner+start_outer.
1895
      // We can rewrite if it for certain what the length is
1896
1897
      // the length of a string from the inner substr subtracts the start point
1898
      // of the outer substr
1899
      Node len_from_inner =
1900
5694
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1901
5694
      Node len_from_outer = node[2];
1902
5694
      Node new_len;
1903
      // take quantity that is for sure smaller than the other
1904
3236
      if (len_from_inner == len_from_outer)
1905
      {
1906
17
        new_len = len_from_inner;
1907
      }
1908
3219
      else if (ArithEntail::check(len_from_inner, len_from_outer))
1909
      {
1910
571
        new_len = len_from_outer;
1911
      }
1912
2648
      else if (ArithEntail::check(len_from_outer, len_from_inner))
1913
      {
1914
190
        new_len = len_from_inner;
1915
      }
1916
3236
      if (!new_len.isNull())
1917
      {
1918
1556
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1919
        Node ret =
1920
1556
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1921
778
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1922
      }
1923
    }
1924
  }
1925
44582
  return node;
1926
}
1927
1928
174
Node SequencesRewriter::rewriteUpdate(Node node)
1929
{
1930
174
  Assert(node.getKind() == kind::STRING_UPDATE);
1931
348
  Node s = node[0];
1932
174
  if (s.isConst())
1933
  {
1934
162
    if (Word::isEmpty(s))
1935
    {
1936
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1937
    }
1938
    // rewriting for constant arguments
1939
162
    if (node[1].isConst())
1940
    {
1941
142
      cvc5::Rational rMaxInt(String::maxSize());
1942
122
      if (node[1].getConst<Rational>() > rMaxInt)
1943
      {
1944
        // start beyond the maximum size of strings
1945
        // thus, it must be beyond the end point of this string
1946
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1947
      }
1948
122
      else if (node[1].getConst<Rational>().sgn() < 0)
1949
      {
1950
        // start before the beginning of the string
1951
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1952
      }
1953
      uint32_t start =
1954
122
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1955
122
      size_t len = Word::getLength(s);
1956
122
      if (start >= len)
1957
      {
1958
        // start beyond the end of the string
1959
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1960
      }
1961
122
      if (node[2].isConst())
1962
      {
1963
204
        Node ret = Word::update(s, start, node[2]);
1964
102
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1965
      }
1966
    }
1967
  }
1968
1969
72
  return node;
1970
}
1971
1972
157132
Node SequencesRewriter::rewriteContains(Node node)
1973
{
1974
157132
  Assert(node.getKind() == kind::STRING_CONTAINS);
1975
157132
  NodeManager* nm = NodeManager::currentNM();
1976
1977
157132
  if (node[0] == node[1])
1978
  {
1979
1364
    Node ret = NodeManager::currentNM()->mkConst(true);
1980
682
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1981
  }
1982
156450
  if (node[0].isConst())
1983
  {
1984
79279
    if (node[1].isConst())
1985
    {
1986
10088
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1987
5044
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1988
    }
1989
    else
1990
    {
1991
135674
      Node t = node[1];
1992
74235
      if (Word::isEmpty(node[0]))
1993
      {
1994
        Node len1 =
1995
71158
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1996
35698
        if (ArithEntail::check(len1, true))
1997
        {
1998
          // we handle the false case here since the rewrite for equality
1999
          // uses this function, hence we want to conclude false if possible.
2000
          // len(x)>0 => contains( "", x ) ---> false
2001
476
          Node ret = NodeManager::currentNM()->mkConst(false);
2002
238
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
2003
        }
2004
      }
2005
38537
      else if (StringsEntail::checkLengthOne(t))
2006
      {
2007
25062
        std::vector<Node> vec = Word::getChars(node[0]);
2008
25062
        Node emp = Word::mkEmptyWord(t.getType());
2009
25062
        NodeBuilder nb(OR);
2010
12531
        nb << emp.eqNode(t);
2011
26428
        for (const Node& c : vec)
2012
        {
2013
13897
          Assert(c.getType() == t.getType());
2014
13897
          nb << c.eqNode(t);
2015
        }
2016
2017
        // str.contains("ABCabc", t) --->
2018
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2019
        // if len(t) <= 1
2020
25062
        Node ret = nb;
2021
12531
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2022
      }
2023
26006
      else if (node[1].getKind() == kind::STRING_CONCAT)
2024
      {
2025
        int firstc, lastc;
2026
735
        if (!StringsEntail::canConstantContainConcat(
2027
                node[0], node[1], firstc, lastc))
2028
        {
2029
54
          Node ret = NodeManager::currentNM()->mkConst(false);
2030
27
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2031
        }
2032
      }
2033
    }
2034
  }
2035
138610
  if (node[1].isConst())
2036
  {
2037
70612
    size_t len = Word::getLength(node[1]);
2038
70612
    if (len == 0)
2039
    {
2040
      // contains( x, "" ) ---> true
2041
70800
      Node ret = NodeManager::currentNM()->mkConst(true);
2042
35400
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2043
    }
2044
35212
    else if (len == 1)
2045
    {
2046
      // The following rewrites are specific to a single character second
2047
      // argument of contains, where we can reason that this character is
2048
      // not split over multiple components in the first argument.
2049
25308
      if (node[0].getKind() == STRING_CONCAT)
2050
      {
2051
1864
        std::vector<Node> nc1;
2052
932
        utils::getConcat(node[0], nc1);
2053
1864
        NodeBuilder nb(OR);
2054
3381
        for (const Node& ncc : nc1)
2055
        {
2056
2449
          nb << nm->mkNode(STRING_CONTAINS, ncc, node[1]);
2057
        }
2058
1864
        Node ret = nb.constructNode();
2059
        // str.contains( x ++ y, "A" ) --->
2060
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2061
932
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2062
      }
2063
24376
      else if (node[0].getKind() == STRING_REPLACE)
2064
      {
2065
3031
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2066
1589
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2067
        {
2068
294
          Node d1 = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2069
          Node d2 =
2070
              nm->mkNode(AND,
2071
294
                         nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2072
588
                         nm->mkNode(STRING_CONTAINS, node[0][2], node[1]));
2073
294
          Node ret = nm->mkNode(OR, d1, d2);
2074
          // If str.contains( y, "A" ) ---> false, then:
2075
          // str.contains( str.replace( x, y, z ), "A" ) --->
2076
          //   str.contains( x, "A" ) OR
2077
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2078
147
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2079
        }
2080
      }
2081
    }
2082
  }
2083
204262
  std::vector<Node> nc1;
2084
102131
  utils::getConcat(node[0], nc1);
2085
204262
  std::vector<Node> nc2;
2086
102131
  utils::getConcat(node[1], nc2);
2087
2088
  // component-wise containment
2089
204262
  std::vector<Node> nc1rb;
2090
204262
  std::vector<Node> nc1re;
2091
102131
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2092
  {
2093
5490
    Node ret = NodeManager::currentNM()->mkConst(true);
2094
2745
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2095
  }
2096
198772
  TypeNode stype = node[0].getType();
2097
2098
  // strip endpoints
2099
198772
  std::vector<Node> nb;
2100
198772
  std::vector<Node> ne;
2101
99386
  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
2102
  {
2103
    Node ret = NodeManager::currentNM()->mkNode(
2104
326
        kind::STRING_CONTAINS, utils::mkConcat(nc1, stype), node[1]);
2105
163
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2106
  }
2107
2108
200881
  for (const Node& n : nc2)
2109
  {
2110
102535
    if (n.getKind() == kind::STRING_REPLACE)
2111
    {
2112
      // (str.contains x (str.replace y z w)) --> false
2113
      // if (str.contains x y) = false and (str.contains x w) = false
2114
      //
2115
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2116
      // replacement does not change y. (str.contains x w) checks that if the
2117
      // replacement changes anything in y, the w makes it impossible for it to
2118
      // occur in x.
2119
6705
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2120
3791
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2121
      {
2122
1402
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2123
727
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2124
        {
2125
104
          Node res = nm->mkConst(false);
2126
52
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2127
        }
2128
      }
2129
2130
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2131
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2132
      //
2133
      // TODO: Remove with under-/over-approximation
2134
3739
      if (node[0] == n[0] && node[0] == n[2])
2135
      {
2136
1650
        Node ret;
2137
825
        if (nc2.size() > 1)
2138
        {
2139
68
          Node emp = Word::mkEmptyWord(stype);
2140
68
          NodeBuilder nb2(kind::AND);
2141
102
          for (const Node& n2 : nc2)
2142
          {
2143
68
            if (n2 == n)
2144
            {
2145
34
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2146
            }
2147
            else
2148
            {
2149
34
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2150
            }
2151
          }
2152
34
          ret = nb2.constructNode();
2153
        }
2154
        else
2155
        {
2156
791
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2157
        }
2158
825
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2159
      }
2160
    }
2161
  }
2162
2163
  // length entailment
2164
196692
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2165
196692
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2166
98346
  if (ArithEntail::check(len_n2, len_n1, true))
2167
  {
2168
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2169
346
    Node ret = NodeManager::currentNM()->mkConst(false);
2170
173
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2171
  }
2172
2173
  // multi-set reasoning
2174
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2175
  //   since the number of a's in the second argument is greater than the number
2176
  //   of a's in the first argument
2177
98173
  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
2178
  {
2179
40
    Node ret = nm->mkConst(false);
2180
20
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2181
  }
2182
2183
98153
  if (ArithEntail::check(len_n2, len_n1, false))
2184
  {
2185
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2186
100940
    Node ret = node[0].eqNode(node[1]);
2187
50470
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2188
  }
2189
2190
  // splitting
2191
47683
  if (node[0].getKind() == kind::STRING_CONCAT)
2192
  {
2193
2046
    if (node[1].isConst())
2194
    {
2195
1025
      Node t = node[1];
2196
      // Below, we are looking for a constant component of node[0]
2197
      // has no overlap with node[1], which means we can split.
2198
      // Notice that if the first or last components had no
2199
      // overlap, these would have been removed by strip
2200
      // constant endpoints above.
2201
      // Hence, we consider only the inner children.
2202
1199
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2203
      {
2204
        // constant contains
2205
698
        if (node[0][i].isConst())
2206
        {
2207
          // if no overlap, we can split into disjunction
2208
29
          if (Word::noOverlapWith(node[0][i], node[1]))
2209
          {
2210
46
            std::vector<Node> nc0;
2211
23
            utils::getConcat(node[0], nc0);
2212
46
            std::vector<Node> spl[2];
2213
23
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2214
23
            Assert(i < nc0.size() - 1);
2215
23
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2216
            Node ret = NodeManager::currentNM()->mkNode(
2217
                kind::OR,
2218
92
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2219
46
                                                 utils::mkConcat(spl[0], stype),
2220
                                                 node[1]),
2221
92
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2222
46
                                                 utils::mkConcat(spl[1], stype),
2223
115
                                                 node[1]));
2224
23
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2225
          }
2226
        }
2227
      }
2228
    }
2229
  }
2230
45637
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2231
  {
2232
    // (str.contains (str.substr x n (str.len y)) y) --->
2233
    //   (= (str.substr x n (str.len y)) y)
2234
    //
2235
    // TODO: Remove with under-/over-approximation
2236
12719
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2237
    {
2238
32
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2239
16
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2240
    }
2241
  }
2242
32918
  else if (node[0].getKind() == kind::STRING_REPLACE)
2243
  {
2244
926
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2245
    {
2246
448
      if (Word::noOverlapWith(node[1], node[0][1])
2247
448
          && Word::noOverlapWith(node[1], node[0][2]))
2248
      {
2249
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2250
        // if there is no overlap between c1 and c3 and none between c2 and c3
2251
4
        Node ret = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2252
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2253
      }
2254
    }
2255
2256
924
    if (node[0][0] == node[0][2])
2257
    {
2258
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2259
102
      if (node[0][1] == node[1])
2260
      {
2261
16
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2262
8
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2263
      }
2264
2265
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2266
      // if (str.len z) <= 1
2267
94
      if (StringsEntail::checkLengthOne(node[1]))
2268
      {
2269
12
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2270
6
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2271
      }
2272
    }
2273
2274
    // (str.contains (str.replace x y z) z) --->
2275
    //   (or (str.contains x y) (str.contains x z))
2276
910
    if (node[0][2] == node[1])
2277
    {
2278
      Node ret =
2279
          nm->mkNode(OR,
2280
364
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2281
728
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][2]));
2282
182
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2283
    }
2284
2285
    // (str.contains (str.replace x y z) w) --->
2286
    //   (str.contains (str.replace x y "") w)
2287
    // if (str.contains z w) ---> false and (str.len w) = 1
2288
728
    if (StringsEntail::checkLengthOne(node[1]))
2289
    {
2290
426
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2291
236
      if (!ctn.isNull() && !ctn.getConst<bool>())
2292
      {
2293
92
        Node empty = Word::mkEmptyWord(stype);
2294
        Node ret = nm->mkNode(
2295
            kind::STRING_CONTAINS,
2296
92
            nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty),
2297
184
            node[1]);
2298
46
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2299
      }
2300
    }
2301
  }
2302
2303
47400
  if (node[1].getKind() == kind::STRING_REPLACE)
2304
  {
2305
    // (str.contains x (str.replace y x y)) --->
2306
    //   (str.contains x y)
2307
1090
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2308
    {
2309
8
      Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1][0]);
2310
4
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2311
    }
2312
2313
    // (str.contains x (str.replace "" x y)) --->
2314
    //   (= "" (str.replace "" x y))
2315
    //
2316
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2317
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2318
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2319
2170
    Node emp = Word::mkEmptyWord(stype);
2320
1086
    if (node[0] == node[1][1] && node[1][0] == emp)
2321
    {
2322
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2323
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2324
    }
2325
  }
2326
2327
47394
  return node;
2328
}
2329
2330
5547
Node SequencesRewriter::rewriteIndexof(Node node)
2331
{
2332
5547
  Assert(node.getKind() == kind::STRING_INDEXOF);
2333
5547
  NodeManager* nm = NodeManager::currentNM();
2334
2335
5547
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2336
  {
2337
    // z<0  implies  str.indexof( x, y, z ) --> -1
2338
356
    Node negone = nm->mkConst(Rational(-1));
2339
178
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2340
  }
2341
2342
  // the string type
2343
10738
  TypeNode stype = node[0].getType();
2344
2345
  // evaluation and simple cases
2346
10738
  std::vector<Node> children0;
2347
5369
  utils::getConcat(node[0], children0);
2348
5369
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2349
  {
2350
1110
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2351
923
    if (node[2].getConst<Rational>() > rMaxInt)
2352
    {
2353
      // We know that, due to limitations on the size of string constants
2354
      // in our implementation, that accessing a position greater than
2355
      // rMaxInt is guaranteed to be out of bounds.
2356
      Node negone = nm->mkConst(Rational(-1));
2357
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2358
    }
2359
923
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2360
1110
    Node s = children0[0];
2361
1110
    Node t = node[1];
2362
    uint32_t start =
2363
923
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2364
923
    std::size_t ret = Word::find(s, t, start);
2365
923
    if (ret != std::string::npos)
2366
    {
2367
652
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2368
326
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2369
    }
2370
597
    else if (children0.size() == 1)
2371
    {
2372
820
      Node negone = nm->mkConst(Rational(-1));
2373
410
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2374
    }
2375
  }
2376
2377
4633
  if (node[0] == node[1])
2378
  {
2379
92
    if (node[2].isConst())
2380
    {
2381
18
      if (node[2].getConst<Rational>().sgn() == 0)
2382
      {
2383
        // indexof( x, x, 0 ) --> 0
2384
36
        Node zero = nm->mkConst(Rational(0));
2385
18
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2386
      }
2387
    }
2388
74
    if (ArithEntail::check(node[2], true))
2389
    {
2390
      // y>0  implies  indexof( x, x, y ) --> -1
2391
      Node negone = nm->mkConst(Rational(-1));
2392
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2393
    }
2394
126
    Node emp = Word::mkEmptyWord(stype);
2395
74
    if (node[0] != emp)
2396
    {
2397
      // indexof( x, x, z ) ---> indexof( "", "", z )
2398
44
      Node ret = nm->mkNode(STRING_INDEXOF, emp, emp, node[2]);
2399
22
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2400
    }
2401
  }
2402
2403
9186
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2404
9186
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2405
9186
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2406
2407
4593
  if (node[1].isConst())
2408
  {
2409
4015
    if (Word::isEmpty(node[1]))
2410
    {
2411
77
      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
2412
      {
2413
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2414
9
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2415
      }
2416
    }
2417
  }
2418
2419
4584
  if (ArithEntail::check(len1, len0m2, true))
2420
  {
2421
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2422
16
    Node negone = nm->mkConst(Rational(-1));
2423
8
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2424
  }
2425
2426
9152
  Node fstr = node[0];
2427
4576
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2428
  {
2429
1146
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2430
1146
    fstr = Rewriter::rewrite(fstr);
2431
  }
2432
2433
9152
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2434
9152
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2435
4576
                                 << fstr << ", " << node[1] << ")" << std::endl;
2436
4576
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2437
9152
  std::vector<Node> children1;
2438
4576
  utils::getConcat(node[1], children1);
2439
4576
  if (!cmp_conr.isNull())
2440
  {
2441
351
    if (cmp_conr.getConst<bool>())
2442
    {
2443
346
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2444
      {
2445
        // past the first position in node[0] that contains node[1], we can drop
2446
410
        std::vector<Node> nb;
2447
410
        std::vector<Node> ne;
2448
250
        int cc = d_stringsEntail.componentContains(
2449
250
            children0, children1, nb, ne, true, 1);
2450
250
        if (cc != -1 && !ne.empty())
2451
        {
2452
          // For example:
2453
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2454
144
          Node nn = utils::mkConcat(children0, stype);
2455
144
          Node ret = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2456
72
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2457
        }
2458
2459
        // Strip components from the beginning that are guaranteed not to match
2460
178
        if (StringsEntail::stripConstantEndpoints(
2461
                children0, children1, nb, ne, 1))
2462
        {
2463
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2464
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2465
          Node ret = nm->mkNode(
2466
              kind::PLUS,
2467
36
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2468
72
              nm->mkNode(kind::STRING_INDEXOF,
2469
36
                         utils::mkConcat(children0, stype),
2470
                         node[1],
2471
72
                         node[2]));
2472
18
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2473
        }
2474
      }
2475
      // To show that the first argument definitely contains the second, the
2476
      // index must be a valid index in the first argument. This ensures that
2477
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2478
      // is beyond the length of t. This is not required for the above rewrites,
2479
      // which only apply when n=0.
2480
256
      if (ArithEntail::check(node[2]) && ArithEntail::check(len0, node[2]))
2481
      {
2482
        // strip symbolic length
2483
348
        Node new_len = node[2];
2484
348
        std::vector<Node> nr;
2485
188
        if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2486
        {
2487
          // For example:
2488
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2489
          // implies
2490
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2491
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2492
56
          Node nn = utils::mkConcat(children0, stype);
2493
          Node ret =
2494
              nm->mkNode(PLUS,
2495
56
                         nm->mkNode(MINUS, node[2], new_len),
2496
112
                         nm->mkNode(STRING_INDEXOF, nn, node[1], new_len));
2497
28
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2498
        }
2499
      }
2500
    }
2501
    else
2502
    {
2503
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2504
10
      Node negone = nm->mkConst(Rational(-1));
2505
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2506
    }
2507
  }
2508
  else
2509
  {
2510
8417
    Node new_len = node[2];
2511
8417
    std::vector<Node> nr;
2512
4225
    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2513
    {
2514
      // Normalize the string before the start index.
2515
      //
2516
      // For example:
2517
      // str.indexof(str.++("ABCD", x), y, 3) --->
2518
      // str.indexof(str.++("AAAD", x), y, 3)
2519
141
      Node nodeNr = utils::mkConcat(nr, stype);
2520
141
      Node normNr = lengthPreserveRewrite(nodeNr);
2521
87
      if (normNr != nodeNr)
2522
      {
2523
66
        std::vector<Node> normNrChildren;
2524
33
        utils::getConcat(normNr, normNrChildren);
2525
66
        std::vector<Node> children(normNrChildren);
2526
33
        children.insert(children.end(), children0.begin(), children0.end());
2527
66
        Node nn = utils::mkConcat(children, stype);
2528
66
        Node res = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2529
33
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2530
      }
2531
    }
2532
  }
2533
2534
4420
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2535
  {
2536
6670
    std::vector<Node> cb;
2537
6670
    std::vector<Node> ce;
2538
3340
    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
2539
    {
2540
20
      Node ret = utils::mkConcat(children0, stype);
2541
10
      ret = nm->mkNode(STRING_INDEXOF, ret, node[1], node[2]);
2542
      // For example:
2543
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2544
10
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2545
    }
2546
  }
2547
2548
4410
  return node;
2549
}
2550
2551
975
Node SequencesRewriter::rewriteIndexofRe(Node node)
2552
{
2553
975
  Assert(node.getKind() == STRING_INDEXOF_RE);
2554
975
  NodeManager* nm = NodeManager::currentNM();
2555
1950
  Node s = node[0];
2556
1950
  Node r = node[1];
2557
1950
  Node n = node[2];
2558
1950
  Node zero = nm->mkConst(Rational(0));
2559
1950
  Node slen = nm->mkNode(STRING_LENGTH, s);
2560
2561
975
  if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
2562
  {
2563
62
    Node ret = nm->mkConst(Rational(-1));
2564
31
    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
2565
  }
2566
2567
944
  if (RegExpEntail::isConstRegExp(r))
2568
  {
2569
674
    if (s.isConst() && n.isConst())
2570
    {
2571
300
      Rational nrat = n.getConst<Rational>();
2572
300
      cvc5::Rational rMaxInt(cvc5::String::maxSize());
2573
150
      if (nrat > rMaxInt)
2574
      {
2575
        // We know that, due to limitations on the size of string constants
2576
        // in our implementation, that accessing a position greater than
2577
        // rMaxInt is guaranteed to be out of bounds.
2578
        Node negone = nm->mkConst(Rational(-1));
2579
        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
2580
      }
2581
2582
150
      uint32_t start = nrat.getNumerator().toUnsignedInt();
2583
300
      Node rem = nm->mkConst(s.getConst<String>().substr(start));
2584
150
      std::pair<size_t, size_t> match = firstMatch(rem, r);
2585
      Node ret = nm->mkConst(
2586
392
          Rational(match.first == string::npos
2587
                       ? -1
2588
392
                       : static_cast<int64_t>(start + match.first)));
2589
150
      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
2590
    }
2591
2592
524
    if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
2593
    {
2594
768
      String emptyStr("");
2595
388
      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
2596
      {
2597
8
        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
2598
      }
2599
    }
2600
  }
2601
786
  return node;
2602
}
2603
2604
4207
Node SequencesRewriter::rewriteReplace(Node node)
2605
{
2606
4207
  Assert(node.getKind() == kind::STRING_REPLACE);
2607
4207
  NodeManager* nm = NodeManager::currentNM();
2608
2609
4207
  if (node[1].isConst() && Word::isEmpty(node[1]))
2610
  {
2611
862
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2612
431
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2613
  }
2614
  // the string type
2615
7552
  TypeNode stype = node.getType();
2616
2617
7552
  std::vector<Node> children0;
2618
3776
  utils::getConcat(node[0], children0);
2619
2620
3776
  if (node[1].isConst() && children0[0].isConst())
2621
  {
2622
918
    Node s = children0[0];
2623
918
    Node t = node[1];
2624
909
    std::size_t p = Word::find(s, t);
2625
909
    if (p == std::string::npos)
2626
    {
2627
588
      if (children0.size() == 1)
2628
      {
2629
579
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2630
      }
2631
    }
2632
    else
2633
    {
2634
642
      Node s1 = Word::substr(s, 0, p);
2635
642
      Node s3 = Word::substr(s, p + Word::getLength(t));
2636
642
      std::vector<Node> children;
2637
321
      if (!Word::isEmpty(s1))
2638
      {
2639
49
        children.push_back(s1);
2640
      }
2641
321
      children.push_back(node[2]);
2642
321
      if (!Word::isEmpty(s3))
2643
      {
2644
90
        children.push_back(s3);
2645
      }
2646
321
      children.insert(children.end(), children0.begin() + 1, children0.end());
2647
642
      Node ret = utils::mkConcat(children, stype);
2648
321
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2649
    }
2650
  }
2651
2652
  // rewrites that apply to both replace and replaceall
2653
5752
  Node rri = rewriteReplaceInternal(node);
2654
2876
  if (!rri.isNull())
2655
  {
2656
    // printing of the rewrite managed by the call above
2657
176
    return rri;
2658
  }
2659
2660
2700
  if (node[0] == node[2])
2661
  {
2662
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2663
903
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2664
903
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2665
602
    if (ArithEntail::check(l1, l0))
2666
    {
2667
295
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2668
    }
2669
2670
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2671
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2672
307
    if (StringsEntail::checkLengthOne(node[0]))
2673
    {
2674
246
      Node empty = Word::mkEmptyWord(stype);
2675
      Node rn1 = Rewriter::rewrite(
2676
246
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2677
126
      if (rn1 != node[1])
2678
      {
2679
246
        std::vector<Node> emptyNodes;
2680
        bool allEmptyEqs;
2681
126
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2682
2683
126
        if (allEmptyEqs)
2684
        {
2685
182
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2686
94
          if (node[1] != nn1)
2687
          {
2688
12
            Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]);
2689
6
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2690
          }
2691
        }
2692
      }
2693
    }
2694
  }
2695
2696
4798
  std::vector<Node> children1;
2697
2399
  utils::getConcat(node[1], children1);
2698
2699
  // check if contains definitely does (or does not) hold
2700
4798
  Node cmp_con = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1]);
2701
4798
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2702
2399
  if (cmp_conr.isConst())
2703
  {
2704
606
    if (cmp_conr.getConst<bool>())
2705
    {
2706
      // component-wise containment
2707
966
      std::vector<Node> cb;
2708
966
      std::vector<Node> ce;
2709
594
      int cc = d_stringsEntail.componentContains(
2710
594
          children0, children1, cb, ce, true, 1);
2711
594
      if (cc != -1)
2712
      {
2713
502
        if (cc == 0 && children0[0] == children1[0])
2714
        {
2715
          // definitely a prefix, can do the replace
2716
          // for example,
2717
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2718
          //   str.++( y, "b" )
2719
166
          std::vector<Node> cres;
2720
83
          cres.push_back(node[2]);
2721
83
          cres.insert(cres.end(), ce.begin(), ce.end());
2722
166
          Node ret = utils::mkConcat(cres, stype);
2723
83
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2724
        }
2725
419
        else if (!ce.empty())
2726
        {
2727
          // we can pull remainder past first definite containment
2728
          // for example,
2729
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2730
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2731
          // this is independent of whether the second argument may be empty
2732
278
          std::vector<Node> scc;
2733
417
          scc.push_back(NodeManager::currentNM()->mkNode(
2734
              kind::STRING_REPLACE,
2735
278
              utils::mkConcat(children0, stype),
2736
              node[1],
2737
              node[2]));
2738
139
          scc.insert(scc.end(), ce.begin(), ce.end());
2739
278
          Node ret = utils::mkConcat(scc, stype);
2740
139
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2741
        }
2742
      }
2743
    }
2744
    else
2745
    {
2746
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2747
12
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2748
    }
2749
  }
2750
1793
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2751
  {
2752
    // Rewriting the str.contains may return equalities of the form (= x "").
2753
    // In that case, we can substitute the variables appearing in those
2754
    // equalities with the empty string in the third argument of the
2755
    // str.replace. For example:
2756
    //
2757
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2758
    //
2759
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2760
    // but that means that y must be empty in that case. Thus, we can
2761
    // substitute y with "" in the third argument. Note that the third argument
2762
    // does not matter when the str.replace does not apply.
2763
    //
2764
708
    Node empty = Word::mkEmptyWord(stype);
2765
2766
708
    std::vector<Node> emptyNodes;
2767
    bool allEmptyEqs;
2768
370
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2769
2770
370
    if (emptyNodes.size() > 0)
2771
    {
2772
      // Perform the substitutions
2773
428
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2774
      Node nn2 = node[2].substitute(
2775
428
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2776
2777
      // If the contains rewrites to a conjunction of empty-string equalities
2778
      // and we are doing the replacement in an empty string, we can rewrite
2779
      // the string-to-replace with a concatenation of all the terms that must
2780
      // be empty:
2781
      //
2782
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2783
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2784
230
      if (node[0] == empty && allEmptyEqs)
2785
      {
2786
188
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2787
188
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2788
98
        if (nn1 != node[1] || nn2 != node[2])
2789
        {
2790
16
          Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2);
2791
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2792
        }
2793
      }
2794
2795
222
      if (nn2 != node[2])
2796
      {
2797
48
        Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2);
2798
24
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2799
      }
2800
    }
2801
  }
2802
2803
2133
  if (cmp_conr != cmp_con)
2804
  {
2805
835
    if (StringsEntail::checkNonEmpty(node[1]))
2806
    {
2807
      // pull endpoints that can be stripped
2808
      // for example,
2809
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2810
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2811
981
      std::vector<Node> cb;
2812
981
      std::vector<Node> ce;
2813
499
      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
2814
      {
2815
34
        std::vector<Node> cc;
2816
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2817
17
        cc.push_back(
2818
68
            NodeManager::currentNM()->mkNode(kind::STRING_REPLACE,
2819
34
                                             utils::mkConcat(children0, stype),
2820
                                             node[1],
2821
                                             node[2]));
2822
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2823
34
        Node ret = utils::mkConcat(cc, stype);
2824
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2825
      }
2826
    }
2827
  }
2828
2829
2116
  children1.clear();
2830
2116
  utils::getConcat(node[1], children1);
2831
4232
  Node lastChild1 = children1[children1.size() - 1];
2832
2116
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2833
  {
2834
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2835
    // (str.replace x (str.++ t
2836
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2837
    // if j > len(x)
2838
    //
2839
    // Reasoning: If the string to be replaced is longer than x, then it does
2840
    // not matter how much longer it is, the result is always x. Thus, it is
2841
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2842
2843
116
    children1.pop_back();
2844
    // Length of the non-substr components in the second argument
2845
    Node partLen1 =
2846
224
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2847
224
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2848
2849
224
    Node zero = nm->mkConst(Rational(0));
2850
224
    Node one = nm->mkConst(Rational(1));
2851
224
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2852
224
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2853
    // Check len(t) + j > len(x) + 1
2854
116
    if (ArithEntail::check(maxLen1, len0_1, true))
2855
    {
2856
8
      children1.push_back(nm->mkNode(
2857
          kind::STRING_SUBSTR,
2858
          lastChild1[0],
2859
          lastChild1[1],
2860
16
          nm->mkNode(
2861
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2862
      Node res = nm->mkNode(kind::STRING_REPLACE,
2863
                            node[0],
2864
16
                            utils::mkConcat(children1, stype),
2865
32
                            node[2]);
2866
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2867
    }
2868
  }
2869
2870
2108
  if (node[0].getKind() == STRING_REPLACE)
2871
  {
2872
352
    Node x = node[0];
2873
352
    Node y = node[1];
2874
352
    Node z = node[2];
2875
182
    if (x[0] == x[2] && x[0] == y)
2876
    {
2877
      // (str.replace (str.replace y w y) y z) -->
2878
      //   (str.replace (str.replace y w z) y z)
2879
      // if (str.len w) >= (str.len z) and w != z
2880
      //
2881
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2882
      // does appear in y.
2883
      //
2884
      // Case (1): In this case, the reasoning is trivial. The
2885
      // inner replace does not do anything, so we can just replace its third
2886
      // argument with any string.
2887
      //
2888
      // Case (2): After the inner replace, we are guaranteed to have a string
2889
      // that contains y at the index of w in the original string y. The outer
2890
      // replace then replaces that y with z, so we can short-circuit that
2891
      // replace by directly replacing w with z in the inner replace. We can
2892
      // only do that if the result of the new inner replace does not contain
2893
      // y, otherwise we end up doing two replaces that are different from the
2894
      // original expression. We enforce that by requiring that the length of w
2895
      // has to be greater or equal to the length of z and that w and z have to
2896
      // be different. This makes sure that an inner replace changes a string
2897
      // to a string that is shorter than y, making it impossible for the outer
2898
      // replace to match.
2899
124
      Node w = x[1];
2900
2901
      // (str.len w) >= (str.len z)
2902
124
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2903
124
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2904
68
      if (ArithEntail::check(wlen, zlen))
2905
      {
2906
        // w != z
2907
60
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2908
36
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2909
        {
2910
          Node ret = nm->mkNode(kind::STRING_REPLACE,
2911
24
                                nm->mkNode(kind::STRING_REPLACE, y, w, z),
2912
                                y,
2913
48
                                z);
2914
12
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2915
        }
2916
      }
2917
    }
2918
  }
2919
2920
2096
  if (node[1].getKind() == STRING_REPLACE)
2921
  {
2922
258
    if (node[1][0] == node[0])
2923
    {
2924
50
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2925
      {
2926
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2927
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2928
      }
2929
50
      bool dualReplIteSuccess = false;
2930
92
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2931
50
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2932
      {
2933
        // str.contains( x, z ) ---> false
2934
        //   implies
2935
        // str.replace( x, str.replace( x, y, z ), w ) --->
2936
        // ite( str.contains( x, y ), x, w )
2937
        dualReplIteSuccess = true;
2938
      }
2939
      else
2940
      {
2941
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2942
        //   implies
2943
        // str.replace( x, str.replace( x, y, z ), w ) --->
2944
        // ite( str.contains( x, y ), x, w )
2945
50
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2946
50
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2947
        {
2948
16
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2949
16
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2950
          {
2951
8
            dualReplIteSuccess = true;
2952
          }
2953
        }
2954
      }
2955
50
      if (dualReplIteSuccess)
2956
      {
2957
        Node res = nm->mkNode(ITE,
2958
16
                              nm->mkNode(STRING_CONTAINS, node[0], node[1][1]),
2959
                              node[0],
2960
32
                              node[2]);
2961
8
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2962
      }
2963
    }
2964
2965
250
    bool invSuccess = false;
2966
250
    if (node[1][1] == node[0])
2967
    {
2968
52
      if (node[1][0] == node[1][2])
2969
      {
2970
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2971
        invSuccess = true;
2972
      }
2973
52
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2974
      {
2975
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2976
        //   implies
2977
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2978
40
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2979
20
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2980
      }
2981
    }
2982
    else
2983
    {
2984
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2985
      //   implies
2986
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2987
396
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2988
198
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2989
      {
2990
8
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2991
8
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2992
      }
2993
    }
2994
250
    if (invSuccess)
2995
    {
2996
      Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]);
2997
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2998
    }
2999
  }
3000
2088
  if (node[2].getKind() == STRING_REPLACE)
3001
  {
3002
54
    if (node[2][1] == node[0])
3003
    {
3004
      // str.contains( z, w ) ----> false implies
3005
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3006
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
3007
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
3008
      {
3009
        Node res =
3010
            nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]);
3011
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
3012
      }
3013
    }
3014
54
    if (node[2][0] == node[1])
3015
    {
3016
4
      bool success = false;
3017
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
3018
      {
3019
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3020
        success = true;
3021
      }
3022
      else
3023
      {
3024
        // str.contains( x, z ) ----> false implies
3025
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3026
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
3027
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3028
      }
3029
4
      if (success)
3030
      {
3031
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
3032
      }
3033
    }
3034
  }
3035
  // miniscope based on components that do not contribute to contains
3036
  // for example,
3037
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
3038
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
3039
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3040
2088
  if (StringsEntail::checkLengthOne(node[1]))
3041
  {
3042
1356
    Node lastLhs;
3043
682
    unsigned lastCheckIndex = 0;
3044
694
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3045
    {
3046
126
      unsigned checkIndex = children0.size() - i;
3047
138
      std::vector<Node> checkLhs;
3048
126
      checkLhs.insert(
3049
252
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3050
138
      Node lhs = utils::mkConcat(checkLhs, stype);
3051
138
      Node rhs = children0[checkIndex];
3052
138
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
3053
126
      if (!ctn.isNull() && ctn.getConst<bool>())
3054
      {
3055
12
        lastLhs = lhs;
3056
12
        lastCheckIndex = checkIndex;
3057
      }
3058
      else
3059
      {
3060
114
        break;
3061
      }
3062
    }
3063
682
    if (!lastLhs.isNull())
3064
    {
3065
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
3066
32
                             children0.end());
3067
16
      Node rem = utils::mkConcat(remc, stype);
3068
      Node ret =
3069
          nm->mkNode(STRING_CONCAT,
3070
16
                     nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]),
3071
32
                     rem);
3072
      // for example:
3073
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3074
      // Since we know that the first occurrence of "A" cannot be in the
3075
      // second occurrence of x. Notice this is specific to single characters
3076
      // due to complications with finds that span multiple components for
3077
      // non-characters.
3078
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3079
    }
3080
  }
3081
3082
  // TODO (#1180) incorporate these?
3083
  // contains( t, s ) =>
3084
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3085
  // contains( t, s ) =>
3086
  //   contains( replace( t, s, r ), r ) ----> true
3087
3088
2080
  return node;
3089
}
3090
3091
202
Node SequencesRewriter::rewriteReplaceAll(Node node)
3092
{
3093
202
  Assert(node.getKind() == STRING_REPLACE_ALL);
3094
3095
404
  TypeNode stype = node.getType();
3096
3097
202
  if (node[0].isConst() && node[1].isConst())
3098
  {
3099
80
    std::vector<Node> children;
3100
80
    Node s = node[0];
3101
80
    Node t = node[1];
3102
40
    if (Word::isEmpty(s) || Word::isEmpty(t))
3103
    {
3104
10
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3105
    }
3106
30
    std::size_t sizeS = Word::getLength(s);
3107
30
    std::size_t sizeT = Word::getLength(t);
3108
30
    std::size_t index = 0;
3109
30
    std::size_t curr = 0;
3110
65
    do
3111
    {
3112
95
      curr = Word::find(s, t, index);
3113
95
      if (curr != std::string::npos)
3114
      {
3115
65
        if (curr > index)
3116
        {
3117
32
          children.push_back(Word::substr(s, index, curr - index));
3118
        }
3119
65
        children.push_back(node[2]);
3120
65
        index = curr + sizeT;
3121
      }
3122
      else
3123
      {
3124
30
        children.push_back(Word::substr(s, index, sizeS - index));
3125
      }
3126
95
    } while (curr != std::string::npos && curr < sizeS);
3127
    // constant evaluation
3128
60
    Node res = utils::mkConcat(children, stype);
3129
30
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3130
  }
3131
3132
  // rewrites that apply to both replace and replaceall
3133
324
  Node rri = rewriteReplaceInternal(node);
3134
162
  if (!rri.isNull())
3135
  {
3136
    // printing of the rewrite managed by the call above
3137
4
    return rri;
3138
  }
3139
3140
158
  return node;
3141
}
3142
3143
3038
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3144
{
3145
3038
  Kind nk = node.getKind();
3146
3038
  Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL);
3147
3148
3038
  if (node[1] == node[2])
3149
  {
3150
80
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3151
  }
3152
3153
2958
  if (node[0] == node[1])
3154
  {
3155
    // only holds for replaceall if non-empty
3156
124
    if (nk == STRING_REPLACE || StringsEntail::checkNonEmpty(node[1]))
3157
    {
3158
100
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3159
    }
3160
  }
3161
3162
2858
  return Node::null();
3163
}
3164
3165
443
Node SequencesRewriter::rewriteReplaceRe(Node node)
3166
{
3167
443
  Assert(node.getKind() == STRING_REPLACE_RE);
3168
443
  NodeManager* nm = NodeManager::currentNM();
3169
886
  Node x = node[0];
3170
886
  Node y = node[1];
3171
886
  Node z = node[2];
3172
3173
443
  if (RegExpEntail::isConstRegExp(y))
3174
  {
3175
313
    if (x.isConst())
3176
    {
3177
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3178
77
      std::pair<size_t, size_t> match = firstMatch(x, y);
3179
77
      if (match.first != string::npos)
3180
      {
3181
84
        String s = x.getConst<String>();
3182
        Node ret = nm->mkNode(STRING_CONCAT,
3183
84
                              nm->mkConst(s.substr(0, match.first)),
3184
                              z,
3185
168
                              nm->mkConst(s.substr(match.second)));
3186
42
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3187
      }
3188
      else
3189
      {
3190
35
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3191
      }
3192
    }
3193
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3194
468
    String emptyStr("");
3195
236
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3196
    {
3197
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3198
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3199
    }
3200
  }
3201
362
  return node;
3202
}
3203
3204
245
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3205
{
3206
245
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3207
245
  NodeManager* nm = NodeManager::currentNM();
3208
490
  Node x = node[0];
3209
490
  Node y = node[1];
3210
490
  Node z = node[2];
3211
3212
245
  if (RegExpEntail::isConstRegExp(y))
3213
  {
3214
199
    if (x.isConst())
3215
    {
3216
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3217
      //   "Z" ++ y ++ "Z" ++ y
3218
114
      TypeNode t = x.getType();
3219
114
      Node emp = Word::mkEmptyWord(t);
3220
      Node yp = Rewriter::rewrite(
3221
114
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3222
114
      std::vector<Node> res;
3223
114
      String rem = x.getConst<String>();
3224
57
      std::pair<size_t, size_t> match(0, 0);
3225
151
      while (rem.size() != 0)
3226
      {
3227
56
        match = firstMatch(nm->mkConst(rem), yp);
3228
56
        if (match.first == string::npos)
3229
        {
3230
9
          break;
3231
        }
3232
47
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3233
47
        res.push_back(z);
3234
47
        rem = rem.substr(match.second);
3235
      }
3236
57
      res.push_back(nm->mkConst(rem));
3237
114
      Node ret = utils::mkConcat(res, t);
3238
57
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3239
    }
3240
  }
3241
3242
188
  return node;
3243
}
3244
3245
283
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3246
{
3247
283
  Assert(n.isConst() && n.getType().isStringLike());
3248
283
  Assert(r.getType().isRegExp());
3249
283
  NodeManager* nm = NodeManager::currentNM();
3250
3251
566
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3252
566
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3253
566
  String s = n.getConst<String>();
3254
3255
283
  if (s.size() == 0)
3256
  {
3257
62
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3258
    {
3259
6
      return std::make_pair(0, 0);
3260
    }
3261
    else
3262
    {
3263
56
      return std::make_pair(string::npos, string::npos);
3264
    }
3265
  }
3266
3267
338
  for (size_t i = 0, size = s.size(); i < size; i++)
3268
  {
3269
292
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3270
    {
3271
591
      for (size_t j = i; j <= size; j++)
3272
      {
3273
1007
        String substr = s.substr(i, j - i);
3274
591
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3275
        {
3276
175
          return std::make_pair(i, j);
3277
        }
3278
      }
3279
    }
3280
  }
3281
3282
46
  return std::make_pair(string::npos, string::npos);
3283
}
3284
3285
241
Node SequencesRewriter::rewriteStrReverse(Node node)
3286
{
3287
241
  Assert(node.getKind() == STRING_REV);
3288
241
  NodeManager* nm = NodeManager::currentNM();
3289
482
  Node x = node[0];
3290
241
  if (x.isConst())
3291
  {
3292
    // reverse the characters in the constant
3293
126
    Node retNode = Word::reverse(x);
3294
63
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3295
  }
3296
178
  else if (x.getKind() == STRING_CONCAT)
3297
  {
3298
78
    std::vector<Node> children;
3299
119
    for (const Node& nc : x)
3300
    {
3301
80
      children.push_back(nm->mkNode(STRING_REV, nc));
3302
    }
3303
39
    std::reverse(children.begin(), children.end());
3304
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3305
78
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3306
39
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3307
  }
3308
139
  else if (x.getKind() == STRING_REV)
3309
  {
3310
    // rev( rev( x ) ) --> x
3311
10
    Node retNode = x[0];
3312
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3313
  }
3314
134
  return node;
3315
}
3316
3317
185
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3318
{
3319
185
  Assert(n.getKind() == kind::STRING_PREFIX
3320
         || n.getKind() == kind::STRING_SUFFIX);
3321
185
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3322
185
  if (n[0] == n[1])
3323
  {
3324
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3325
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3326
  }
3327
177
  if (n[0].isConst())
3328
  {
3329
86
    if (Word::isEmpty(n[0]))
3330
    {
3331
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3332
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3333
    }
3334
  }
3335
173
  if (n[1].isConst())
3336
  {
3337
47
    Node s = n[1];
3338
47
    size_t lenS = Word::getLength(s);
3339
47
    if (n[0].isConst())
3340
    {
3341
      Node ret = NodeManager::currentNM()->mkConst(false);
3342
      Node t = n[0];
3343
      size_t lenT = Word::getLength(t);
3344
      if (lenS >= lenT)
3345
      {
3346
        if ((isPrefix && t == Word::prefix(s, lenT))
3347
            || (!isPrefix && t == Word::suffix(s, lenT)))
3348
        {
3349
          ret = NodeManager::currentNM()->mkConst(true);
3350
        }
3351
      }
3352
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3353
    }
3354
47
    else if (lenS == 0)
3355
    {
3356
8
      Node ret = n[0].eqNode(n[1]);
3357
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3358
    }
3359
43
    else if (lenS == 1)
3360
    {
3361
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3362
      // (str.contains "A" x )
3363
      Node ret =
3364
86
          NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS, n[1], n[0]);
3365
43
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3366
    }
3367
  }
3368
252
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3369
252
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3370
252
  Node val;
3371
126
  if (isPrefix)
3372
  {
3373
99
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3374
  }
3375
  else
3376
  {
3377
27
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3378
  }
3379
3380
  // Check if we can turn the prefix/suffix into equalities by showing that the
3381
  // prefix/suffix is at least as long as the string
3382
252
  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
3383
126
  if (!eqs.isNull())
3384
  {
3385
20
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3386
  }
3387
3388
  // general reduction to equality + substr
3389
  Node retNode = n[0].eqNode(
3390
212
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3391
3392
106
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3393
}
3394
3395
91
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3396
{
3397
91
  NodeManager* nm = NodeManager::currentNM();
3398
182
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3399
182
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3400
182
  return res.isNull() ? n : res;
3401
}
3402
3403
103
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3404
{
3405
103
  NodeManager* nm = NodeManager::currentNM();
3406
3407
206
  Node res;
3408
103
  if (len.getKind() == CONST_RATIONAL)
3409
  {
3410
    // c -> "A" repeated c times
3411
182
    Rational ratLen = len.getConst<Rational>();
3412
91
    Assert(ratLen.getDenominator() == 1);
3413
182
    Integer intLen = ratLen.getNumerator();
3414
91
    uint32_t u = intLen.getUnsignedInt();
3415
91
    if (stype.isString())  // string-only
3416
    {
3417
91
      res = nm->mkConst(String(std::string(u, 'A')));
3418
    }
3419
    // we could do this for sequences, but we need to be careful: some
3420
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3421
    // sorts and arrays).
3422
  }
3423
12
  else if (len.getKind() == PLUS)
3424
  {
3425
    // x + y -> norm(x) + norm(y)
3426
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3427
12
    for (const auto& n : len)
3428
    {
3429
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3430
8
      if (sn.isNull())
3431
      {
3432
        return Node::null();
3433
      }
3434
16
      std::vector<Node> snChildren;
3435
8
      utils::getConcat(sn, snChildren);
3436
8
      concatBuilder.append(snChildren);
3437
    }
3438
4
    res = concatBuilder.constructNode();
3439
  }
3440
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3441
20
           && len[0].isConst())
3442
  {
3443
    // c * x -> norm(x) repeated c times
3444
8
    Rational ratReps = len[0].getConst<Rational>();
3445
4
    Assert(ratReps.getDenominator() == 1);
3446
8
    Integer intReps = ratReps.getNumerator();
3447
3448
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3449
4
    if (nRep.isNull())
3450
    {
3451
      return Node::null();
3452
    }
3453
8
    std::vector<Node> nRepChildren;
3454
4
    utils::getConcat(nRep, nRepChildren);
3455
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3456
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3457
    {
3458
8
      concatBuilder.append(nRepChildren);
3459
    }
3460
4
    res = concatBuilder.constructNode();
3461
  }
3462
4
  else if (len.getKind() == STRING_LENGTH)
3463
  {
3464
    // len(x) -> x
3465
4
    res = len[0];
3466
  }
3467
103
  return res;
3468
}
3469
3470
393
Node SequencesRewriter::rewriteSeqUnit(Node node)
3471
{
3472
393
  NodeManager* nm = NodeManager::currentNM();
3473
393
  if (node[0].isConst())
3474
  {
3475
274
    std::vector<Node> seq;
3476
137
    seq.push_back(node[0]);
3477
274
    TypeNode stype = node[0].getType();
3478
274
    Node ret = nm->mkConst(Sequence(stype, seq));
3479
137
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3480
  }
3481
256
  return node;
3482
}
3483
3484
214768
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3485
{
3486
429536
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3487
214768
                           << "." << std::endl;
3488
214768
  if (d_statistics != nullptr)
3489
  {
3490
214244
    (*d_statistics) << r;
3491
  }
3492
214768
  return ret;
3493
}
3494
3495
103631
Node SequencesRewriter::postProcessRewrite(Node node, Node ret)
3496
{
3497
103631
  NodeManager* nm = NodeManager::currentNM();
3498
  // standard post-processing
3499
  // We rewrite (string) equalities immediately here. This allows us to forego
3500
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3501
  // of { s=t, t=s, true, false } ).
3502
103631
  Kind retk = ret.getKind();
3503
103631
  if (retk == OR || retk == AND)
3504
  {
3505
6620
    std::vector<Node> children;
3506
3310
    bool childChanged = false;
3507
12620
    for (const Node& cret : ret)
3508
    {
3509
18620
      Node creter = cret;
3510
9310
      if (cret.getKind() == EQUAL)
3511
      {
3512
2726
        creter = rewriteEqualityExt(cret);
3513
      }
3514
6584
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3515
      {
3516
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3517
      }
3518
9310
      childChanged = childChanged || cret != creter;
3519
9310
      children.push_back(creter);
3520
    }
3521
3310
    if (childChanged)
3522
    {
3523
356
      ret = nm->mkNode(retk, children);
3524
3310
    }
3525
  }
3526
100321
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3527
  {
3528
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3529
  }
3530
100321
  else if (retk == EQUAL && node.getKind() != EQUAL)
3531
  {
3532
3292
    Trace("strings-rewrite")
3533
1646
        << "Apply extended equality rewrite on " << ret << std::endl;
3534
1646
    ret = rewriteEqualityExt(ret);
3535
  }
3536
103631
  return ret;
3537
}
3538
3539
}  // namespace strings
3540
}  // namespace theory
3541
29577
}  // namespace cvc5