GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1592 1649 96.5 %
Date: 2021-08-01 Branches: 5039 9664 52.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/statistics_registry.h"
31
#include "util/string.h"
32
33
using namespace std;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
10027
SequencesRewriter::SequencesRewriter(HistogramStat<Rewrite>* statistics)
41
10027
    : d_statistics(statistics), d_stringsEntail(*this)
42
{
43
10027
}
44
45
119568
Node SequencesRewriter::rewriteEquality(Node node)
46
{
47
119568
  Assert(node.getKind() == kind::EQUAL);
48
119568
  if (node[0] == node[1])
49
  {
50
8318
    Node ret = NodeManager::currentNM()->mkConst(true);
51
4159
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
52
  }
53
115409
  else if (node[0].isConst() && node[1].isConst())
54
  {
55
4362
    Node ret = NodeManager::currentNM()->mkConst(false);
56
2181
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
57
  }
58
59
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
60
338998
  for (unsigned r = 0; r < 2; r++)
61
  {
62
    // must call rewrite contains directly to avoid infinite loop
63
    // we do a fix point since we may rewrite contains terms to simpler
64
    // contains terms.
65
452055
    Node ctn = d_stringsEntail.checkContains(node[r], node[1 - r], false);
66
226285
    if (!ctn.isNull())
67
    {
68
36326
      if (!ctn.getConst<bool>())
69
      {
70
515
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
71
      }
72
      else
73
      {
74
        // definitely contains but not syntactically equal
75
        // We may be able to simplify, e.g.
76
        //  str.++( x, "a" ) == "a"  ----> x = ""
77
      }
78
    }
79
  }
80
81
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
82
  // This covers cases like str.++( x, x ) == "a" ---> false
83
225426
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
84
225426
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
85
225426
  Node len_eq = len0.eqNode(len1);
86
112713
  len_eq = Rewriter::rewrite(len_eq);
87
112713
  if (len_eq.isConst() && !len_eq.getConst<bool>())
88
  {
89
5
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
90
  }
91
92
225416
  std::vector<Node> c[2];
93
338124
  for (unsigned i = 0; i < 2; i++)
94
  {
95
225416
    utils::getConcat(node[i], c[i]);
96
  }
97
98
  // check if the prefix, suffix mismatches
99
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
100
112708
  unsigned minsize = std::min(c[0].size(), c[1].size());
101
338051
  for (unsigned r = 0; r < 2; r++)
102
  {
103
238089
    for (unsigned i = 0; i < minsize; i++)
104
    {
105
235856
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
106
235856
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
107
248556
      Node s = c[0][index1];
108
248556
      Node t = c[1][index2];
109
235856
      if (s.isConst() && t.isConst())
110
      {
111
5858
        size_t lenS = Word::getLength(s);
112
5858
        size_t lenT = Word::getLength(t);
113
5858
        size_t lenShort = lenS <= lenT ? lenS : lenT;
114
15600
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
115
9742
                                : Word::strncmp(s, t, lenShort);
116
5858
        if (!isSameFix)
117
        {
118
92
          Node ret = NodeManager::currentNM()->mkConst(false);
119
46
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
120
        }
121
      }
122
235810
      if (s != t)
123
      {
124
223110
        break;
125
      }
126
    }
127
  }
128
129
  // standard ordering
130
112662
  if (node[0] > node[1])
131
  {
132
49524
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
133
24762
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
134
  }
135
87900
  return node;
136
}
137
138
113156
Node SequencesRewriter::rewriteEqualityExt(Node node)
139
{
140
113156
  Assert(node.getKind() == EQUAL);
141
113156
  if (node[0].getType().isInteger())
142
  {
143
20770
    return rewriteArithEqualityExt(node);
144
  }
145
92386
  if (node[0].getType().isStringLike())
146
  {
147
92386
    return rewriteStrEqualityExt(node);
148
  }
149
  return node;
150
}
151
152
92386
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
153
{
154
92386
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
155
184772
  TypeNode stype = node[0].getType();
156
157
92386
  NodeManager* nm = NodeManager::currentNM();
158
184772
  std::vector<Node> c[2];
159
184772
  Node new_ret;
160
277158
  for (unsigned i = 0; i < 2; i++)
161
  {
162
184772
    utils::getConcat(node[i], c[i]);
163
  }
164
  // ------- equality unification
165
92386
  bool changed = false;
166
277158
  for (unsigned i = 0; i < 2; i++)
167
  {
168
193944
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
169
    {
170
4586
      c[0].pop_back();
171
4586
      c[1].pop_back();
172
4586
      changed = true;
173
    }
174
    // splice constants
175
548175
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
176
337800
        && c[1].back().isConst())
177
    {
178
1194
      Node cs[2];
179
      size_t csl[2];
180
1791
      for (unsigned j = 0; j < 2; j++)
181
      {
182
1194
        cs[j] = c[j].back();
183
1194
        csl[j] = Word::getLength(cs[j]);
184
      }
185
597
      size_t larger = csl[0] > csl[1] ? 0 : 1;
186
597
      size_t smallerSize = csl[1 - larger];
187
1194
      if (cs[1 - larger]
188
1194
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
189
                     : Word::prefix(cs[larger], smallerSize)))
190
      {
191
577
        size_t sizeDiff = csl[larger] - smallerSize;
192
577
        c[larger][c[larger].size() - 1] =
193
1154
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
194
                   : Word::suffix(cs[larger], sizeDiff);
195
577
        c[1 - larger].pop_back();
196
577
        changed = true;
197
      }
198
    }
199
554316
    for (unsigned j = 0; j < 2; j++)
200
    {
201
369544
      std::reverse(c[j].begin(), c[j].end());
202
    }
203
  }
204
92386
  if (changed)
205
  {
206
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
207
5486
    Node s1 = utils::mkConcat(c[0], stype);
208
5486
    Node s2 = utils::mkConcat(c[1], stype);
209
4439
    if (s1 != node[0] || s2 != node[1])
210
    {
211
3392
      new_ret = s1.eqNode(s2);
212
      // We generally don't apply the extended equality rewriter if the
213
      // original node was an equality but we may be able to do additional
214
      // rewriting here, e.g.,
215
      // x++y = "" --> x = "" and y = ""
216
3392
      return returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY, true);
217
    }
218
  }
219
220
  // ------- homogeneous constants
221
266885
  for (unsigned i = 0; i < 2; i++)
222
  {
223
355841
    Node cn = StringsEntail::checkHomogeneousString(node[i]);
224
177950
    if (!cn.isNull() && !Word::isEmpty(cn))
225
    {
226
26278
      Assert(cn.isConst());
227
26278
      Assert(Word::getLength(cn) == 1);
228
229
      // The operands of the concat on each side of the equality without
230
      // constant strings
231
78834
      std::vector<Node> trimmed[2];
232
      // Counts the number of `cn`s on each side
233
26278
      size_t numCns[2] = {0, 0};
234
78795
      for (size_t j = 0; j < 2; j++)
235
      {
236
        // Sort the operands of the concats on both sides of the equality
237
        // (since both sides may only contain one char, the order does not
238
        // matter)
239
52537
        std::sort(c[j].begin(), c[j].end());
240
104990
        for (const Node& cc : c[j])
241
        {
242
52473
          if (cc.isConst())
243
          {
244
            // Count the number of `cn`s in the string constant and make
245
            // sure that all chars are `cn`s
246
52406
            std::vector<Node> veccc = Word::getChars(cc);
247
51721
            for (const Node& cv : veccc)
248
            {
249
25528
              if (cv != cn)
250
              {
251
                // This conflict case should mostly should be taken care of by
252
                // multiset reasoning in the strings rewriter, but we recognize
253
                // this conflict just in case.
254
20
                new_ret = nm->mkConst(false);
255
                return returnRewrite(
256
20
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
257
              }
258
25508
              numCns[j]++;
259
            }
260
          }
261
          else
262
          {
263
26260
            trimmed[j].push_back(cc);
264
          }
265
        }
266
      }
267
268
      // We have to remove the same number of `cn`s from both sides, so the
269
      // side with less `cn`s determines how many we can remove
270
26258
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
271
78774
      for (size_t j = 0; j < 2; j++)
272
      {
273
52516
        size_t diff = numCns[j] - trimmedConst;
274
52516
        if (diff != 0)
275
        {
276
          // Add a constant string to the side with more `cn`s to restore
277
          // the difference in number of `cn`s
278
46822
          std::vector<Node> vec(diff, cn);
279
23411
          trimmed[j].push_back(Word::mkWordFlatten(vec));
280
        }
281
      }
282
283
52477
      Node lhs = utils::mkConcat(trimmed[i], stype);
284
52477
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
285
26258
      if (lhs != node[i] || ss != node[1 - i])
286
      {
287
        // e.g.
288
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
289
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
290
        //
291
        // We generally don't apply the extended equality rewriter if the
292
        // original node was an equality but we may be able to do additional
293
        // rewriting here.
294
39
        new_ret = lhs.eqNode(ss);
295
39
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST, true);
296
      }
297
    }
298
  }
299
300
  // ------- rewrites for (= "" _)
301
177870
  Node empty = Word::mkEmptyWord(stype);
302
251697
  for (size_t i = 0; i < 2; i++)
303
  {
304
170426
    if (node[i] == empty)
305
    {
306
110194
      Node ne = node[1 - i];
307
58929
      if (ne.getKind() == STRING_REPLACE)
308
      {
309
        // (= "" (str.replace x y x)) ---> (= x "")
310
2093
        if (ne[0] == ne[2])
311
        {
312
454
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
313
227
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
314
        }
315
316
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
317
1866
        if (StringsEntail::checkNonEmpty(ne[2]))
318
        {
319
          Node ret =
320
              nm->mkNode(AND,
321
686
                         nm->mkNode(EQUAL, ne[0], empty),
322
1372
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
323
343
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
324
        }
325
326
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
327
1523
        if (StringsEntail::checkLengthOne(ne[1], true) && ne[2] == empty)
328
        {
329
530
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
330
265
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
331
        }
332
      }
333
56836
      else if (ne.getKind() == STRING_SUBSTR)
334
      {
335
43305
        Node zero = nm->mkConst(Rational(0));
336
337
25067
        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
338
        {
339
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
340
4215
          if (ne[1] == zero)
341
          {
342
3906
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
343
1953
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
344
          }
345
346
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
347
          // if n >= 0 and m > 0
348
4524
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
349
2262
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
350
        }
351
352
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
353
20852
        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
354
        {
355
5228
          Node ret = nm->mkNode(LEQ, ne[2], zero);
356
2614
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
357
        }
358
      }
359
    }
360
  }
361
362
  // ------- rewrites for (= (str.replace _ _ _) _)
363
243277
  for (size_t i = 0; i < 2; i++)
364
  {
365
162495
    if (node[i].getKind() == STRING_REPLACE)
366
    {
367
5609
      Node repl = node[i];
368
5609
      Node x = node[1 - i];
369
370
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
371
3049
      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
372
      {
373
        Node ret = nm->mkNode(
374
38
            EQUAL, empty, nm->mkNode(STRING_REPLACE, x, repl[2], repl[1]));
375
19
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
376
      }
377
378
      // (= x (str.replace y x y)) ---> (= x y)
379
3030
      if (repl[0] == repl[2] && x == repl[1])
380
      {
381
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
382
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
383
      }
384
385
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
386
3028
      if (x == repl[0])
387
      {
388
2520
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
389
1492
        if (eq.isConst() && !eq.getConst<bool>())
390
        {
391
928
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_CONTAINS, x, repl[1]));
392
464
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
393
        }
394
      }
395
396
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
397
      // if (str.len y) = (str.len z)
398
2564
      if (repl[2] == x)
399
      {
400
2650
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
401
2650
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
402
1327
        if (ArithEntail::checkEq(lenY, lenZ))
403
        {
404
          Node ret = nm->mkNode(OR,
405
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
406
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
407
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
408
        }
409
      }
410
    }
411
  }
412
413
  // Try to rewrite (= x y) into a conjunction of equalities based on length
414
  // entailment.
415
  //
416
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
417
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
418
  //
419
  // where yi' and yi'' correspond to some yj and
420
  //   (<= (str.len x) (str.++ y1' ... ym'))
421
239008
  for (unsigned i = 0; i < 2; i++)
422
  {
423
159941
    if (node[1 - i].getKind() == STRING_CONCAT)
424
    {
425
2159
      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
426
2159
      if (!new_ret.isNull())
427
      {
428
1715
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
429
      }
430
    }
431
  }
432
433
79067
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
434
  {
435
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
436
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
437
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
438
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
439
    //
440
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
441
    //
442
    // This rewrite performs length-based equality splitting: If we can show
443
    // that two prefixes have the same length, we can split an equality into
444
    // two equalities, one over the prefixes and another over the suffixes.
445
28
    std::vector<Node> v0, v1;
446
18
    utils::getConcat(node[0], v0);
447
18
    utils::getConcat(node[1], v1);
448
18
    size_t startRhs = 0;
449
70
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
450
    {
451
112
      const std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
452
112
      Node pfx0 = utils::mkConcat(pfxv0, stype);
453
222
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
454
      {
455
192
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
456
        {
457
298
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
458
298
          Node pfx1 = utils::mkConcat(pfxv1, stype);
459
298
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
460
298
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
461
462
164
          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
463
          {
464
8
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
465
8
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
466
            Node ret = nm->mkNode(kind::AND,
467
8
                                  pfx0.eqNode(pfx1),
468
8
                                  utils::mkConcat(sfxv0, stype)
469
16
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
470
4
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
471
          }
472
160
          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
473
          {
474
            // The prefix on the right-hand side is strictly longer than the
475
            // prefix on the left-hand side, so we try to strip the right-hand
476
            // prefix by the length of the left-hand prefix
477
            //
478
            // Example:
479
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
480
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
481
22
            std::vector<Node> rpfxv1;
482
22
            if (StringsEntail::stripSymbolicLength(
483
                    pfxv1, rpfxv1, 1, lenPfx0, true))
484
            {
485
              // The rewrite requires the full left-hand prefix length to be
486
              // stripped (otherwise we would have to keep parts of the
487
              // left-hand prefix).
488
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
489
              {
490
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
491
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
492
                Node ret =
493
                    nm->mkNode(kind::AND,
494
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
495
                               utils::mkConcat(sfxv0, stype)
496
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
497
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
498
              }
499
            }
500
501
            // If the prefix of the right-hand side is (strictly) longer than
502
            // the prefix of the left-hand side, we can advance the left-hand
503
            // side (since the length of the right-hand side is only increasing
504
            // in the inner loop)
505
22
            break;
506
          }
507
138
          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
508
          {
509
            // The prefix on the left-hand side is strictly longer than the
510
            // prefix on the right-hand side, so we try to strip the left-hand
511
            // prefix by the length of the right-hand prefix
512
            //
513
            // Example:
514
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
515
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
516
56
            std::vector<Node> sfxv0 = pfxv0;
517
56
            std::vector<Node> rpfxv0;
518
30
            if (StringsEntail::stripSymbolicLength(
519
                    sfxv0, rpfxv0, 1, lenPfx1, true))
520
            {
521
              // The rewrite requires the full right-hand prefix length to be
522
              // stripped (otherwise we would have to keep parts of the
523
              // right-hand prefix).
524
6
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
525
              {
526
4
                sfxv0.insert(sfxv0.end(), v0.begin() + i, v0.end());
527
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
528
                Node ret =
529
                    nm->mkNode(kind::AND,
530
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
531
8
                               utils::mkConcat(sfxv0, stype)
532
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
533
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
534
              }
535
            }
536
537
            // If the prefix of the left-hand side is (strictly) longer than
538
            // the prefix of the right-hand side, then we don't need to check
539
            // that right-hand prefix for future left-hand prefixes anymore
540
            // (since they are increasing in length)
541
26
            startRhs = j + 1;
542
          }
543
        }
544
      }
545
    }
546
  }
547
548
79059
  return node;
549
}
550
551
20770
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
552
{
553
20770
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
554
555
  // cases where we can solve the equality
556
557
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
558
559
20770
  return node;
560
}
561
562
222423
Node SequencesRewriter::rewriteLength(Node node)
563
{
564
222423
  Assert(node.getKind() == STRING_LENGTH);
565
222423
  NodeManager* nm = NodeManager::currentNM();
566
222423
  Kind nk0 = node[0].getKind();
567
222423
  if (node[0].isConst())
568
  {
569
13538
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
570
6769
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
571
  }
572
215654
  else if (nk0 == kind::STRING_CONCAT)
573
  {
574
10879
    Node tmpNode = node[0];
575
10879
    if (tmpNode.getKind() == kind::STRING_CONCAT)
576
    {
577
21758
      std::vector<Node> node_vec;
578
42372
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
579
      {
580
31493
        if (tmpNode[i].isConst())
581
        {
582
7640
          node_vec.push_back(
583
15280
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
584
        }
585
        else
586
        {
587
23853
          node_vec.push_back(NodeManager::currentNM()->mkNode(
588
              kind::STRING_LENGTH, tmpNode[i]));
589
        }
590
      }
591
21758
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
592
10879
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
593
    }
594
  }
595
204775
  else if (nk0 == STRING_REPLACE || nk0 == STRING_REPLACE_ALL)
596
  {
597
1901
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
598
1901
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
599
971
    if (len1 == len2)
600
    {
601
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
602
82
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
603
41
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
604
930
    }
605
  }
606
203804
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
607
203700
           || nk0 == STRING_UPDATE)
608
  {
609
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
610
    // len( update( x, n, y ) ) = len( x )
611
240
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
612
120
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
613
  }
614
203684
  else if (nk0 == SEQ_UNIT)
615
  {
616
244
    Node retNode = nm->mkConst(Rational(1));
617
122
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
618
  }
619
204492
  return node;
620
}
621
622
// TODO (#1180) add rewrite
623
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
624
//  str.substr( x, n1, n2+n3 )
625
57858
Node SequencesRewriter::rewriteConcat(Node node)
626
{
627
57858
  Assert(node.getKind() == kind::STRING_CONCAT);
628
115716
  Trace("strings-rewrite-debug")
629
57858
      << "Strings::rewriteConcat start " << node << std::endl;
630
115716
  std::vector<Node> node_vec;
631
115716
  Node preNode = Node::null();
632
249677
  for (Node tmpNode : node)
633
  {
634
191819
    if (tmpNode.getKind() == STRING_CONCAT)
635
    {
636
1289
      unsigned j = 0;
637
      // combine the first term with the previous constant if applicable
638
1289
      if (!preNode.isNull())
639
      {
640
315
        if (tmpNode[0].isConst())
641
        {
642
278
          std::vector<Node> wvec;
643
139
          wvec.push_back(preNode);
644
139
          wvec.push_back(tmpNode[0]);
645
139
          preNode = Word::mkWordFlatten(wvec);
646
139
          node_vec.push_back(preNode);
647
        }
648
        else
649
        {
650
176
          node_vec.push_back(preNode);
651
176
          node_vec.push_back(tmpNode[0]);
652
        }
653
315
        preNode = Node::null();
654
315
        ++j;
655
      }
656
      // insert the middle terms to node_vec
657
1289
      if (j <= tmpNode.getNumChildren() - 1)
658
      {
659
1289
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
660
      }
661
      // take the last term as the current
662
1289
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
663
    }
664
191819
    if (!tmpNode.isConst())
665
    {
666
91496
      if (!preNode.isNull())
667
      {
668
17480
        if (preNode.isConst() && !Word::isEmpty(preNode))
669
        {
670
16731
          node_vec.push_back(preNode);
671
        }
672
17480
        preNode = Node::null();
673
      }
674
91496
      node_vec.push_back(tmpNode);
675
    }
676
    else
677
    {
678
100323
      if (preNode.isNull())
679
      {
680
49079
        preNode = tmpNode;
681
      }
682
      else
683
      {
684
102488
        std::vector<Node> vec;
685
51244
        vec.push_back(preNode);
686
51244
        vec.push_back(tmpNode);
687
51244
        preNode = Word::mkWordFlatten(vec);
688
      }
689
    }
690
  }
691
57858
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
692
  {
693
30231
    node_vec.push_back(preNode);
694
  }
695
696
  // Sort adjacent operands in str.++ that all result in the same string or the
697
  // empty string.
698
  //
699
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
700
  // (str.++ ... [sort those 3 arguments] ... )
701
57858
  size_t lastIdx = 0;
702
115716
  Node lastX;
703
198932
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
704
  {
705
282148
    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
706
141074
    bool nextX = false;
707
141074
    if (s != lastX)
708
    {
709
137666
      nextX = true;
710
    }
711
712
141074
    if (nextX)
713
    {
714
137666
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
715
137666
      lastX = s;
716
137666
      lastIdx = i;
717
    }
718
  }
719
57858
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
720
721
115716
  TypeNode tn = node.getType();
722
115716
  Node retNode = utils::mkConcat(node_vec, tn);
723
115716
  Trace("strings-rewrite-debug")
724
57858
      << "Strings::rewriteConcat end " << retNode << std::endl;
725
57858
  if (retNode != node)
726
  {
727
19814
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
728
  }
729
38044
  return node;
730
}
731
732
2864
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
733
{
734
2864
  Assert(node.getKind() == kind::REGEXP_CONCAT);
735
2864
  NodeManager* nm = NodeManager::currentNM();
736
5728
  Trace("strings-rewrite-debug")
737
2864
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
738
5728
  Node retNode = node;
739
5728
  std::vector<Node> vec;
740
2864
  bool changed = false;
741
5728
  Node emptyRe;
742
743
  // get the string type that are members of this regular expression
744
5728
  TypeNode rtype = node.getType();
745
5728
  TypeNode stype;
746
2864
  if (rtype.isRegExp())
747
  {
748
    // standard regular expressions are for strings
749
2864
    stype = nm->stringType();
750
  }
751
  else
752
  {
753
    Unimplemented();
754
  }
755
756
10948
  for (const Node& c : node)
757
  {
758
8120
    if (c.getKind() == REGEXP_CONCAT)
759
    {
760
218
      changed = true;
761
698
      for (const Node& cc : c)
762
      {
763
480
        vec.push_back(cc);
764
      }
765
    }
766
19174
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
767
19094
             && Word::isEmpty(c[0]))
768
    {
769
128
      changed = true;
770
128
      emptyRe = c;
771
    }
772
7774
    else if (c.getKind() == REGEXP_EMPTY)
773
    {
774
      // re.++( ..., empty, ... ) ---> empty
775
72
      Node ret = nm->mkNode(REGEXP_EMPTY);
776
36
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
777
    }
778
    else
779
    {
780
7738
      vec.push_back(c);
781
    }
782
  }
783
2828
  if (changed)
784
  {
785
    // flatten
786
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
787
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
788
328
    if (vec.empty())
789
    {
790
2
      Assert(!emptyRe.isNull());
791
2
      retNode = emptyRe;
792
    }
793
    else
794
    {
795
326
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
796
    }
797
328
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
798
  }
799
5000
  Trace("strings-rewrite-debug")
800
2500
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
801
5000
  std::vector<Node> cvec;
802
  // the current accumulation of constant strings
803
5000
  std::vector<Node> preReStr;
804
  // whether the last component was (_)*
805
2500
  bool lastAllStar = false;
806
5000
  String emptyStr = String("");
807
  // this loop checks to see if components can be combined or dropped
808
12324
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
809
  {
810
19648
    Node curr;
811
9824
    if (i < size)
812
    {
813
7324
      curr = vec[i];
814
7324
      Assert(curr.getKind() != REGEXP_CONCAT);
815
    }
816
    // update preReStr
817
9824
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
818
    {
819
3008
      lastAllStar = false;
820
3008
      preReStr.push_back(curr[0]);
821
3008
      curr = Node::null();
822
    }
823
6816
    else if (!preReStr.empty())
824
    {
825
2552
      Assert(!lastAllStar);
826
      // this groups consecutive strings a++b ---> ab
827
5104
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
828
2552
      cvec.push_back(acc);
829
2552
      preReStr.clear();
830
    }
831
4264
    else if (!curr.isNull() && lastAllStar)
832
    {
833
      // if empty, drop it
834
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
835
324
      if (RegExpEntail::isConstRegExp(curr)
836
324
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
837
      {
838
44
        curr = Node::null();
839
      }
840
    }
841
9824
    if (!curr.isNull())
842
    {
843
4272
      lastAllStar = false;
844
4272
      if (curr.getKind() == REGEXP_STAR)
845
      {
846
        // we can group stars (a)* ++ (a)* ---> (a)*
847
2252
        if (!cvec.empty() && cvec.back() == curr)
848
        {
849
8
          curr = Node::null();
850
        }
851
2244
        else if (curr[0].getKind() == REGEXP_SIGMA)
852
        {
853
902
          Assert(!lastAllStar);
854
902
          lastAllStar = true;
855
          // go back and remove empty ones from back of cvec
856
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
857
2572
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
858
3452
                 && RegExpEntail::testConstStringInRegExp(
859
722
                     emptyStr, 0, cvec.back()))
860
          {
861
8
            cvec.pop_back();
862
          }
863
        }
864
      }
865
    }
866
9824
    if (!curr.isNull())
867
    {
868
4264
      cvec.push_back(curr);
869
    }
870
  }
871
2500
  Assert(!cvec.empty());
872
2500
  retNode = utils::mkConcat(cvec, rtype);
873
2500
  if (retNode != node)
874
  {
875
    // handles all cases where consecutive re constants are combined or
876
    // dropped as described in the loop above.
877
198
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
878
  }
879
880
  // flipping adjacent star arguments
881
2302
  changed = false;
882
6356
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
883
  {
884
4054
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
885
    {
886
      // by convention, flip the order (a*)++a ---> a++(a*)
887
64
      std::swap(cvec[i], cvec[i + 1]);
888
64
      changed = true;
889
    }
890
  }
891
2302
  if (changed)
892
  {
893
48
    retNode = utils::mkConcat(cvec, rtype);
894
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
895
  }
896
2254
  return node;
897
}
898
899
1170
Node SequencesRewriter::rewriteStarRegExp(TNode node)
900
{
901
1170
  Assert(node.getKind() == REGEXP_STAR);
902
1170
  NodeManager* nm = NodeManager::currentNM();
903
2340
  Node retNode = node;
904
1170
  if (node[0].getKind() == REGEXP_STAR)
905
  {
906
    // ((R)*)* ---> R*
907
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
908
  }
909
3962
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
910
3902
           && Word::isEmpty(node[0][0]))
911
  {
912
    // ("")* ---> ""
913
18
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
914
  }
915
1148
  else if (node[0].getKind() == REGEXP_EMPTY)
916
  {
917
    // (empty)* ---> ""
918
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
919
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
920
  }
921
1140
  else if (node[0].getKind() == REGEXP_UNION)
922
  {
923
    // simplification of unions under star
924
256
    if (RegExpEntail::hasEpsilonNode(node[0]))
925
    {
926
38
      bool changed = false;
927
38
      std::vector<Node> node_vec;
928
120
      for (const Node& nc : node[0])
929
      {
930
240
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
931
236
            && Word::isEmpty(nc[0]))
932
        {
933
          // can be removed
934
38
          changed = true;
935
        }
936
        else
937
        {
938
44
          node_vec.push_back(nc);
939
        }
940
      }
941
38
      if (changed)
942
      {
943
38
        retNode = node_vec.size() == 1 ? node_vec[0]
944
                                       : nm->mkNode(REGEXP_UNION, node_vec);
945
38
        retNode = nm->mkNode(REGEXP_STAR, retNode);
946
        // simplification of union beneath star based on loop above
947
        // for example, ( "" | "a" )* ---> ("a")*
948
38
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
949
      }
950
    }
951
  }
952
1102
  return node;
953
}
954
955
1080
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
956
{
957
1080
  Kind nk = node.getKind();
958
1080
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
959
2160
  Trace("strings-rewrite-debug")
960
1080
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
961
2160
  std::vector<Node> node_vec;
962
2160
  std::vector<Node> polRegExp[2];
963
3463
  for (const Node& ni : node)
964
  {
965
2385
    if (ni.getKind() == nk)
966
    {
967
42
      for (const Node& nic : ni)
968
      {
969
28
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
970
        {
971
26
          node_vec.push_back(nic);
972
        }
973
      }
974
    }
975
2371
    else if (ni.getKind() == REGEXP_EMPTY)
976
    {
977
65
      if (nk == REGEXP_INTER)
978
      {
979
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
980
      }
981
      // otherwise, can ignore
982
    }
983
2306
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
984
    {
985
8
      if (nk == REGEXP_UNION)
986
      {
987
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
988
      }
989
      // otherwise, can ignore
990
    }
991
2298
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
992
    {
993
2259
      node_vec.push_back(ni);
994
2259
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
995
4518
      Node nia = pindex == 1 ? ni[0] : ni;
996
2259
      polRegExp[pindex].push_back(nia);
997
    }
998
  }
999
1078
  NodeManager* nm = NodeManager::currentNM();
1000
  // use inclusion tests
1001
1274
  for (const Node& negMem : polRegExp[1])
1002
  {
1003
346
    for (const Node& posMem : polRegExp[0])
1004
    {
1005
284
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
1006
284
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
1007
      // inclusion test for conflicting case m1 contains m2
1008
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
1009
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
1010
150
      if (RegExpEntail::regExpIncludes(m1, m2))
1011
      {
1012
32
        Node retNode;
1013
16
        if (nk == REGEXP_INTER)
1014
        {
1015
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY);
1016
        }
1017
        else
1018
        {
1019
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1020
        }
1021
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1022
      }
1023
    }
1024
  }
1025
2124
  Node retNode;
1026
1062
  if (node_vec.empty())
1027
  {
1028
    if (nk == REGEXP_INTER)
1029
    {
1030
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
1031
    }
1032
    else
1033
    {
1034
      retNode = nm->mkNode(kind::REGEXP_EMPTY);
1035
    }
1036
  }
1037
  else
1038
  {
1039
1062
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1040
  }
1041
1062
  if (retNode != node)
1042
  {
1043
    // flattening and removing children, based on loop above
1044
104
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1045
  }
1046
958
  return node;
1047
}
1048
1049
24
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1050
{
1051
24
  Assert(node.getKind() == REGEXP_LOOP);
1052
48
  Node retNode = node;
1053
48
  Node r = node[0];
1054
24
  if (r.getKind() == REGEXP_STAR)
1055
  {
1056
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1057
  }
1058
24
  NodeManager* nm = NodeManager::currentNM();
1059
48
  cvc5::Rational rMaxInt(String::maxSize());
1060
24
  uint32_t l = utils::getLoopMinOccurrences(node);
1061
48
  std::vector<Node> vec_nodes;
1062
110
  for (unsigned i = 0; i < l; i++)
1063
  {
1064
86
    vec_nodes.push_back(r);
1065
  }
1066
  Node n =
1067
24
      vec_nodes.size() == 0
1068
24
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1069
48
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1070
24
  uint32_t u = utils::getLoopMaxOccurrences(node);
1071
24
  if (u < l)
1072
  {
1073
8
    std::vector<Node> nvec;
1074
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1075
  }
1076
20
  else if (u == l)
1077
  {
1078
10
    retNode = n;
1079
  }
1080
  else
1081
  {
1082
20
    std::vector<Node> vec2;
1083
10
    vec2.push_back(n);
1084
20
    TypeNode rtype = nm->regExpType();
1085
52
    for (uint32_t j = l; j < u; j++)
1086
    {
1087
42
      vec_nodes.push_back(r);
1088
42
      n = utils::mkConcat(vec_nodes, rtype);
1089
42
      vec2.push_back(n);
1090
    }
1091
10
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1092
  }
1093
48
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1094
24
                      << std::endl;
1095
24
  if (retNode != node)
1096
  {
1097
24
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1098
  }
1099
  return node;
1100
}
1101
1102
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1103
{
1104
4
  Assert(node.getKind() == REGEXP_REPEAT);
1105
4
  NodeManager* nm = NodeManager::currentNM();
1106
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1107
4
  unsigned r = utils::getRepeatAmount(node);
1108
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1109
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1110
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1111
}
1112
1113
31
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1114
{
1115
31
  Assert(node.getKind() == REGEXP_OPT);
1116
31
  NodeManager* nm = NodeManager::currentNM();
1117
  Node retNode =
1118
      nm->mkNode(REGEXP_UNION,
1119
62
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1120
124
                 node[0]);
1121
62
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1122
}
1123
1124
45
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1125
{
1126
45
  Assert(node.getKind() == REGEXP_PLUS);
1127
45
  NodeManager* nm = NodeManager::currentNM();
1128
  Node retNode =
1129
90
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1130
90
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1131
}
1132
1133
55
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1134
{
1135
55
  Assert(node.getKind() == REGEXP_DIFF);
1136
55
  NodeManager* nm = NodeManager::currentNM();
1137
  Node retNode =
1138
110
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1139
110
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1140
}
1141
1142
376
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1143
{
1144
376
  Assert(node.getKind() == REGEXP_RANGE);
1145
  unsigned ch[2];
1146
1128
  for (size_t i = 0; i < 2; ++i)
1147
  {
1148
752
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1149
    {
1150
      // not applied to characters, it is not handled
1151
      return node;
1152
    }
1153
752
    ch[i] = node[i].getConst<String>().front();
1154
  }
1155
1156
376
  NodeManager* nm = NodeManager::currentNM();
1157
376
  if (node[0] == node[1])
1158
  {
1159
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1160
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1161
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1162
  }
1163
1164
376
  if (ch[0] > ch[1])
1165
  {
1166
    // re.range( "B", "A" ) ---> re.none
1167
4
    Node retNode = nm->mkNode(REGEXP_EMPTY);
1168
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1169
  }
1170
374
  return node;
1171
}
1172
1173
11572
Node SequencesRewriter::rewriteMembership(TNode node)
1174
{
1175
11572
  NodeManager* nm = NodeManager::currentNM();
1176
23144
  Node x = node[0];
1177
23144
  Node r = node[1];
1178
1179
23144
  TypeNode stype = x.getType();
1180
23144
  TypeNode rtype = r.getType();
1181
1182
11572
  if(r.getKind() == kind::REGEXP_EMPTY)
1183
  {
1184
132
    Node retNode = NodeManager::currentNM()->mkConst(false);
1185
66
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1186
  }
1187
11506
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1188
  {
1189
    // test whether x in node[1]
1190
3882
    cvc5::String s = x.getConst<String>();
1191
1941
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1192
3882
    Node retNode = NodeManager::currentNM()->mkConst(test);
1193
1941
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1194
  }
1195
9565
  else if (r.getKind() == kind::REGEXP_SIGMA)
1196
  {
1197
124
    Node one = nm->mkConst(Rational(1));
1198
124
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1199
62
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1200
  }
1201
9503
  else if (r.getKind() == kind::REGEXP_STAR)
1202
  {
1203
3145
    if (x.isConst())
1204
    {
1205
40
      size_t xlen = Word::getLength(x);
1206
40
      if (xlen == 0)
1207
      {
1208
36
        Node retNode = nm->mkConst(true);
1209
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1210
18
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1211
      }
1212
22
      else if (xlen == 1)
1213
      {
1214
22
        if (r[0].getKind() == STRING_TO_REGEXP)
1215
        {
1216
4
          Node retNode = r[0][0].eqNode(x);
1217
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1218
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1219
        }
1220
      }
1221
    }
1222
3105
    else if (x.getKind() == STRING_CONCAT)
1223
    {
1224
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1225
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1226
      //     if the length of all strings in R is one.
1227
1193
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1228
692
      if (!flr.isNull())
1229
      {
1230
261
        Node one = nm->mkConst(Rational(1));
1231
226
        if (flr == one)
1232
        {
1233
382
          NodeBuilder nb(AND);
1234
671
          for (const Node& xc : x)
1235
          {
1236
480
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1237
          }
1238
          return returnRewrite(
1239
191
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1240
        }
1241
      }
1242
    }
1243
2934
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1244
    {
1245
202
      Node retNode = NodeManager::currentNM()->mkConst(true);
1246
101
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1247
    }
1248
  }
1249
6358
  else if (r.getKind() == kind::REGEXP_CONCAT)
1250
  {
1251
4445
    bool allSigma = true;
1252
4445
    bool allSigmaStrict = true;
1253
4445
    unsigned allSigmaMinSize = 0;
1254
8716
    Node constStr;
1255
4445
    size_t constIdx = 0;
1256
4445
    size_t nchildren = r.getNumChildren();
1257
10136
    for (size_t i = 0; i < nchildren; i++)
1258
    {
1259
15112
      Node rc = r[i];
1260
9421
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1261
9421
      if (rc.getKind() == kind::REGEXP_SIGMA)
1262
      {
1263
982
        allSigmaMinSize++;
1264
      }
1265
8439
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1266
      {
1267
2093
        allSigmaStrict = false;
1268
      }
1269
6346
      else if (rc.getKind() == STRING_TO_REGEXP)
1270
      {
1271
3584
        if (constStr.isNull())
1272
        {
1273
2616
          constStr = rc[0];
1274
2616
          constIdx = i;
1275
        }
1276
        else
1277
        {
1278
968
          allSigma = false;
1279
968
          break;
1280
        }
1281
      }
1282
      else
1283
      {
1284
2762
        allSigma = false;
1285
2762
        break;
1286
      }
1287
    }
1288
4445
    if (allSigma)
1289
    {
1290
715
      if (constStr.isNull())
1291
      {
1292
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1293
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1294
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1295
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1296
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1297
      }
1298
695
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1299
154
               && constIdx != nchildren - 1)
1300
      {
1301
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1302
308
        Node retNode = nm->mkNode(STRING_CONTAINS, x, constStr);
1303
154
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1304
      }
1305
    }
1306
  }
1307
3826
  else if (r.getKind() == kind::REGEXP_INTER
1308
1913
           || r.getKind() == kind::REGEXP_UNION)
1309
  {
1310
924
    std::vector<Node> mvec;
1311
1462
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1312
    {
1313
1000
      mvec.push_back(
1314
2000
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1315
    }
1316
    Node retNode = NodeManager::currentNM()->mkNode(
1317
924
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1318
462
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1319
  }
1320
1451
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1321
  {
1322
1724
    Node retNode = x.eqNode(r[0]);
1323
862
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1324
  }
1325
589
  else if (r.getKind() == REGEXP_RANGE)
1326
  {
1327
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1328
    // we do not do this if the arguments are not constant
1329
354
    if (RegExpEntail::isConstRegExp(r))
1330
    {
1331
708
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1332
      Node retNode =
1333
          nm->mkNode(AND,
1334
708
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1335
1416
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1336
354
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1337
    }
1338
  }
1339
235
  else if (r.getKind() == REGEXP_COMPLEMENT)
1340
  {
1341
410
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1342
205
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1343
  }
1344
1345
  // do simple consumes
1346
14268
  Node retNode = node;
1347
7134
  if (r.getKind() == kind::REGEXP_STAR)
1348
  {
1349
8322
    for (unsigned dir = 0; dir <= 1; dir++)
1350
    {
1351
11143
      std::vector<Node> mchildren;
1352
5654
      utils::getConcat(x, mchildren);
1353
5654
      bool success = true;
1354
17208
      while (success)
1355
      {
1356
5800
        success = false;
1357
11577
        std::vector<Node> children;
1358
5800
        utils::getConcat(r[0], children);
1359
11577
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1360
5800
        if (!scn.isNull())
1361
        {
1362
46
          Trace("regexp-ext-rewrite")
1363
23
              << "Regexp star : const conflict : " << node << std::endl;
1364
23
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1365
        }
1366
5777
        else if (children.empty())
1367
        {
1368
          // fully consumed one copy of the STAR
1369
154
          if (mchildren.empty())
1370
          {
1371
            Trace("regexp-ext-rewrite")
1372
                << "Regexp star : full consume : " << node << std::endl;
1373
            Node ret = NodeManager::currentNM()->mkConst(true);
1374
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1375
          }
1376
          else
1377
          {
1378
308
            Node prev = retNode;
1379
462
            retNode = nm->mkNode(
1380
308
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1381
            // Iterate again if the node changed. It may not have changed if
1382
            // nothing was consumed from mchildren (e.g. if the body of the
1383
            // re.* accepts the empty string.
1384
154
            success = (retNode != prev);
1385
          }
1386
        }
1387
      }
1388
5631
      if (retNode != node)
1389
      {
1390
284
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1391
142
                                    << " -> " << retNode << std::endl;
1392
142
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1393
      }
1394
    }
1395
  }
1396
  else
1397
  {
1398
7899
    std::vector<Node> children;
1399
4301
    utils::getConcat(r, children);
1400
7899
    std::vector<Node> mchildren;
1401
4301
    utils::getConcat(x, mchildren);
1402
4301
    unsigned prevSize = children.size() + mchildren.size();
1403
7899
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1404
4301
    if (!scn.isNull())
1405
    {
1406
306
      Trace("regexp-ext-rewrite")
1407
153
          << "Regexp : const conflict : " << node << std::endl;
1408
153
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1409
    }
1410
4148
    else if ((children.size() + mchildren.size()) != prevSize)
1411
    {
1412
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1413
      // above, we strip components to construct an equivalent membership:
1414
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1415
1100
      Node xn = utils::mkConcat(mchildren, stype);
1416
1100
      Node emptyStr = Word::mkEmptyWord(stype);
1417
550
      if (children.empty())
1418
      {
1419
        // If we stripped all components on the right, then the left is
1420
        // equal to the empty string.
1421
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1422
66
        retNode = xn.eqNode(emptyStr);
1423
      }
1424
      else
1425
      {
1426
        // otherwise, construct the updated regular expression
1427
484
        retNode =
1428
968
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1429
      }
1430
1100
      Trace("regexp-ext-rewrite")
1431
550
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1432
550
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1433
    }
1434
  }
1435
6266
  return node;
1436
}
1437
1438
530658
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1439
{
1440
1061316
  Trace("sequences-postrewrite")
1441
530658
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1442
1061316
  Node retNode = node;
1443
530658
  Kind nk = node.getKind();
1444
530658
  if (nk == kind::STRING_CONCAT)
1445
  {
1446
57858
    retNode = rewriteConcat(node);
1447
  }
1448
472800
  else if (nk == kind::EQUAL)
1449
  {
1450
119568
    retNode = rewriteEquality(node);
1451
  }
1452
353232
  else if (nk == kind::STRING_LENGTH)
1453
  {
1454
222423
    retNode = rewriteLength(node);
1455
  }
1456
130809
  else if (nk == kind::STRING_CHARAT)
1457
  {
1458
107
    retNode = rewriteCharAt(node);
1459
  }
1460
130702
  else if (nk == kind::STRING_SUBSTR)
1461
  {
1462
45866
    retNode = rewriteSubstr(node);
1463
  }
1464
84836
  else if (nk == kind::STRING_UPDATE)
1465
  {
1466
180
    retNode = rewriteUpdate(node);
1467
  }
1468
84656
  else if (nk == kind::STRING_CONTAINS)
1469
  {
1470
26985
    retNode = rewriteContains(node);
1471
  }
1472
57671
  else if (nk == kind::STRING_INDEXOF)
1473
  {
1474
5962
    retNode = rewriteIndexof(node);
1475
  }
1476
51709
  else if (nk == kind::STRING_INDEXOF_RE)
1477
  {
1478
937
    retNode = rewriteIndexofRe(node);
1479
  }
1480
50772
  else if (nk == kind::STRING_REPLACE)
1481
  {
1482
3356
    retNode = rewriteReplace(node);
1483
  }
1484
47416
  else if (nk == kind::STRING_REPLACE_ALL)
1485
  {
1486
198
    retNode = rewriteReplaceAll(node);
1487
  }
1488
47218
  else if (nk == kind::STRING_REPLACE_RE)
1489
  {
1490
403
    retNode = rewriteReplaceRe(node);
1491
  }
1492
46815
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1493
  {
1494
261
    retNode = rewriteReplaceReAll(node);
1495
  }
1496
46554
  else if (nk == STRING_REV)
1497
  {
1498
243
    retNode = rewriteStrReverse(node);
1499
  }
1500
46311
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1501
  {
1502
153
    retNode = rewritePrefixSuffix(node);
1503
  }
1504
46158
  else if (nk == kind::STRING_IN_REGEXP)
1505
  {
1506
11572
    retNode = rewriteMembership(node);
1507
  }
1508
34586
  else if (nk == REGEXP_CONCAT)
1509
  {
1510
2864
    retNode = rewriteConcatRegExp(node);
1511
  }
1512
31722
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1513
  {
1514
1080
    retNode = rewriteAndOrRegExp(node);
1515
  }
1516
30642
  else if (nk == REGEXP_DIFF)
1517
  {
1518
55
    retNode = rewriteDifferenceRegExp(node);
1519
  }
1520
30587
  else if (nk == REGEXP_STAR)
1521
  {
1522
1170
    retNode = rewriteStarRegExp(node);
1523
  }
1524
29417
  else if (nk == REGEXP_PLUS)
1525
  {
1526
45
    retNode = rewritePlusRegExp(node);
1527
  }
1528
29372
  else if (nk == REGEXP_OPT)
1529
  {
1530
31
    retNode = rewriteOptionRegExp(node);
1531
  }
1532
29341
  else if (nk == REGEXP_RANGE)
1533
  {
1534
376
    retNode = rewriteRangeRegExp(node);
1535
  }
1536
28965
  else if (nk == REGEXP_LOOP)
1537
  {
1538
24
    retNode = rewriteLoopRegExp(node);
1539
  }
1540
28941
  else if (nk == REGEXP_REPEAT)
1541
  {
1542
4
    retNode = rewriteRepeatRegExp(node);
1543
  }
1544
28937
  else if (nk == SEQ_UNIT)
1545
  {
1546
403
    retNode = rewriteSeqUnit(node);
1547
  }
1548
28534
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1549
  {
1550
388
    retNode = rewriteSeqNth(node);
1551
  }
1552
1553
1061316
  Trace("sequences-postrewrite")
1554
530658
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1555
530658
      << std::endl;
1556
530658
  if (node != retNode)
1557
  {
1558
195184
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1559
97592
                                   << node << " to " << retNode << std::endl;
1560
97592
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1561
  }
1562
433066
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1563
433066
  return RewriteResponse(REWRITE_DONE, retNode);
1564
}
1565
1566
327788
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1567
{
1568
327788
  return RewriteResponse(REWRITE_DONE, node);
1569
}
1570
1571
687
TrustNode SequencesRewriter::expandDefinition(Node node)
1572
{
1573
1374
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1574
687
                           << std::endl;
1575
1576
687
  if (node.getKind() == kind::SEQ_NTH)
1577
  {
1578
2
    NodeManager* nm = NodeManager::currentNM();
1579
4
    Node s = node[0];
1580
4
    Node n = node[1];
1581
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1582
    Node cond = nm->mkNode(AND,
1583
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1584
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1585
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1586
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1587
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1588
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1589
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1590
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1591
  }
1592
685
  return TrustNode::null();
1593
}
1594
1595
388
Node SequencesRewriter::rewriteSeqNth(Node node)
1596
{
1597
388
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1598
776
  Node s = node[0];
1599
776
  Node i = node[1];
1600
388
  if (s.isConst() && i.isConst())
1601
  {
1602
28
    size_t len = Word::getLength(s);
1603
28
    if (i.getConst<Rational>().sgn() != -1)
1604
    {
1605
28
      size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1606
28
      if (pos < len)
1607
      {
1608
20
        std::vector<Node> elements = s.getConst<Sequence>().getVec();
1609
10
        const Node& ret = elements[pos];
1610
10
        return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1611
      }
1612
    }
1613
18
    if (node.getKind() == SEQ_NTH_TOTAL)
1614
    {
1615
      // return arbitrary term
1616
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1617
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1618
    }
1619
    else
1620
    {
1621
16
      return node;
1622
    }
1623
  }
1624
  else
1625
  {
1626
360
    return node;
1627
  }
1628
}
1629
1630
107
Node SequencesRewriter::rewriteCharAt(Node node)
1631
{
1632
107
  Assert(node.getKind() == STRING_CHARAT);
1633
107
  NodeManager* nm = NodeManager::currentNM();
1634
214
  Node one = nm->mkConst(Rational(1));
1635
214
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1636
214
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1637
}
1638
1639
45878
Node SequencesRewriter::rewriteSubstr(Node node)
1640
{
1641
45878
  Assert(node.getKind() == kind::STRING_SUBSTR);
1642
1643
45878
  NodeManager* nm = NodeManager::currentNM();
1644
45878
  if (node[0].isConst())
1645
  {
1646
10951
    if (Word::isEmpty(node[0]))
1647
    {
1648
1888
      Node ret = node[0];
1649
944
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1650
    }
1651
    // rewriting for constant arguments
1652
10007
    if (node[1].isConst() && node[2].isConst())
1653
    {
1654
10016
      Node s = node[0];
1655
10016
      cvc5::Rational rMaxInt(String::maxSize());
1656
      uint32_t start;
1657
5008
      if (node[1].getConst<Rational>() > rMaxInt)
1658
      {
1659
        // start beyond the maximum size of strings
1660
        // thus, it must be beyond the end point of this string
1661
8
        Node ret = Word::mkEmptyWord(node.getType());
1662
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1663
      }
1664
5004
      else if (node[1].getConst<Rational>().sgn() < 0)
1665
      {
1666
        // start before the beginning of the string
1667
1018
        Node ret = Word::mkEmptyWord(node.getType());
1668
509
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1669
      }
1670
      else
1671
      {
1672
4495
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1673
4495
        if (start >= Word::getLength(node[0]))
1674
        {
1675
          // start beyond the end of the string
1676
766
          Node ret = Word::mkEmptyWord(node.getType());
1677
383
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1678
        }
1679
      }
1680
4112
      if (node[2].getConst<Rational>() > rMaxInt)
1681
      {
1682
        // take up to the end of the string
1683
1
        size_t lenS = Word::getLength(s);
1684
2
        Node ret = Word::suffix(s, lenS - start);
1685
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1686
      }
1687
4111
      else if (node[2].getConst<Rational>().sgn() <= 0)
1688
      {
1689
738
        Node ret = Word::mkEmptyWord(node.getType());
1690
369
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1691
      }
1692
      else
1693
      {
1694
        uint32_t len =
1695
3742
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1696
3742
        if (start + len > Word::getLength(node[0]))
1697
        {
1698
          // take up to the end of the string
1699
325
          size_t lenS = Word::getLength(s);
1700
650
          Node ret = Word::suffix(s, lenS - start);
1701
325
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1702
        }
1703
        else
1704
        {
1705
          // compute the substr using the constant string
1706
6834
          Node ret = Word::substr(s, start, len);
1707
3417
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1708
        }
1709
      }
1710
    }
1711
  }
1712
79852
  Node zero = nm->mkConst(cvc5::Rational(0));
1713
1714
  // if entailed non-positive length or negative start point
1715
39926
  if (ArithEntail::check(zero, node[1], true))
1716
  {
1717
82
    Node ret = Word::mkEmptyWord(node.getType());
1718
41
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1719
  }
1720
39885
  else if (ArithEntail::check(zero, node[2]))
1721
  {
1722
1570
    Node ret = Word::mkEmptyWord(node.getType());
1723
785
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1724
  }
1725
1726
39100
  if (node[0].getKind() == STRING_SUBSTR)
1727
  {
1728
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1729
    //
1730
    // Note that this rewrite can be generalized to:
1731
    //
1732
    // (str.substr x a b) ---> "" if a >= (str.len x)
1733
    //
1734
    // This can be done when we generalize our entailment methods to
1735
    // accept an optional context. Then we could conjecture that
1736
    // (str.substr x a b) rewrites to "" and do a case analysis:
1737
    //
1738
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1739
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1740
    //
1741
    // For example, for (str.substr (str.substr x a a) a a), we could
1742
    // then deduce that under those assumptions, "a" is an
1743
    // over-approximation of the length of (str.substr x a a), which
1744
    // then allows us to reason that the result of the whole term must
1745
    // be empty.
1746
3767
    if (ArithEntail::check(node[1], node[0][2]))
1747
    {
1748
68
      Node ret = Word::mkEmptyWord(node.getType());
1749
34
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1750
    }
1751
  }
1752
35333
  else if (node[0].getKind() == STRING_REPLACE)
1753
  {
1754
    // (str.substr (str.replace x y z) 0 n)
1755
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1756
    // if (str.len y) = 1 and (str.len z) = 1
1757
354
    if (node[1] == zero)
1758
    {
1759
492
      if (StringsEntail::checkLengthOne(node[0][1], true)
1760
492
          && StringsEntail::checkLengthOne(node[0][2], true))
1761
      {
1762
        Node ret = nm->mkNode(
1763
            kind::STRING_REPLACE,
1764
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1765
            node[0][1],
1766
16
            node[0][2]);
1767
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1768
      }
1769
    }
1770
  }
1771
1772
78124
  std::vector<Node> n1;
1773
39062
  utils::getConcat(node[0], n1);
1774
78124
  TypeNode stype = node.getType();
1775
1776
  // definite inclusion
1777
39062
  if (node[1] == zero)
1778
  {
1779
27178
    Node curr = node[2];
1780
27178
    std::vector<Node> childrenr;
1781
13956
    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
1782
    {
1783
734
      if (curr != zero && !n1.empty())
1784
      {
1785
579
        childrenr.push_back(nm->mkNode(
1786
386
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1787
      }
1788
1468
      Node ret = utils::mkConcat(childrenr, stype);
1789
734
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1790
    }
1791
  }
1792
1793
  // symbolic length analysis
1794
111727
  for (unsigned r = 0; r < 2; r++)
1795
  {
1796
    // the amount of characters we can strip
1797
148561
    Node curr;
1798
75162
    if (r == 0)
1799
    {
1800
38328
      if (node[1] != zero)
1801
      {
1802
        // strip up to start point off the start of the string
1803
25106
        curr = node[1];
1804
      }
1805
    }
1806
36834
    else if (r == 1)
1807
    {
1808
      Node tot_len =
1809
73603
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1810
73603
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1811
36834
      if (node[2] != tot_len)
1812
      {
1813
35108
        if (ArithEntail::check(node[2], tot_len))
1814
        {
1815
          // end point beyond end point of string, map to tot_len
1816
80
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1817
40
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1818
        }
1819
        else
1820
        {
1821
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1822
35068
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1823
        }
1824
      }
1825
1826
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1827
      Node n1_lt_tot_len =
1828
73563
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1829
36794
      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
1830
      {
1831
36
        Node ret = Word::mkEmptyWord(node.getType());
1832
18
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1833
      }
1834
1835
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1836
      Node non_zero_len =
1837
73545
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1838
36776
      if (ArithEntail::checkWithAssumption(
1839
              non_zero_len, node[1], tot_len, false))
1840
      {
1841
4
        Node ret = Word::mkEmptyWord(node.getType());
1842
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1843
      }
1844
1845
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1846
      Node geq_zero_start =
1847
73543
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1848
36774
      if (ArithEntail::checkWithAssumption(
1849
              geq_zero_start, zero, tot_len, false))
1850
      {
1851
4
        Node ret = Word::mkEmptyWord(node.getType());
1852
        return returnRewrite(
1853
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1854
      }
1855
1856
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1857
36772
      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
1858
      {
1859
6
        Node ret = Word::mkEmptyWord(node.getType());
1860
3
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1861
      }
1862
    }
1863
75097
    if (!curr.isNull())
1864
    {
1865
      // strip off components while quantity is entailed positive
1866
60149
      int dir = r == 0 ? 1 : -1;
1867
118600
      std::vector<Node> childrenr;
1868
60149
      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
1869
      {
1870
1698
        if (r == 0)
1871
        {
1872
          Node ret = nm->mkNode(
1873
2988
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1874
1494
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1875
        }
1876
        else
1877
        {
1878
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1879
408
                                utils::mkConcat(n1, stype),
1880
                                node[1],
1881
816
                                node[2]);
1882
204
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1883
        }
1884
      }
1885
    }
1886
  }
1887
  // combine substr
1888
36565
  if (node[0].getKind() == kind::STRING_SUBSTR)
1889
  {
1890
6567
    Node start_inner = node[0][1];
1891
6567
    Node start_outer = node[1];
1892
3641
    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
1893
    {
1894
      // both are positive
1895
      // thus, start point is definitely start_inner+start_outer.
1896
      // We can rewrite if it for certain what the length is
1897
1898
      // the length of a string from the inner substr subtracts the start point
1899
      // of the outer substr
1900
      Node len_from_inner =
1901
4003
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1902
4003
      Node len_from_outer = node[2];
1903
4003
      Node new_len;
1904
      // take quantity that is for sure smaller than the other
1905
2359
      if (len_from_inner == len_from_outer)
1906
      {
1907
9
        new_len = len_from_inner;
1908
      }
1909
2350
      else if (ArithEntail::check(len_from_inner, len_from_outer))
1910
      {
1911
499
        new_len = len_from_outer;
1912
      }
1913
1851
      else if (ArithEntail::check(len_from_outer, len_from_inner))
1914
      {
1915
207
        new_len = len_from_inner;
1916
      }
1917
2359
      if (!new_len.isNull())
1918
      {
1919
1430
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1920
        Node ret =
1921
1430
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1922
715
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1923
      }
1924
    }
1925
  }
1926
35850
  return node;
1927
}
1928
1929
180
Node SequencesRewriter::rewriteUpdate(Node node)
1930
{
1931
180
  Assert(node.getKind() == kind::STRING_UPDATE);
1932
360
  Node s = node[0];
1933
180
  if (s.isConst())
1934
  {
1935
168
    if (Word::isEmpty(s))
1936
    {
1937
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1938
    }
1939
    // rewriting for constant arguments
1940
168
    if (node[1].isConst())
1941
    {
1942
144
      cvc5::Rational rMaxInt(String::maxSize());
1943
124
      if (node[1].getConst<Rational>() > rMaxInt)
1944
      {
1945
        // start beyond the maximum size of strings
1946
        // thus, it must be beyond the end point of this string
1947
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1948
      }
1949
124
      else if (node[1].getConst<Rational>().sgn() < 0)
1950
      {
1951
        // start before the beginning of the string
1952
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1953
      }
1954
      uint32_t start =
1955
124
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1956
124
      size_t len = Word::getLength(s);
1957
124
      if (start >= len)
1958
      {
1959
        // start beyond the end of the string
1960
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1961
      }
1962
124
      if (node[2].isConst())
1963
      {
1964
208
        Node ret = Word::update(s, start, node[2]);
1965
104
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1966
      }
1967
    }
1968
  }
1969
1970
76
  return node;
1971
}
1972
1973
253525
Node SequencesRewriter::rewriteContains(Node node)
1974
{
1975
253525
  Assert(node.getKind() == kind::STRING_CONTAINS);
1976
253525
  NodeManager* nm = NodeManager::currentNM();
1977
1978
253525
  if (node[0] == node[1])
1979
  {
1980
1326
    Node ret = NodeManager::currentNM()->mkConst(true);
1981
663
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1982
  }
1983
252862
  if (node[0].isConst())
1984
  {
1985
71437
    if (node[1].isConst())
1986
    {
1987
9870
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1988
4935
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1989
    }
1990
    else
1991
    {
1992
128852
      Node t = node[1];
1993
66502
      if (Word::isEmpty(node[0]))
1994
      {
1995
        Node len1 =
1996
65101
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1997
32665
        if (ArithEntail::check(len1, true))
1998
        {
1999
          // we handle the false case here since the rewrite for equality
2000
          // uses this function, hence we want to conclude false if possible.
2001
          // len(x)>0 => contains( "", x ) ---> false
2002
458
          Node ret = NodeManager::currentNM()->mkConst(false);
2003
229
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
2004
        }
2005
      }
2006
33837
      else if (StringsEntail::checkLengthOne(t))
2007
      {
2008
7590
        std::vector<Node> vec = Word::getChars(node[0]);
2009
7590
        Node emp = Word::mkEmptyWord(t.getType());
2010
7590
        NodeBuilder nb(OR);
2011
3795
        nb << emp.eqNode(t);
2012
8804
        for (const Node& c : vec)
2013
        {
2014
5009
          Assert(c.getType() == t.getType());
2015
5009
          nb << c.eqNode(t);
2016
        }
2017
2018
        // str.contains("ABCabc", t) --->
2019
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2020
        // if len(t) <= 1
2021
7590
        Node ret = nb;
2022
3795
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2023
      }
2024
30042
      else if (node[1].getKind() == kind::STRING_CONCAT)
2025
      {
2026
        int firstc, lastc;
2027
2823
        if (!StringsEntail::canConstantContainConcat(
2028
                node[0], node[1], firstc, lastc))
2029
        {
2030
256
          Node ret = NodeManager::currentNM()->mkConst(false);
2031
128
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2032
        }
2033
      }
2034
    }
2035
  }
2036
243775
  if (node[1].isConst())
2037
  {
2038
63136
    size_t len = Word::getLength(node[1]);
2039
63136
    if (len == 0)
2040
    {
2041
      // contains( x, "" ) ---> true
2042
64776
      Node ret = NodeManager::currentNM()->mkConst(true);
2043
32388
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2044
    }
2045
30748
    else if (len == 1)
2046
    {
2047
      // The following rewrites are specific to a single character second
2048
      // argument of contains, where we can reason that this character is
2049
      // not split over multiple components in the first argument.
2050
16315
      if (node[0].getKind() == STRING_CONCAT)
2051
      {
2052
2410
        std::vector<Node> nc1;
2053
1205
        utils::getConcat(node[0], nc1);
2054
2410
        NodeBuilder nb(OR);
2055
4381
        for (const Node& ncc : nc1)
2056
        {
2057
3176
          nb << nm->mkNode(STRING_CONTAINS, ncc, node[1]);
2058
        }
2059
2410
        Node ret = nb.constructNode();
2060
        // str.contains( x ++ y, "A" ) --->
2061
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2062
1205
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2063
      }
2064
15110
      else if (node[0].getKind() == STRING_REPLACE)
2065
      {
2066
1043
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2067
587
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2068
        {
2069
262
          Node d1 = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2070
          Node d2 =
2071
              nm->mkNode(AND,
2072
262
                         nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2073
524
                         nm->mkNode(STRING_CONTAINS, node[0][2], node[1]));
2074
262
          Node ret = nm->mkNode(OR, d1, d2);
2075
          // If str.contains( y, "A" ) ---> false, then:
2076
          // str.contains( str.replace( x, y, z ), "A" ) --->
2077
          //   str.contains( x, "A" ) OR
2078
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2079
131
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2080
        }
2081
      }
2082
    }
2083
  }
2084
420102
  std::vector<Node> nc1;
2085
210051
  utils::getConcat(node[0], nc1);
2086
420102
  std::vector<Node> nc2;
2087
210051
  utils::getConcat(node[1], nc2);
2088
2089
  // component-wise containment
2090
420102
  std::vector<Node> nc1rb;
2091
420102
  std::vector<Node> nc1re;
2092
210051
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2093
  {
2094
7814
    Node ret = NodeManager::currentNM()->mkConst(true);
2095
3907
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2096
  }
2097
412288
  TypeNode stype = node[0].getType();
2098
2099
  // strip endpoints
2100
412288
  std::vector<Node> nb;
2101
412288
  std::vector<Node> ne;
2102
206144
  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
2103
  {
2104
    Node ret = NodeManager::currentNM()->mkNode(
2105
610
        kind::STRING_CONTAINS, utils::mkConcat(nc1, stype), node[1]);
2106
305
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2107
  }
2108
2109
522368
  for (const Node& n : nc2)
2110
  {
2111
316702
    if (n.getKind() == kind::STRING_REPLACE)
2112
    {
2113
      // (str.contains x (str.replace y z w)) --> false
2114
      // if (str.contains x y) = false and (str.contains x w) = false
2115
      //
2116
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2117
      // replacement does not change y. (str.contains x w) checks that if the
2118
      // replacement changes anything in y, the w makes it impossible for it to
2119
      // occur in x.
2120
7843
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2121
4008
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2122
      {
2123
484
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2124
246
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2125
        {
2126
16
          Node res = nm->mkConst(false);
2127
8
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2128
        }
2129
      }
2130
2131
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2132
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2133
      //
2134
      // TODO: Remove with under-/over-approximation
2135
4000
      if (node[0] == n[0] && node[0] == n[2])
2136
      {
2137
330
        Node ret;
2138
165
        if (nc2.size() > 1)
2139
        {
2140
46
          Node emp = Word::mkEmptyWord(stype);
2141
46
          NodeBuilder nb2(kind::AND);
2142
69
          for (const Node& n2 : nc2)
2143
          {
2144
46
            if (n2 == n)
2145
            {
2146
23
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2147
            }
2148
            else
2149
            {
2150
23
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2151
            }
2152
          }
2153
23
          ret = nb2.constructNode();
2154
        }
2155
        else
2156
        {
2157
142
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2158
        }
2159
165
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2160
      }
2161
    }
2162
  }
2163
2164
  // length entailment
2165
411332
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2166
411332
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2167
205666
  if (ArithEntail::check(len_n2, len_n1, true))
2168
  {
2169
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2170
320
    Node ret = NodeManager::currentNM()->mkConst(false);
2171
160
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2172
  }
2173
2174
  // multi-set reasoning
2175
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2176
  //   since the number of a's in the second argument is greater than the number
2177
  //   of a's in the first argument
2178
205506
  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
2179
  {
2180
96
    Node ret = nm->mkConst(false);
2181
48
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2182
  }
2183
2184
205458
  if (ArithEntail::check(len_n2, len_n1, false))
2185
  {
2186
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2187
82460
    Node ret = node[0].eqNode(node[1]);
2188
41230
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2189
  }
2190
2191
  // splitting
2192
164228
  if (node[0].getKind() == kind::STRING_CONCAT)
2193
  {
2194
55444
    if (node[1].isConst())
2195
    {
2196
4424
      Node t = node[1];
2197
      // Below, we are looking for a constant component of node[0]
2198
      // has no overlap with node[1], which means we can split.
2199
      // Notice that if the first or last components had no
2200
      // overlap, these would have been removed by strip
2201
      // constant endpoints above.
2202
      // Hence, we consider only the inner children.
2203
4364
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2204
      {
2205
        // constant contains
2206
2189
        if (node[0][i].isConst())
2207
        {
2208
          // if no overlap, we can split into disjunction
2209
271
          if (Word::noOverlapWith(node[0][i], node[1]))
2210
          {
2211
148
            std::vector<Node> nc0;
2212
74
            utils::getConcat(node[0], nc0);
2213
148
            std::vector<Node> spl[2];
2214
74
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2215
74
            Assert(i < nc0.size() - 1);
2216
74
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2217
            Node ret = NodeManager::currentNM()->mkNode(
2218
                kind::OR,
2219
296
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2220
148
                                                 utils::mkConcat(spl[0], stype),
2221
                                                 node[1]),
2222
296
                NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS,
2223
148
                                                 utils::mkConcat(spl[1], stype),
2224
370
                                                 node[1]));
2225
74
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2226
          }
2227
        }
2228
      }
2229
    }
2230
  }
2231
108784
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2232
  {
2233
    // (str.contains (str.substr x n (str.len y)) y) --->
2234
    //   (= (str.substr x n (str.len y)) y)
2235
    //
2236
    // TODO: Remove with under-/over-approximation
2237
13732
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2238
    {
2239
56
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2240
28
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2241
    }
2242
  }
2243
95052
  else if (node[0].getKind() == kind::STRING_REPLACE)
2244
  {
2245
1892
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2246
    {
2247
596
      if (Word::noOverlapWith(node[1], node[0][1])
2248
596
          && Word::noOverlapWith(node[1], node[0][2]))
2249
      {
2250
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2251
        // if there is no overlap between c1 and c3 and none between c2 and c3
2252
4
        Node ret = nm->mkNode(STRING_CONTAINS, node[0][0], node[1]);
2253
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2254
      }
2255
    }
2256
2257
1890
    if (node[0][0] == node[0][2])
2258
    {
2259
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2260
326
      if (node[0][1] == node[1])
2261
      {
2262
24
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2263
12
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2264
      }
2265
2266
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2267
      // if (str.len z) <= 1
2268
314
      if (StringsEntail::checkLengthOne(node[1]))
2269
      {
2270
24
        Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0][0], node[1]);
2271
12
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2272
      }
2273
    }
2274
2275
    // (str.contains (str.replace x y z) z) --->
2276
    //   (or (str.contains x y) (str.contains x z))
2277
1866
    if (node[0][2] == node[1])
2278
    {
2279
      Node ret =
2280
          nm->mkNode(OR,
2281
144
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][1]),
2282
288
                     nm->mkNode(STRING_CONTAINS, node[0][0], node[0][2]));
2283
72
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2284
    }
2285
2286
    // (str.contains (str.replace x y z) w) --->
2287
    //   (str.contains (str.replace x y "") w)
2288
    // if (str.contains z w) ---> false and (str.len w) = 1
2289
1794
    if (StringsEntail::checkLengthOne(node[1]))
2290
    {
2291
480
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2292
295
      if (!ctn.isNull() && !ctn.getConst<bool>())
2293
      {
2294
220
        Node empty = Word::mkEmptyWord(stype);
2295
        Node ret = nm->mkNode(
2296
            kind::STRING_CONTAINS,
2297
220
            nm->mkNode(kind::STRING_REPLACE, node[0][0], node[0][1], empty),
2298
440
            node[1]);
2299
110
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2300
      }
2301
    }
2302
  }
2303
2304
163918
  if (node[1].getKind() == kind::STRING_REPLACE)
2305
  {
2306
    // (str.contains x (str.replace y x y)) --->
2307
    //   (str.contains x y)
2308
1961
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2309
    {
2310
12
      Node ret = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1][0]);
2311
6
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2312
    }
2313
2314
    // (str.contains x (str.replace "" x y)) --->
2315
    //   (= "" (str.replace "" x y))
2316
    //
2317
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2318
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2319
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2320
3908
    Node emp = Word::mkEmptyWord(stype);
2321
1955
    if (node[0] == node[1][1] && node[1][0] == emp)
2322
    {
2323
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2324
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2325
    }
2326
  }
2327
2328
163910
  return node;
2329
}
2330
2331
5962
Node SequencesRewriter::rewriteIndexof(Node node)
2332
{
2333
5962
  Assert(node.getKind() == kind::STRING_INDEXOF);
2334
5962
  NodeManager* nm = NodeManager::currentNM();
2335
2336
5962
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2337
  {
2338
    // z<0  implies  str.indexof( x, y, z ) --> -1
2339
354
    Node negone = nm->mkConst(Rational(-1));
2340
177
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2341
  }
2342
2343
  // the string type
2344
11570
  TypeNode stype = node[0].getType();
2345
2346
  // evaluation and simple cases
2347
11570
  std::vector<Node> children0;
2348
5785
  utils::getConcat(node[0], children0);
2349
5785
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2350
  {
2351
1034
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2352
891
    if (node[2].getConst<Rational>() > rMaxInt)
2353
    {
2354
      // We know that, due to limitations on the size of string constants
2355
      // in our implementation, that accessing a position greater than
2356
      // rMaxInt is guaranteed to be out of bounds.
2357
      Node negone = nm->mkConst(Rational(-1));
2358
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2359
    }
2360
891
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2361
1034
    Node s = children0[0];
2362
1034
    Node t = node[1];
2363
    uint32_t start =
2364
891
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2365
891
    std::size_t ret = Word::find(s, t, start);
2366
891
    if (ret != std::string::npos)
2367
    {
2368
656
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2369
328
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2370
    }
2371
563
    else if (children0.size() == 1)
2372
    {
2373
840
      Node negone = nm->mkConst(Rational(-1));
2374
420
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2375
    }
2376
  }
2377
2378
5037
  if (node[0] == node[1])
2379
  {
2380
102
    if (node[2].isConst())
2381
    {
2382
18
      if (node[2].getConst<Rational>().sgn() == 0)
2383
      {
2384
        // indexof( x, x, 0 ) --> 0
2385
36
        Node zero = nm->mkConst(Rational(0));
2386
18
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2387
      }
2388
    }
2389
84
    if (ArithEntail::check(node[2], true))
2390
    {
2391
      // y>0  implies  indexof( x, x, y ) --> -1
2392
      Node negone = nm->mkConst(Rational(-1));
2393
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2394
    }
2395
140
    Node emp = Word::mkEmptyWord(stype);
2396
84
    if (node[0] != emp)
2397
    {
2398
      // indexof( x, x, z ) ---> indexof( "", "", z )
2399
56
      Node ret = nm->mkNode(STRING_INDEXOF, emp, emp, node[2]);
2400
28
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2401
    }
2402
  }
2403
2404
9982
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2405
9982
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2406
9982
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2407
2408
4991
  if (node[1].isConst())
2409
  {
2410
4441
    if (Word::isEmpty(node[1]))
2411
    {
2412
82
      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
2413
      {
2414
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2415
10
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2416
      }
2417
    }
2418
  }
2419
2420
4981
  if (ArithEntail::check(len1, len0m2, true))
2421
  {
2422
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2423
16
    Node negone = nm->mkConst(Rational(-1));
2424
8
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2425
  }
2426
2427
9946
  Node fstr = node[0];
2428
4973
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2429
  {
2430
1072
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2431
1072
    fstr = Rewriter::rewrite(fstr);
2432
  }
2433
2434
9946
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2435
9946
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2436
4973
                                 << fstr << ", " << node[1] << ")" << std::endl;
2437
4973
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2438
9946
  std::vector<Node> children1;
2439
4973
  utils::getConcat(node[1], children1);
2440
4973
  if (!cmp_conr.isNull())
2441
  {
2442
282
    if (cmp_conr.getConst<bool>())
2443
    {
2444
277
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2445
      {
2446
        // past the first position in node[0] that contains node[1], we can drop
2447
335
        std::vector<Node> nb;
2448
335
        std::vector<Node> ne;
2449
199
        int cc = d_stringsEntail.componentContains(
2450
199
            children0, children1, nb, ne, true, 1);
2451
199
        if (cc != -1 && !ne.empty())
2452
        {
2453
          // For example:
2454
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2455
102
          Node nn = utils::mkConcat(children0, stype);
2456
102
          Node ret = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2457
51
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2458
        }
2459
2460
        // Strip components from the beginning that are guaranteed not to match
2461
148
        if (StringsEntail::stripConstantEndpoints(
2462
                children0, children1, nb, ne, 1))
2463
        {
2464
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2465
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2466
          Node ret = nm->mkNode(
2467
              kind::PLUS,
2468
24
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2469
48
              nm->mkNode(kind::STRING_INDEXOF,
2470
24
                         utils::mkConcat(children0, stype),
2471
                         node[1],
2472
48
                         node[2]));
2473
12
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2474
        }
2475
      }
2476
      // To show that the first argument definitely contains the second, the
2477
      // index must be a valid index in the first argument. This ensures that
2478
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2479
      // is beyond the length of t. This is not required for the above rewrites,
2480
      // which only apply when n=0.
2481
214
      if (ArithEntail::check(node[2]) && ArithEntail::check(len0, node[2]))
2482
      {
2483
        // strip symbolic length
2484
278
        Node new_len = node[2];
2485
278
        std::vector<Node> nr;
2486
142
        if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2487
        {
2488
          // For example:
2489
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2490
          // implies
2491
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2492
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2493
12
          Node nn = utils::mkConcat(children0, stype);
2494
          Node ret =
2495
              nm->mkNode(PLUS,
2496
12
                         nm->mkNode(MINUS, node[2], new_len),
2497
24
                         nm->mkNode(STRING_INDEXOF, nn, node[1], new_len));
2498
6
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2499
        }
2500
      }
2501
    }
2502
    else
2503
    {
2504
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2505
10
      Node negone = nm->mkConst(Rational(-1));
2506
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2507
    }
2508
  }
2509
  else
2510
  {
2511
9349
    Node new_len = node[2];
2512
9349
    std::vector<Node> nr;
2513
4691
    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2514
    {
2515
      // Normalize the string before the start index.
2516
      //
2517
      // For example:
2518
      // str.indexof(str.++("ABCD", x), y, 3) --->
2519
      // str.indexof(str.++("AAAD", x), y, 3)
2520
121
      Node nodeNr = utils::mkConcat(nr, stype);
2521
121
      Node normNr = lengthPreserveRewrite(nodeNr);
2522
77
      if (normNr != nodeNr)
2523
      {
2524
66
        std::vector<Node> normNrChildren;
2525
33
        utils::getConcat(normNr, normNrChildren);
2526
66
        std::vector<Node> children(normNrChildren);
2527
33
        children.insert(children.end(), children0.begin(), children0.end());
2528
66
        Node nn = utils::mkConcat(children, stype);
2529
66
        Node res = nm->mkNode(kind::STRING_INDEXOF, nn, node[1], node[2]);
2530
33
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2531
      }
2532
    }
2533
  }
2534
2535
4866
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2536
  {
2537
7668
    std::vector<Node> cb;
2538
7668
    std::vector<Node> ce;
2539
3838
    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
2540
    {
2541
16
      Node ret = utils::mkConcat(children0, stype);
2542
8
      ret = nm->mkNode(STRING_INDEXOF, ret, node[1], node[2]);
2543
      // For example:
2544
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2545
8
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2546
    }
2547
  }
2548
2549
4858
  return node;
2550
}
2551
2552
937
Node SequencesRewriter::rewriteIndexofRe(Node node)
2553
{
2554
937
  Assert(node.getKind() == STRING_INDEXOF_RE);
2555
937
  NodeManager* nm = NodeManager::currentNM();
2556
1874
  Node s = node[0];
2557
1874
  Node r = node[1];
2558
1874
  Node n = node[2];
2559
1874
  Node zero = nm->mkConst(Rational(0));
2560
1874
  Node slen = nm->mkNode(STRING_LENGTH, s);
2561
2562
937
  if (ArithEntail::check(zero, n, true) || ArithEntail::check(n, slen, true))
2563
  {
2564
68
    Node ret = nm->mkConst(Rational(-1));
2565
34
    return returnRewrite(node, ret, Rewrite::INDEXOF_RE_INVALID_INDEX);
2566
  }
2567
2568
903
  if (RegExpEntail::isConstRegExp(r))
2569
  {
2570
635
    if (s.isConst() && n.isConst())
2571
    {
2572
310
      Rational nrat = n.getConst<Rational>();
2573
310
      cvc5::Rational rMaxInt(cvc5::String::maxSize());
2574
155
      if (nrat > rMaxInt)
2575
      {
2576
        // We know that, due to limitations on the size of string constants
2577
        // in our implementation, that accessing a position greater than
2578
        // rMaxInt is guaranteed to be out of bounds.
2579
        Node negone = nm->mkConst(Rational(-1));
2580
        return returnRewrite(node, negone, Rewrite::INDEXOF_RE_MAX_INDEX);
2581
      }
2582
2583
155
      uint32_t start = nrat.getNumerator().toUnsignedInt();
2584
310
      Node rem = nm->mkConst(s.getConst<String>().substr(start));
2585
155
      std::pair<size_t, size_t> match = firstMatch(rem, r);
2586
      Node ret = nm->mkConst(
2587
405
          Rational(match.first == string::npos
2588
                       ? -1
2589
405
                       : static_cast<int64_t>(start + match.first)));
2590
155
      return returnRewrite(node, ret, Rewrite::INDEXOF_RE_EVAL);
2591
    }
2592
2593
480
    if (ArithEntail::check(n, zero) && ArithEntail::check(slen, n))
2594
    {
2595
676
      String emptyStr("");
2596
342
      if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, r))
2597
      {
2598
8
        return returnRewrite(node, n, Rewrite::INDEXOF_RE_EMP_RE);
2599
      }
2600
    }
2601
  }
2602
740
  return node;
2603
}
2604
2605
3356
Node SequencesRewriter::rewriteReplace(Node node)
2606
{
2607
3356
  Assert(node.getKind() == kind::STRING_REPLACE);
2608
3356
  NodeManager* nm = NodeManager::currentNM();
2609
2610
3356
  if (node[1].isConst() && Word::isEmpty(node[1]))
2611
  {
2612
846
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2613
423
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2614
  }
2615
  // the string type
2616
5866
  TypeNode stype = node.getType();
2617
2618
5866
  std::vector<Node> children0;
2619
2933
  utils::getConcat(node[0], children0);
2620
2621
2933
  if (node[1].isConst() && children0[0].isConst())
2622
  {
2623
885
    Node s = children0[0];
2624
885
    Node t = node[1];
2625
876
    std::size_t p = Word::find(s, t);
2626
876
    if (p == std::string::npos)
2627
    {
2628
556
      if (children0.size() == 1)
2629
      {
2630
547
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2631
      }
2632
    }
2633
    else
2634
    {
2635
640
      Node s1 = Word::substr(s, 0, p);
2636
640
      Node s3 = Word::substr(s, p + Word::getLength(t));
2637
640
      std::vector<Node> children;
2638
320
      if (!Word::isEmpty(s1))
2639
      {
2640
51
        children.push_back(s1);
2641
      }
2642
320
      children.push_back(node[2]);
2643
320
      if (!Word::isEmpty(s3))
2644
      {
2645
90
        children.push_back(s3);
2646
      }
2647
320
      children.insert(children.end(), children0.begin() + 1, children0.end());
2648
640
      Node ret = utils::mkConcat(children, stype);
2649
320
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2650
    }
2651
  }
2652
2653
  // rewrites that apply to both replace and replaceall
2654
4132
  Node rri = rewriteReplaceInternal(node);
2655
2066
  if (!rri.isNull())
2656
  {
2657
    // printing of the rewrite managed by the call above
2658
135
    return rri;
2659
  }
2660
2661
1931
  if (node[0] == node[2])
2662
  {
2663
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2664
617
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2665
617
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2666
354
    if (ArithEntail::check(l1, l0))
2667
    {
2668
79
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2669
    }
2670
2671
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2672
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2673
275
    if (StringsEntail::checkLengthOne(node[0]))
2674
    {
2675
176
      Node empty = Word::mkEmptyWord(stype);
2676
      Node rn1 = Rewriter::rewrite(
2677
176
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2678
94
      if (rn1 != node[1])
2679
      {
2680
176
        std::vector<Node> emptyNodes;
2681
        bool allEmptyEqs;
2682
94
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2683
2684
94
        if (allEmptyEqs)
2685
        {
2686
108
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2687
60
          if (node[1] != nn1)
2688
          {
2689
24
            Node ret = nm->mkNode(STRING_REPLACE, node[0], nn1, node[2]);
2690
12
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2691
          }
2692
        }
2693
      }
2694
    }
2695
  }
2696
2697
3680
  std::vector<Node> children1;
2698
1840
  utils::getConcat(node[1], children1);
2699
2700
  // check if contains definitely does (or does not) hold
2701
3680
  Node cmp_con = nm->mkNode(kind::STRING_CONTAINS, node[0], node[1]);
2702
3680
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2703
1840
  if (cmp_conr.isConst())
2704
  {
2705
211
    if (cmp_conr.getConst<bool>())
2706
    {
2707
      // component-wise containment
2708
367
      std::vector<Node> cb;
2709
367
      std::vector<Node> ce;
2710
205
      int cc = d_stringsEntail.componentContains(
2711
205
          children0, children1, cb, ce, true, 1);
2712
205
      if (cc != -1)
2713
      {
2714
103
        if (cc == 0 && children0[0] == children1[0])
2715
        {
2716
          // definitely a prefix, can do the replace
2717
          // for example,
2718
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2719
          //   str.++( y, "b" )
2720
56
          std::vector<Node> cres;
2721
28
          cres.push_back(node[2]);
2722
28
          cres.insert(cres.end(), ce.begin(), ce.end());
2723
56
          Node ret = utils::mkConcat(cres, stype);
2724
28
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2725
        }
2726
75
        else if (!ce.empty())
2727
        {
2728
          // we can pull remainder past first definite containment
2729
          // for example,
2730
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2731
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2732
          // this is independent of whether the second argument may be empty
2733
30
          std::vector<Node> scc;
2734
45
          scc.push_back(NodeManager::currentNM()->mkNode(
2735
              kind::STRING_REPLACE,
2736
30
              utils::mkConcat(children0, stype),
2737
              node[1],
2738
              node[2]));
2739
15
          scc.insert(scc.end(), ce.begin(), ce.end());
2740
30
          Node ret = utils::mkConcat(scc, stype);
2741
15
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2742
        }
2743
      }
2744
    }
2745
    else
2746
    {
2747
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2748
6
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2749
    }
2750
  }
2751
1629
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2752
  {
2753
    // Rewriting the str.contains may return equalities of the form (= x "").
2754
    // In that case, we can substitute the variables appearing in those
2755
    // equalities with the empty string in the third argument of the
2756
    // str.replace. For example:
2757
    //
2758
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2759
    //
2760
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2761
    // but that means that y must be empty in that case. Thus, we can
2762
    // substitute y with "" in the third argument. Note that the third argument
2763
    // does not matter when the str.replace does not apply.
2764
    //
2765
618
    Node empty = Word::mkEmptyWord(stype);
2766
2767
618
    std::vector<Node> emptyNodes;
2768
    bool allEmptyEqs;
2769
318
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2770
2771
318
    if (emptyNodes.size() > 0)
2772
    {
2773
      // Perform the substitutions
2774
346
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2775
      Node nn2 = node[2].substitute(
2776
346
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2777
2778
      // If the contains rewrites to a conjunction of empty-string equalities
2779
      // and we are doing the replacement in an empty string, we can rewrite
2780
      // the string-to-replace with a concatenation of all the terms that must
2781
      // be empty:
2782
      //
2783
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2784
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2785
182
      if (node[0] == empty && allEmptyEqs)
2786
      {
2787
192
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2788
192
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2789
100
        if (nn1 != node[1] || nn2 != node[2])
2790
        {
2791
16
          Node res = nm->mkNode(kind::STRING_REPLACE, node[0], nn1, nn2);
2792
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2793
        }
2794
      }
2795
2796
174
      if (nn2 != node[2])
2797
      {
2798
20
        Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1], nn2);
2799
10
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2800
      }
2801
    }
2802
  }
2803
2804
1773
  if (cmp_conr != cmp_con)
2805
  {
2806
589
    if (StringsEntail::checkNonEmpty(node[1]))
2807
    {
2808
      // pull endpoints that can be stripped
2809
      // for example,
2810
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2811
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2812
657
      std::vector<Node> cb;
2813
657
      std::vector<Node> ce;
2814
337
      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
2815
      {
2816
34
        std::vector<Node> cc;
2817
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2818
17
        cc.push_back(
2819
68
            NodeManager::currentNM()->mkNode(kind::STRING_REPLACE,
2820
34
                                             utils::mkConcat(children0, stype),
2821
                                             node[1],
2822
                                             node[2]));
2823
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2824
34
        Node ret = utils::mkConcat(cc, stype);
2825
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2826
      }
2827
    }
2828
  }
2829
2830
1756
  children1.clear();
2831
1756
  utils::getConcat(node[1], children1);
2832
3512
  Node lastChild1 = children1[children1.size() - 1];
2833
1756
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2834
  {
2835
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2836
    // (str.replace x (str.++ t
2837
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2838
    // if j > len(x)
2839
    //
2840
    // Reasoning: If the string to be replaced is longer than x, then it does
2841
    // not matter how much longer it is, the result is always x. Thus, it is
2842
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2843
2844
112
    children1.pop_back();
2845
    // Length of the non-substr components in the second argument
2846
    Node partLen1 =
2847
216
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2848
216
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2849
2850
216
    Node zero = nm->mkConst(Rational(0));
2851
216
    Node one = nm->mkConst(Rational(1));
2852
216
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2853
216
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2854
    // Check len(t) + j > len(x) + 1
2855
112
    if (ArithEntail::check(maxLen1, len0_1, true))
2856
    {
2857
8
      children1.push_back(nm->mkNode(
2858
          kind::STRING_SUBSTR,
2859
          lastChild1[0],
2860
          lastChild1[1],
2861
16
          nm->mkNode(
2862
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2863
      Node res = nm->mkNode(kind::STRING_REPLACE,
2864
                            node[0],
2865
16
                            utils::mkConcat(children1, stype),
2866
32
                            node[2]);
2867
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2868
    }
2869
  }
2870
2871
1748
  if (node[0].getKind() == STRING_REPLACE)
2872
  {
2873
388
    Node x = node[0];
2874
388
    Node y = node[1];
2875
388
    Node z = node[2];
2876
200
    if (x[0] == x[2] && x[0] == y)
2877
    {
2878
      // (str.replace (str.replace y w y) y z) -->
2879
      //   (str.replace (str.replace y w z) y z)
2880
      // if (str.len w) >= (str.len z) and w != z
2881
      //
2882
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2883
      // does appear in y.
2884
      //
2885
      // Case (1): In this case, the reasoning is trivial. The
2886
      // inner replace does not do anything, so we can just replace its third
2887
      // argument with any string.
2888
      //
2889
      // Case (2): After the inner replace, we are guaranteed to have a string
2890
      // that contains y at the index of w in the original string y. The outer
2891
      // replace then replaces that y with z, so we can short-circuit that
2892
      // replace by directly replacing w with z in the inner replace. We can
2893
      // only do that if the result of the new inner replace does not contain
2894
      // y, otherwise we end up doing two replaces that are different from the
2895
      // original expression. We enforce that by requiring that the length of w
2896
      // has to be greater or equal to the length of z and that w and z have to
2897
      // be different. This makes sure that an inner replace changes a string
2898
      // to a string that is shorter than y, making it impossible for the outer
2899
      // replace to match.
2900
136
      Node w = x[1];
2901
2902
      // (str.len w) >= (str.len z)
2903
136
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2904
136
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2905
74
      if (ArithEntail::check(wlen, zlen))
2906
      {
2907
        // w != z
2908
72
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2909
42
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2910
        {
2911
          Node ret = nm->mkNode(kind::STRING_REPLACE,
2912
24
                                nm->mkNode(kind::STRING_REPLACE, y, w, z),
2913
                                y,
2914
48
                                z);
2915
12
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2916
        }
2917
      }
2918
    }
2919
  }
2920
2921
1736
  if (node[1].getKind() == STRING_REPLACE)
2922
  {
2923
148
    if (node[1][0] == node[0])
2924
    {
2925
46
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2926
      {
2927
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2928
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2929
      }
2930
46
      bool dualReplIteSuccess = false;
2931
92
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2932
46
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2933
      {
2934
        // str.contains( x, z ) ---> false
2935
        //   implies
2936
        // str.replace( x, str.replace( x, y, z ), w ) --->
2937
        // ite( str.contains( x, y ), x, w )
2938
        dualReplIteSuccess = true;
2939
      }
2940
      else
2941
      {
2942
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2943
        //   implies
2944
        // str.replace( x, str.replace( x, y, z ), w ) --->
2945
        // ite( str.contains( x, y ), x, w )
2946
46
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2947
46
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2948
        {
2949
8
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2950
8
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2951
          {
2952
            dualReplIteSuccess = true;
2953
          }
2954
        }
2955
      }
2956
46
      if (dualReplIteSuccess)
2957
      {
2958
        Node res = nm->mkNode(ITE,
2959
                              nm->mkNode(STRING_CONTAINS, node[0], node[1][1]),
2960
                              node[0],
2961
                              node[2]);
2962
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2963
      }
2964
    }
2965
2966
148
    bool invSuccess = false;
2967
148
    if (node[1][1] == node[0])
2968
    {
2969
74
      if (node[1][0] == node[1][2])
2970
      {
2971
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2972
        invSuccess = true;
2973
      }
2974
74
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2975
      {
2976
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2977
        //   implies
2978
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2979
44
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2980
22
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2981
      }
2982
    }
2983
    else
2984
    {
2985
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2986
      //   implies
2987
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2988
148
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2989
74
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2990
      {
2991
16
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2992
16
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2993
      }
2994
    }
2995
148
    if (invSuccess)
2996
    {
2997
      Node res = nm->mkNode(kind::STRING_REPLACE, node[0], node[1][0], node[2]);
2998
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2999
    }
3000
  }
3001
1736
  if (node[2].getKind() == STRING_REPLACE)
3002
  {
3003
56
    if (node[2][1] == node[0])
3004
    {
3005
      // str.contains( z, w ) ----> false implies
3006
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
3007
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
3008
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
3009
      {
3010
        Node res =
3011
            nm->mkNode(kind::STRING_REPLACE, node[0], node[1], node[2][0]);
3012
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
3013
      }
3014
    }
3015
56
    if (node[2][0] == node[1])
3016
    {
3017
4
      bool success = false;
3018
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
3019
      {
3020
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
3021
        success = true;
3022
      }
3023
      else
3024
      {
3025
        // str.contains( x, z ) ----> false implies
3026
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
3027
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
3028
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
3029
      }
3030
4
      if (success)
3031
      {
3032
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
3033
      }
3034
    }
3035
  }
3036
  // miniscope based on components that do not contribute to contains
3037
  // for example,
3038
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
3039
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
3040
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
3041
1736
  if (StringsEntail::checkLengthOne(node[1]))
3042
  {
3043
1352
    Node lastLhs;
3044
680
    unsigned lastCheckIndex = 0;
3045
692
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
3046
    {
3047
124
      unsigned checkIndex = children0.size() - i;
3048
136
      std::vector<Node> checkLhs;
3049
124
      checkLhs.insert(
3050
248
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
3051
136
      Node lhs = utils::mkConcat(checkLhs, stype);
3052
136
      Node rhs = children0[checkIndex];
3053
136
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
3054
124
      if (!ctn.isNull() && ctn.getConst<bool>())
3055
      {
3056
12
        lastLhs = lhs;
3057
12
        lastCheckIndex = checkIndex;
3058
      }
3059
      else
3060
      {
3061
112
        break;
3062
      }
3063
    }
3064
680
    if (!lastLhs.isNull())
3065
    {
3066
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
3067
32
                             children0.end());
3068
16
      Node rem = utils::mkConcat(remc, stype);
3069
      Node ret =
3070
          nm->mkNode(STRING_CONCAT,
3071
16
                     nm->mkNode(STRING_REPLACE, lastLhs, node[1], node[2]),
3072
32
                     rem);
3073
      // for example:
3074
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3075
      // Since we know that the first occurrence of "A" cannot be in the
3076
      // second occurrence of x. Notice this is specific to single characters
3077
      // due to complications with finds that span multiple components for
3078
      // non-characters.
3079
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3080
    }
3081
  }
3082
3083
  // TODO (#1180) incorporate these?
3084
  // contains( t, s ) =>
3085
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3086
  // contains( t, s ) =>
3087
  //   contains( replace( t, s, r ), r ) ----> true
3088
3089
1728
  return node;
3090
}
3091
3092
198
Node SequencesRewriter::rewriteReplaceAll(Node node)
3093
{
3094
198
  Assert(node.getKind() == STRING_REPLACE_ALL);
3095
3096
396
  TypeNode stype = node.getType();
3097
3098
198
  if (node[0].isConst() && node[1].isConst())
3099
  {
3100
80
    std::vector<Node> children;
3101
80
    Node s = node[0];
3102
80
    Node t = node[1];
3103
40
    if (Word::isEmpty(s) || Word::isEmpty(t))
3104
    {
3105
10
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3106
    }
3107
30
    std::size_t sizeS = Word::getLength(s);
3108
30
    std::size_t sizeT = Word::getLength(t);
3109
30
    std::size_t index = 0;
3110
30
    std::size_t curr = 0;
3111
65
    do
3112
    {
3113
95
      curr = Word::find(s, t, index);
3114
95
      if (curr != std::string::npos)
3115
      {
3116
65
        if (curr > index)
3117
        {
3118
32
          children.push_back(Word::substr(s, index, curr - index));
3119
        }
3120
65
        children.push_back(node[2]);
3121
65
        index = curr + sizeT;
3122
      }
3123
      else
3124
      {
3125
30
        children.push_back(Word::substr(s, index, sizeS - index));
3126
      }
3127
95
    } while (curr != std::string::npos && curr < sizeS);
3128
    // constant evaluation
3129
60
    Node res = utils::mkConcat(children, stype);
3130
30
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3131
  }
3132
3133
  // rewrites that apply to both replace and replaceall
3134
316
  Node rri = rewriteReplaceInternal(node);
3135
158
  if (!rri.isNull())
3136
  {
3137
    // printing of the rewrite managed by the call above
3138
4
    return rri;
3139
  }
3140
3141
154
  return node;
3142
}
3143
3144
2224
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3145
{
3146
2224
  Kind nk = node.getKind();
3147
2224
  Assert(nk == STRING_REPLACE || nk == STRING_REPLACE_ALL);
3148
3149
2224
  if (node[1] == node[2])
3150
  {
3151
60
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3152
  }
3153
3154
2164
  if (node[0] == node[1])
3155
  {
3156
    // only holds for replaceall if non-empty
3157
107
    if (nk == STRING_REPLACE || StringsEntail::checkNonEmpty(node[1]))
3158
    {
3159
79
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3160
    }
3161
  }
3162
3163
2085
  return Node::null();
3164
}
3165
3166
403
Node SequencesRewriter::rewriteReplaceRe(Node node)
3167
{
3168
403
  Assert(node.getKind() == STRING_REPLACE_RE);
3169
403
  NodeManager* nm = NodeManager::currentNM();
3170
806
  Node x = node[0];
3171
806
  Node y = node[1];
3172
806
  Node z = node[2];
3173
3174
403
  if (RegExpEntail::isConstRegExp(y))
3175
  {
3176
271
    if (x.isConst())
3177
    {
3178
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3179
75
      std::pair<size_t, size_t> match = firstMatch(x, y);
3180
75
      if (match.first != string::npos)
3181
      {
3182
80
        String s = x.getConst<String>();
3183
        Node ret = nm->mkNode(STRING_CONCAT,
3184
80
                              nm->mkConst(s.substr(0, match.first)),
3185
                              z,
3186
160
                              nm->mkConst(s.substr(match.second)));
3187
40
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3188
      }
3189
      else
3190
      {
3191
35
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3192
      }
3193
    }
3194
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3195
388
    String emptyStr("");
3196
196
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3197
    {
3198
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3199
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3200
    }
3201
  }
3202
324
  return node;
3203
}
3204
3205
261
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3206
{
3207
261
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3208
261
  NodeManager* nm = NodeManager::currentNM();
3209
522
  Node x = node[0];
3210
522
  Node y = node[1];
3211
522
  Node z = node[2];
3212
3213
261
  if (RegExpEntail::isConstRegExp(y))
3214
  {
3215
207
    if (x.isConst())
3216
    {
3217
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3218
      //   "Z" ++ y ++ "Z" ++ y
3219
138
      TypeNode t = x.getType();
3220
138
      Node emp = Word::mkEmptyWord(t);
3221
      Node yp = Rewriter::rewrite(
3222
138
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3223
138
      std::vector<Node> res;
3224
138
      String rem = x.getConst<String>();
3225
69
      std::pair<size_t, size_t> match(0, 0);
3226
183
      while (rem.size() != 0)
3227
      {
3228
67
        match = firstMatch(nm->mkConst(rem), yp);
3229
67
        if (match.first == string::npos)
3230
        {
3231
10
          break;
3232
        }
3233
57
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3234
57
        res.push_back(z);
3235
57
        rem = rem.substr(match.second);
3236
      }
3237
69
      res.push_back(nm->mkConst(rem));
3238
138
      Node ret = utils::mkConcat(res, t);
3239
69
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3240
    }
3241
  }
3242
3243
192
  return node;
3244
}
3245
3246
297
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3247
{
3248
297
  Assert(n.isConst() && n.getType().isStringLike());
3249
297
  Assert(r.getType().isRegExp());
3250
297
  NodeManager* nm = NodeManager::currentNM();
3251
3252
594
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA));
3253
594
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3254
594
  String s = n.getConst<String>();
3255
3256
297
  if (s.size() == 0)
3257
  {
3258
63
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3259
    {
3260
6
      return std::make_pair(0, 0);
3261
    }
3262
    else
3263
    {
3264
57
      return std::make_pair(string::npos, string::npos);
3265
    }
3266
  }
3267
3268
341
  for (size_t i = 0, size = s.size(); i < size; i++)
3269
  {
3270
293
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3271
    {
3272
635
      for (size_t j = i; j <= size; j++)
3273
      {
3274
1084
        String substr = s.substr(i, j - i);
3275
635
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3276
        {
3277
186
          return std::make_pair(i, j);
3278
        }
3279
      }
3280
    }
3281
  }
3282
3283
48
  return std::make_pair(string::npos, string::npos);
3284
}
3285
3286
243
Node SequencesRewriter::rewriteStrReverse(Node node)
3287
{
3288
243
  Assert(node.getKind() == STRING_REV);
3289
243
  NodeManager* nm = NodeManager::currentNM();
3290
486
  Node x = node[0];
3291
243
  if (x.isConst())
3292
  {
3293
    // reverse the characters in the constant
3294
130
    Node retNode = Word::reverse(x);
3295
65
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3296
  }
3297
178
  else if (x.getKind() == STRING_CONCAT)
3298
  {
3299
78
    std::vector<Node> children;
3300
119
    for (const Node& nc : x)
3301
    {
3302
80
      children.push_back(nm->mkNode(STRING_REV, nc));
3303
    }
3304
39
    std::reverse(children.begin(), children.end());
3305
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3306
78
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3307
39
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3308
  }
3309
139
  else if (x.getKind() == STRING_REV)
3310
  {
3311
    // rev( rev( x ) ) --> x
3312
10
    Node retNode = x[0];
3313
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3314
  }
3315
134
  return node;
3316
}
3317
3318
153
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3319
{
3320
153
  Assert(n.getKind() == kind::STRING_PREFIX
3321
         || n.getKind() == kind::STRING_SUFFIX);
3322
153
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3323
153
  if (n[0] == n[1])
3324
  {
3325
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3326
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3327
  }
3328
145
  if (n[0].isConst())
3329
  {
3330
58
    if (Word::isEmpty(n[0]))
3331
    {
3332
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3333
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3334
    }
3335
  }
3336
141
  if (n[1].isConst())
3337
  {
3338
47
    Node s = n[1];
3339
47
    size_t lenS = Word::getLength(s);
3340
47
    if (n[0].isConst())
3341
    {
3342
      Node ret = NodeManager::currentNM()->mkConst(false);
3343
      Node t = n[0];
3344
      size_t lenT = Word::getLength(t);
3345
      if (lenS >= lenT)
3346
      {
3347
        if ((isPrefix && t == Word::prefix(s, lenT))
3348
            || (!isPrefix && t == Word::suffix(s, lenT)))
3349
        {
3350
          ret = NodeManager::currentNM()->mkConst(true);
3351
        }
3352
      }
3353
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3354
    }
3355
47
    else if (lenS == 0)
3356
    {
3357
8
      Node ret = n[0].eqNode(n[1]);
3358
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3359
    }
3360
43
    else if (lenS == 1)
3361
    {
3362
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3363
      // (str.contains "A" x )
3364
      Node ret =
3365
86
          NodeManager::currentNM()->mkNode(kind::STRING_CONTAINS, n[1], n[0]);
3366
43
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3367
    }
3368
  }
3369
188
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3370
188
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3371
188
  Node val;
3372
94
  if (isPrefix)
3373
  {
3374
67
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3375
  }
3376
  else
3377
  {
3378
27
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3379
  }
3380
3381
  // Check if we can turn the prefix/suffix into equalities by showing that the
3382
  // prefix/suffix is at least as long as the string
3383
188
  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
3384
94
  if (!eqs.isNull())
3385
  {
3386
8
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3387
  }
3388
3389
  // general reduction to equality + substr
3390
  Node retNode = n[0].eqNode(
3391
172
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3392
3393
86
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3394
}
3395
3396
81
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3397
{
3398
81
  NodeManager* nm = NodeManager::currentNM();
3399
162
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3400
162
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3401
162
  return res.isNull() ? n : res;
3402
}
3403
3404
93
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3405
{
3406
93
  NodeManager* nm = NodeManager::currentNM();
3407
3408
186
  Node res;
3409
93
  if (len.getKind() == CONST_RATIONAL)
3410
  {
3411
    // c -> "A" repeated c times
3412
162
    Rational ratLen = len.getConst<Rational>();
3413
81
    Assert(ratLen.getDenominator() == 1);
3414
162
    Integer intLen = ratLen.getNumerator();
3415
81
    uint32_t u = intLen.getUnsignedInt();
3416
81
    if (stype.isString())  // string-only
3417
    {
3418
81
      res = nm->mkConst(String(std::string(u, 'A')));
3419
    }
3420
    // we could do this for sequences, but we need to be careful: some
3421
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3422
    // sorts and arrays).
3423
  }
3424
12
  else if (len.getKind() == PLUS)
3425
  {
3426
    // x + y -> norm(x) + norm(y)
3427
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3428
12
    for (const auto& n : len)
3429
    {
3430
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3431
8
      if (sn.isNull())
3432
      {
3433
        return Node::null();
3434
      }
3435
16
      std::vector<Node> snChildren;
3436
8
      utils::getConcat(sn, snChildren);
3437
8
      concatBuilder.append(snChildren);
3438
    }
3439
4
    res = concatBuilder.constructNode();
3440
  }
3441
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3442
20
           && len[0].isConst())
3443
  {
3444
    // c * x -> norm(x) repeated c times
3445
8
    Rational ratReps = len[0].getConst<Rational>();
3446
4
    Assert(ratReps.getDenominator() == 1);
3447
8
    Integer intReps = ratReps.getNumerator();
3448
3449
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3450
4
    if (nRep.isNull())
3451
    {
3452
      return Node::null();
3453
    }
3454
8
    std::vector<Node> nRepChildren;
3455
4
    utils::getConcat(nRep, nRepChildren);
3456
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3457
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3458
    {
3459
8
      concatBuilder.append(nRepChildren);
3460
    }
3461
4
    res = concatBuilder.constructNode();
3462
  }
3463
4
  else if (len.getKind() == STRING_LENGTH)
3464
  {
3465
    // len(x) -> x
3466
4
    res = len[0];
3467
  }
3468
93
  return res;
3469
}
3470
3471
403
Node SequencesRewriter::rewriteSeqUnit(Node node)
3472
{
3473
403
  NodeManager* nm = NodeManager::currentNM();
3474
403
  if (node[0].isConst())
3475
  {
3476
274
    std::vector<Node> seq;
3477
137
    seq.push_back(node[0]);
3478
274
    TypeNode stype = node[0].getType();
3479
274
    Node ret = nm->mkConst(Sequence(stype, seq));
3480
137
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3481
  }
3482
266
  return node;
3483
}
3484
3485
194481
Node SequencesRewriter::returnRewrite(Node node,
3486
                                      Node ret,
3487
                                      Rewrite r,
3488
                                      bool rewriteEqAgain)
3489
{
3490
388962
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3491
194481
                           << "." << std::endl;
3492
3493
194481
  NodeManager* nm = NodeManager::currentNM();
3494
3495
194481
  if (d_statistics != nullptr)
3496
  {
3497
194415
    (*d_statistics) << r;
3498
  }
3499
3500
  // standard post-processing
3501
  // We rewrite (string) equalities immediately here. This allows us to forego
3502
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3503
  // of { s=t, t=s, true, false } ).
3504
194481
  Kind retk = ret.getKind();
3505
194481
  if (retk == OR || retk == AND)
3506
  {
3507
15690
    std::vector<Node> children;
3508
7845
    bool childChanged = false;
3509
26462
    for (const Node& cret : ret)
3510
    {
3511
37234
      Node creter = cret;
3512
18617
      if (cret.getKind() == EQUAL)
3513
      {
3514
12306
        creter = rewriteEqualityExt(cret);
3515
      }
3516
6311
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3517
      {
3518
360
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3519
      }
3520
18617
      childChanged = childChanged || cret != creter;
3521
18617
      children.push_back(creter);
3522
    }
3523
7845
    if (childChanged)
3524
    {
3525
2464
      ret = nm->mkNode(retk, children);
3526
7845
    }
3527
  }
3528
186636
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3529
  {
3530
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3531
  }
3532
186636
  else if (retk == EQUAL && (rewriteEqAgain || node.getKind() != EQUAL))
3533
  {
3534
91866
    Trace("strings-rewrite")
3535
45933
        << "Apply extended equality rewrite on " << ret << std::endl;
3536
45933
    ret = rewriteEqualityExt(ret);
3537
  }
3538
194481
  return ret;
3539
}
3540
3541
}  // namespace strings
3542
}  // namespace theory
3543
29280
}  // namespace cvc5