GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/sequences_rewriter.cpp Lines: 1561 1619 96.4 %
Date: 2021-05-21 Branches: 4893 9422 51.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/sequences_rewriter.h"
17
18
#include "expr/attribute.h"
19
#include "expr/node_builder.h"
20
#include "expr/sequence.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/skolem_cache.h"
25
#include "theory/strings/strings_rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/word.h"
28
#include "util/statistics_registry.h"
29
30
using namespace std;
31
using namespace cvc5::kind;
32
33
namespace cvc5 {
34
namespace theory {
35
namespace strings {
36
37
9145
SequencesRewriter::SequencesRewriter(HistogramStat<Rewrite>* statistics)
38
9145
    : d_statistics(statistics), d_stringsEntail(*this)
39
{
40
9145
}
41
42
89083
Node SequencesRewriter::rewriteEquality(Node node)
43
{
44
89083
  Assert(node.getKind() == kind::EQUAL);
45
89083
  if (node[0] == node[1])
46
  {
47
6830
    Node ret = NodeManager::currentNM()->mkConst(true);
48
3415
    return returnRewrite(node, ret, Rewrite::EQ_REFL);
49
  }
50
85668
  else if (node[0].isConst() && node[1].isConst())
51
  {
52
4032
    Node ret = NodeManager::currentNM()->mkConst(false);
53
2016
    return returnRewrite(node, ret, Rewrite::EQ_CONST_FALSE);
54
  }
55
56
  // ( ~contains( s, t ) V ~contains( t, s ) ) => ( s == t ---> false )
57
250388
  for (unsigned r = 0; r < 2; r++)
58
  {
59
    // must call rewrite contains directly to avoid infinite loop
60
    // we do a fix point since we may rewrite contains terms to simpler
61
    // contains terms.
62
333889
    Node ctn = d_stringsEntail.checkContains(node[r], node[1 - r], false);
63
167153
    if (!ctn.isNull())
64
    {
65
26870
      if (!ctn.getConst<bool>())
66
      {
67
417
        return returnRewrite(node, ctn, Rewrite::EQ_NCTN);
68
      }
69
      else
70
      {
71
        // definitely contains but not syntactically equal
72
        // We may be able to simplify, e.g.
73
        //  str.++( x, "a" ) == "a"  ----> x = ""
74
      }
75
    }
76
  }
77
78
  // ( len( s ) != len( t ) ) => ( s == t ---> false )
79
  // This covers cases like str.++( x, x ) == "a" ---> false
80
166470
  Node len0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
81
166470
  Node len1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
82
166470
  Node len_eq = len0.eqNode(len1);
83
83235
  len_eq = Rewriter::rewrite(len_eq);
84
83235
  if (len_eq.isConst() && !len_eq.getConst<bool>())
85
  {
86
5
    return returnRewrite(node, len_eq, Rewrite::EQ_LEN_DEQ);
87
  }
88
89
166460
  std::vector<Node> c[2];
90
249690
  for (unsigned i = 0; i < 2; i++)
91
  {
92
166460
    utils::getConcat(node[i], c[i]);
93
  }
94
95
  // check if the prefix, suffix mismatches
96
  //   For example, str.++( x, "a", y ) == str.++( x, "bc", z ) ---> false
97
83230
  unsigned minsize = std::min(c[0].size(), c[1].size());
98
249646
  for (unsigned r = 0; r < 2; r++)
99
  {
100
174763
    for (unsigned i = 0; i < minsize; i++)
101
    {
102
173252
      unsigned index1 = r == 0 ? i : (c[0].size() - 1) - i;
103
173252
      unsigned index2 = r == 0 ? i : (c[1].size() - 1) - i;
104
181574
      Node s = c[0][index1];
105
181574
      Node t = c[1][index2];
106
173252
      if (s.isConst() && t.isConst())
107
      {
108
3697
        size_t lenS = Word::getLength(s);
109
3697
        size_t lenT = Word::getLength(t);
110
3697
        size_t lenShort = lenS <= lenT ? lenS : lenT;
111
9736
        bool isSameFix = r == 1 ? Word::rstrncmp(s, t, lenShort)
112
6039
                                : Word::strncmp(s, t, lenShort);
113
3697
        if (!isSameFix)
114
        {
115
50
          Node ret = NodeManager::currentNM()->mkConst(false);
116
25
          return returnRewrite(node, ret, Rewrite::EQ_NFIX);
117
        }
118
      }
119
173227
      if (s != t)
120
      {
121
164905
        break;
122
      }
123
    }
124
  }
125
126
  // standard ordering
127
83205
  if (node[0] > node[1])
128
  {
129
36378
    Node ret = NodeManager::currentNM()->mkNode(kind::EQUAL, node[1], node[0]);
130
18189
    return returnRewrite(node, ret, Rewrite::EQ_SYM);
131
  }
132
65016
  return node;
133
}
134
135
79160
Node SequencesRewriter::rewriteEqualityExt(Node node)
136
{
137
79160
  Assert(node.getKind() == EQUAL);
138
79160
  if (node[0].getType().isInteger())
139
  {
140
11673
    return rewriteArithEqualityExt(node);
141
  }
142
67487
  if (node[0].getType().isStringLike())
143
  {
144
67487
    return rewriteStrEqualityExt(node);
145
  }
146
  return node;
147
}
148
149
67487
Node SequencesRewriter::rewriteStrEqualityExt(Node node)
150
{
151
67487
  Assert(node.getKind() == EQUAL && node[0].getType().isStringLike());
152
134974
  TypeNode stype = node[0].getType();
153
154
67487
  NodeManager* nm = NodeManager::currentNM();
155
134974
  std::vector<Node> c[2];
156
134974
  Node new_ret;
157
202461
  for (unsigned i = 0; i < 2; i++)
158
  {
159
134974
    utils::getConcat(node[i], c[i]);
160
  }
161
  // ------- equality unification
162
67487
  bool changed = false;
163
202461
  for (unsigned i = 0; i < 2; i++)
164
  {
165
139516
    while (!c[0].empty() && !c[1].empty() && c[0].back() == c[1].back())
166
    {
167
2271
      c[0].pop_back();
168
2271
      c[1].pop_back();
169
2271
      changed = true;
170
    }
171
    // splice constants
172
401749
    if (!c[0].empty() && !c[1].empty() && c[0].back().isConst()
173
247130
        && c[1].back().isConst())
174
    {
175
878
      Node cs[2];
176
      size_t csl[2];
177
1317
      for (unsigned j = 0; j < 2; j++)
178
      {
179
878
        cs[j] = c[j].back();
180
878
        csl[j] = Word::getLength(cs[j]);
181
      }
182
439
      size_t larger = csl[0] > csl[1] ? 0 : 1;
183
439
      size_t smallerSize = csl[1 - larger];
184
878
      if (cs[1 - larger]
185
878
          == (i == 0 ? Word::suffix(cs[larger], smallerSize)
186
                     : Word::prefix(cs[larger], smallerSize)))
187
      {
188
419
        size_t sizeDiff = csl[larger] - smallerSize;
189
419
        c[larger][c[larger].size() - 1] =
190
838
            i == 0 ? Word::prefix(cs[larger], sizeDiff)
191
                   : Word::suffix(cs[larger], sizeDiff);
192
419
        c[1 - larger].pop_back();
193
419
        changed = true;
194
      }
195
    }
196
404922
    for (unsigned j = 0; j < 2; j++)
197
    {
198
269948
      std::reverse(c[j].begin(), c[j].end());
199
    }
200
  }
201
67487
  if (changed)
202
  {
203
    // e.g. x++y = x++z ---> y = z, "AB" ++ x = "A" ++ y --> "B" ++ x = y
204
5138
    Node s1 = utils::mkConcat(c[0], stype);
205
5138
    Node s2 = utils::mkConcat(c[1], stype);
206
2569
    new_ret = s1.eqNode(s2);
207
2569
    node = returnRewrite(node, new_ret, Rewrite::STR_EQ_UNIFY);
208
  }
209
210
  // ------- homogeneous constants
211
202446
  for (unsigned i = 0; i < 2; i++)
212
  {
213
269933
    Node cn = StringsEntail::checkHomogeneousString(node[i]);
214
134974
    if (!cn.isNull() && !Word::isEmpty(cn))
215
    {
216
21908
      Assert(cn.isConst());
217
21908
      Assert(Word::getLength(cn) == 1);
218
219
      // The operands of the concat on each side of the equality without
220
      // constant strings
221
65724
      std::vector<Node> trimmed[2];
222
      // Counts the number of `cn`s on each side
223
21908
      size_t numCns[2] = {0, 0};
224
65694
      for (size_t j = 0; j < 2; j++)
225
      {
226
        // Sort the operands of the concats on both sides of the equality
227
        // (since both sides may only contain one char, the order does not
228
        // matter)
229
43801
        std::sort(c[j].begin(), c[j].end());
230
87460
        for (const Node& cc : c[j])
231
        {
232
43674
          if (cc.isConst())
233
          {
234
            // Count the number of `cn`s in the string constant and make
235
            // sure that all chars are `cn`s
236
43629
            std::vector<Node> veccc = Word::getChars(cc);
237
43244
            for (const Node& cv : veccc)
238
            {
239
21437
              if (cv != cn)
240
              {
241
                // This conflict case should mostly should be taken care of by
242
                // multiset reasoning in the strings rewriter, but we recognize
243
                // this conflict just in case.
244
15
                new_ret = nm->mkConst(false);
245
                return returnRewrite(
246
15
                    node, new_ret, Rewrite::STR_EQ_CONST_NHOMOG);
247
              }
248
21422
              numCns[j]++;
249
            }
250
          }
251
          else
252
          {
253
21852
            trimmed[j].push_back(cc);
254
          }
255
        }
256
      }
257
258
      // We have to remove the same number of `cn`s from both sides, so the
259
      // side with less `cn`s determines how many we can remove
260
21893
      size_t trimmedConst = std::min(numCns[0], numCns[1]);
261
65679
      for (size_t j = 0; j < 2; j++)
262
      {
263
43786
        size_t diff = numCns[j] - trimmedConst;
264
43786
        if (diff != 0)
265
        {
266
          // Add a constant string to the side with more `cn`s to restore
267
          // the difference in number of `cn`s
268
39354
          std::vector<Node> vec(diff, cn);
269
19677
          trimmed[j].push_back(Word::mkWordFlatten(vec));
270
        }
271
      }
272
273
43786
      Node lhs = utils::mkConcat(trimmed[i], stype);
274
43786
      Node ss = utils::mkConcat(trimmed[1 - i], stype);
275
21893
      if (lhs != node[i] || ss != node[1 - i])
276
      {
277
        // e.g.
278
        //  "AA" = y ++ x ---> "AA" = x ++ y if x < y
279
        //  "AAA" = y ++ "A" ++ z ---> "AA" = y ++ z
280
24
        new_ret = lhs.eqNode(ss);
281
24
        node = returnRewrite(node, new_ret, Rewrite::STR_EQ_HOMOG_CONST);
282
      }
283
    }
284
  }
285
286
  // ------- rewrites for (= "" _)
287
134944
  Node empty = Word::mkEmptyWord(stype);
288
190866
  for (size_t i = 0; i < 2; i++)
289
  {
290
129251
    if (node[i] == empty)
291
    {
292
83075
      Node ne = node[1 - i];
293
44466
      if (ne.getKind() == STRING_STRREPL)
294
      {
295
        // (= "" (str.replace x y x)) ---> (= x "")
296
1748
        if (ne[0] == ne[2])
297
        {
298
368
          Node ret = nm->mkNode(EQUAL, ne[0], empty);
299
184
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_X_Y_X);
300
        }
301
302
        // (= "" (str.replace x y "A")) ---> (and (= x "") (not (= y "")))
303
1564
        if (StringsEntail::checkNonEmpty(ne[2]))
304
        {
305
          Node ret =
306
              nm->mkNode(AND,
307
660
                         nm->mkNode(EQUAL, ne[0], empty),
308
1320
                         nm->mkNode(NOT, nm->mkNode(EQUAL, ne[1], empty)));
309
330
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
310
        }
311
312
        // (= "" (str.replace x "A" "")) ---> (str.prefix x "A")
313
1234
        if (StringsEntail::checkLengthOne(ne[1], true) && ne[2] == empty)
314
        {
315
426
          Node ret = nm->mkNode(STRING_PREFIX, ne[0], ne[1]);
316
213
          return returnRewrite(node, ret, Rewrite::STR_EMP_REPL_EMP);
317
        }
318
      }
319
42718
      else if (ne.getKind() == STRING_SUBSTR)
320
      {
321
33186
        Node zero = nm->mkConst(Rational(0));
322
323
19158
        if (ArithEntail::check(ne[1], false) && ArithEntail::check(ne[2], true))
324
        {
325
          // (= "" (str.substr x 0 m)) ---> (= "" x) if m > 0
326
3730
          if (ne[1] == zero)
327
          {
328
2636
            Node ret = nm->mkNode(EQUAL, ne[0], empty);
329
1318
            return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
330
          }
331
332
          // (= "" (str.substr x n m)) ---> (<= (str.len x) n)
333
          // if n >= 0 and m > 0
334
4824
          Node ret = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, ne[0]), ne[1]);
335
2412
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_LEN);
336
        }
337
338
        // (= "" (str.substr "A" 0 z)) ---> (<= z 0)
339
15428
        if (StringsEntail::checkNonEmpty(ne[0]) && ne[1] == zero)
340
        {
341
2800
          Node ret = nm->mkNode(LEQ, ne[2], zero);
342
1400
          return returnRewrite(node, ret, Rewrite::STR_EMP_SUBSTR_LEQ_Z);
343
        }
344
      }
345
    }
346
  }
347
348
  // ------- rewrites for (= (str.replace _ _ _) _)
349
184457
  for (size_t i = 0; i < 2; i++)
350
  {
351
123186
    if (node[i].getKind() == STRING_STRREPL)
352
    {
353
4966
      Node repl = node[i];
354
4966
      Node x = node[1 - i];
355
356
      // (= "A" (str.replace "" x y)) ---> (= "" (str.replace "A" y x))
357
2655
      if (StringsEntail::checkNonEmpty(x) && repl[0] == empty)
358
      {
359
        Node ret = nm->mkNode(
360
68
            EQUAL, empty, nm->mkNode(STRING_STRREPL, x, repl[2], repl[1]));
361
34
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_EMP);
362
      }
363
364
      // (= x (str.replace y x y)) ---> (= x y)
365
2621
      if (repl[0] == repl[2] && x == repl[1])
366
      {
367
4
        Node ret = nm->mkNode(EQUAL, x, repl[0]);
368
2
        return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_EQ);
369
      }
370
371
      // (= x (str.replace x "A" "B")) ---> (not (str.contains x "A"))
372
2619
      if (x == repl[0])
373
      {
374
2368
        Node eq = Rewriter::rewrite(nm->mkNode(EQUAL, repl[1], repl[2]));
375
1336
        if (eq.isConst() && !eq.getConst<bool>())
376
        {
377
608
          Node ret = nm->mkNode(NOT, nm->mkNode(STRING_STRCTN, x, repl[1]));
378
304
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_NOT_CTN);
379
        }
380
      }
381
382
      // (= (str.replace x y z) z) --> (or (= x y) (= x z))
383
      // if (str.len y) = (str.len z)
384
2315
      if (repl[2] == x)
385
      {
386
2524
        Node lenY = nm->mkNode(STRING_LENGTH, repl[1]);
387
2524
        Node lenZ = nm->mkNode(STRING_LENGTH, repl[2]);
388
1264
        if (ArithEntail::checkEq(lenY, lenZ))
389
        {
390
          Node ret = nm->mkNode(OR,
391
8
                                nm->mkNode(EQUAL, repl[0], repl[1]),
392
16
                                nm->mkNode(EQUAL, repl[0], repl[2]));
393
4
          return returnRewrite(node, ret, Rewrite::STR_EQ_REPL_TO_DIS);
394
        }
395
      }
396
    }
397
  }
398
399
  // Try to rewrite (= x y) into a conjunction of equalities based on length
400
  // entailment.
401
  //
402
  // (<= (str.len x) (str.++ y1 ... yn)) AND (= x (str.++ y1 ... yn)) --->
403
  //  (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
404
  //
405
  // where yi' and yi'' correspond to some yj and
406
  //   (<= (str.len x) (str.++ y1' ... ym'))
407
181432
  for (unsigned i = 0; i < 2; i++)
408
  {
409
121362
    if (node[1 - i].getKind() == STRING_CONCAT)
410
    {
411
1553
      new_ret = StringsEntail::inferEqsFromContains(node[i], node[1 - i]);
412
1553
      if (!new_ret.isNull())
413
      {
414
1201
        return returnRewrite(node, new_ret, Rewrite::STR_EQ_CONJ_LEN_ENTAIL);
415
      }
416
    }
417
  }
418
419
60070
  if (node[0].getKind() == STRING_CONCAT && node[1].getKind() == STRING_CONCAT)
420
  {
421
    // (= (str.++ x_1 ... x_i x_{i + 1} ... x_n)
422
    //    (str.++ y_1 ... y_j y_{j + 1} ... y_m)) --->
423
    //  (and (= (str.++ x_1 ... x_i) (str.++ y_1 ... y_j))
424
    //       (= (str.++ x_{i + 1} ... x_n) (str.++ y_{j + 1} ... y_m)))
425
    //
426
    // if (str.len (str.++ x_1 ... x_i)) = (str.len (str.++ y_1 ... y_j))
427
    //
428
    // This rewrite performs length-based equality splitting: If we can show
429
    // that two prefixes have the same length, we can split an equality into
430
    // two equalities, one over the prefixes and another over the suffixes.
431
34
    std::vector<Node> v0, v1;
432
21
    utils::getConcat(node[0], v0);
433
21
    utils::getConcat(node[1], v1);
434
21
    size_t startRhs = 0;
435
84
    for (size_t i = 0, size0 = v0.size(); i <= size0; i++)
436
    {
437
134
      std::vector<Node> pfxv0(v0.begin(), v0.begin() + i);
438
134
      Node pfx0 = utils::mkConcat(pfxv0, stype);
439
259
      for (size_t j = startRhs, size1 = v1.size(); j <= size1; j++)
440
      {
441
219
        if (!(i == 0 && j == 0) && !(i == v0.size() && j == v1.size()))
442
        {
443
339
          std::vector<Node> pfxv1(v1.begin(), v1.begin() + j);
444
339
          Node pfx1 = utils::mkConcat(pfxv1, stype);
445
339
          Node lenPfx0 = nm->mkNode(STRING_LENGTH, pfx0);
446
339
          Node lenPfx1 = nm->mkNode(STRING_LENGTH, pfx1);
447
448
185
          if (ArithEntail::checkEq(lenPfx0, lenPfx1))
449
          {
450
8
            std::vector<Node> sfxv0(v0.begin() + i, v0.end());
451
8
            std::vector<Node> sfxv1(v1.begin() + j, v1.end());
452
            Node ret = nm->mkNode(kind::AND,
453
8
                                  pfx0.eqNode(pfx1),
454
8
                                  utils::mkConcat(sfxv0, stype)
455
16
                                      .eqNode(utils::mkConcat(sfxv1, stype)));
456
4
            return returnRewrite(node, ret, Rewrite::SPLIT_EQ);
457
          }
458
181
          else if (ArithEntail::check(lenPfx1, lenPfx0, true))
459
          {
460
            // The prefix on the right-hand side is strictly longer than the
461
            // prefix on the left-hand side, so we try to strip the right-hand
462
            // prefix by the length of the left-hand prefix
463
            //
464
            // Example:
465
            // (= (str.++ "A" x y) (str.++ x "AB" z)) --->
466
            //   (and (= (str.++ "A" x) (str.++ x "A")) (= y (str.++ "B" z)))
467
23
            std::vector<Node> rpfxv1;
468
23
            if (StringsEntail::stripSymbolicLength(
469
                    pfxv1, rpfxv1, 1, lenPfx0, true))
470
            {
471
              // The rewrite requires the full left-hand prefix length to be
472
              // stripped (otherwise we would have to keep parts of the
473
              // left-hand prefix).
474
              if (lenPfx0.isConst() && lenPfx0.getConst<Rational>().isZero())
475
              {
476
                std::vector<Node> sfxv0(v0.begin() + i, v0.end());
477
                pfxv1.insert(pfxv1.end(), v1.begin() + j, v1.end());
478
                Node ret =
479
                    nm->mkNode(kind::AND,
480
                               pfx0.eqNode(utils::mkConcat(rpfxv1, stype)),
481
                               utils::mkConcat(sfxv0, stype)
482
                                   .eqNode(utils::mkConcat(pfxv1, stype)));
483
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_R);
484
              }
485
            }
486
487
            // If the prefix of the right-hand side is (strictly) longer than
488
            // the prefix of the left-hand side, we can advance the left-hand
489
            // side (since the length of the right-hand side is only increasing
490
            // in the inner loop)
491
23
            break;
492
          }
493
158
          else if (ArithEntail::check(lenPfx0, lenPfx1, true))
494
          {
495
            // The prefix on the left-hand side is strictly longer than the
496
            // prefix on the right-hand side, so we try to strip the left-hand
497
            // prefix by the length of the right-hand prefix
498
            //
499
            // Example:
500
            // (= (str.++ x "AB" z) (str.++ "A" x y)) --->
501
            //   (and (= (str.++ x "A") (str.++ "A" x)) (= (str.++ "B" z) y))
502
62
            std::vector<Node> rpfxv0;
503
33
            if (StringsEntail::stripSymbolicLength(
504
                    pfxv0, rpfxv0, 1, lenPfx1, true))
505
            {
506
              // The rewrite requires the full right-hand prefix length to be
507
              // stripped (otherwise we would have to keep parts of the
508
              // right-hand prefix).
509
6
              if (lenPfx1.isConst() && lenPfx1.getConst<Rational>().isZero())
510
              {
511
4
                pfxv0.insert(pfxv0.end(), v0.begin() + i, v0.end());
512
8
                std::vector<Node> sfxv1(v1.begin() + j, v1.end());
513
                Node ret =
514
                    nm->mkNode(kind::AND,
515
8
                               utils::mkConcat(rpfxv0, stype).eqNode(pfx1),
516
8
                               utils::mkConcat(pfxv0, stype)
517
16
                                   .eqNode(utils::mkConcat(sfxv1, stype)));
518
4
                return returnRewrite(node, ret, Rewrite::SPLIT_EQ_STRIP_L);
519
              }
520
            }
521
522
            // If the prefix of the left-hand side is (strictly) longer than
523
            // the prefix of the right-hand side, then we don't need to check
524
            // that right-hand prefix for future left-hand prefixes anymore
525
            // (since they are increasing in length)
526
29
            startRhs = j + 1;
527
          }
528
        }
529
      }
530
    }
531
  }
532
533
60062
  return node;
534
}
535
536
11673
Node SequencesRewriter::rewriteArithEqualityExt(Node node)
537
{
538
11673
  Assert(node.getKind() == EQUAL && node[0].getType().isInteger());
539
540
  // cases where we can solve the equality
541
542
  // notice we cannot rewrite str.to.int(x)=n to x="n" due to leading zeroes.
543
544
11673
  return node;
545
}
546
547
153191
Node SequencesRewriter::rewriteLength(Node node)
548
{
549
153191
  Assert(node.getKind() == STRING_LENGTH);
550
153191
  NodeManager* nm = NodeManager::currentNM();
551
153191
  Kind nk0 = node[0].getKind();
552
153191
  if (node[0].isConst())
553
  {
554
10934
    Node retNode = nm->mkConst(Rational(Word::getLength(node[0])));
555
5467
    return returnRewrite(node, retNode, Rewrite::LEN_EVAL);
556
  }
557
147724
  else if (nk0 == kind::STRING_CONCAT)
558
  {
559
7588
    Node tmpNode = node[0];
560
7588
    if (tmpNode.getKind() == kind::STRING_CONCAT)
561
    {
562
15176
      std::vector<Node> node_vec;
563
29979
      for (unsigned int i = 0; i < tmpNode.getNumChildren(); ++i)
564
      {
565
22391
        if (tmpNode[i].isConst())
566
        {
567
5069
          node_vec.push_back(
568
10138
              nm->mkConst(Rational(Word::getLength(tmpNode[i]))));
569
        }
570
        else
571
        {
572
17322
          node_vec.push_back(NodeManager::currentNM()->mkNode(
573
              kind::STRING_LENGTH, tmpNode[i]));
574
        }
575
      }
576
15176
      Node retNode = NodeManager::currentNM()->mkNode(kind::PLUS, node_vec);
577
7588
      return returnRewrite(node, retNode, Rewrite::LEN_CONCAT);
578
    }
579
  }
580
140136
  else if (nk0 == STRING_STRREPL || nk0 == STRING_STRREPLALL)
581
  {
582
1783
    Node len1 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][1]));
583
1783
    Node len2 = Rewriter::rewrite(nm->mkNode(STRING_LENGTH, node[0][2]));
584
911
    if (len1 == len2)
585
    {
586
      // len( y ) == len( z ) => len( str.replace( x, y, z ) ) ---> len( x )
587
78
      Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
588
39
      return returnRewrite(node, retNode, Rewrite::LEN_REPL_INV);
589
872
    }
590
  }
591
139225
  else if (nk0 == STRING_TOLOWER || nk0 == STRING_TOUPPER || nk0 == STRING_REV
592
139119
           || nk0 == STRING_UPDATE)
593
  {
594
    // len( f( x ) ) == len( x ) where f is tolower, toupper, or rev.
595
    // len( update( x, n, y ) ) = len( x )
596
240
    Node retNode = nm->mkNode(STRING_LENGTH, node[0][0]);
597
120
    return returnRewrite(node, retNode, Rewrite::LEN_CONV_INV);
598
  }
599
139105
  else if (nk0 == SEQ_UNIT)
600
  {
601
186
    Node retNode = nm->mkConst(Rational(1));
602
93
    return returnRewrite(node, retNode, Rewrite::LEN_SEQ_UNIT);
603
  }
604
139884
  return node;
605
}
606
607
// TODO (#1180) add rewrite
608
//  str.++( str.substr( x, n1, n2 ), str.substr( x, n1+n2, n3 ) ) --->
609
//  str.substr( x, n1, n2+n3 )
610
38295
Node SequencesRewriter::rewriteConcat(Node node)
611
{
612
38295
  Assert(node.getKind() == kind::STRING_CONCAT);
613
76590
  Trace("strings-rewrite-debug")
614
38295
      << "Strings::rewriteConcat start " << node << std::endl;
615
76590
  std::vector<Node> node_vec;
616
76590
  Node preNode = Node::null();
617
152126
  for (Node tmpNode : node)
618
  {
619
113831
    if (tmpNode.getKind() == STRING_CONCAT)
620
    {
621
618
      unsigned j = 0;
622
      // combine the first term with the previous constant if applicable
623
618
      if (!preNode.isNull())
624
      {
625
202
        if (tmpNode[0].isConst())
626
        {
627
224
          std::vector<Node> wvec;
628
112
          wvec.push_back(preNode);
629
112
          wvec.push_back(tmpNode[0]);
630
112
          preNode = Word::mkWordFlatten(wvec);
631
112
          node_vec.push_back(preNode);
632
        }
633
        else
634
        {
635
90
          node_vec.push_back(preNode);
636
90
          node_vec.push_back(tmpNode[0]);
637
        }
638
202
        preNode = Node::null();
639
202
        ++j;
640
      }
641
      // insert the middle terms to node_vec
642
618
      if (j <= tmpNode.getNumChildren() - 1)
643
      {
644
618
        node_vec.insert(node_vec.end(), tmpNode.begin() + j, tmpNode.end() - 1);
645
      }
646
      // take the last term as the current
647
618
      tmpNode = tmpNode[tmpNode.getNumChildren() - 1];
648
    }
649
113831
    if (!tmpNode.isConst())
650
    {
651
66020
      if (!preNode.isNull())
652
      {
653
12203
        if (preNode.isConst() && !Word::isEmpty(preNode))
654
        {
655
11590
          node_vec.push_back(preNode);
656
        }
657
12203
        preNode = Node::null();
658
      }
659
66020
      node_vec.push_back(tmpNode);
660
    }
661
    else
662
    {
663
47811
      if (preNode.isNull())
664
      {
665
30382
        preNode = tmpNode;
666
      }
667
      else
668
      {
669
34858
        std::vector<Node> vec;
670
17429
        vec.push_back(preNode);
671
17429
        vec.push_back(tmpNode);
672
17429
        preNode = Word::mkWordFlatten(vec);
673
      }
674
    }
675
  }
676
38295
  if (!preNode.isNull() && (!preNode.isConst() || !Word::isEmpty(preNode)))
677
  {
678
17178
    node_vec.push_back(preNode);
679
  }
680
681
  // Sort adjacent operands in str.++ that all result in the same string or the
682
  // empty string.
683
  //
684
  // E.g.: (str.++ ... (str.replace "A" x "") "A" (str.substr "A" 0 z) ...) -->
685
  // (str.++ ... [sort those 3 arguments] ... )
686
38295
  size_t lastIdx = 0;
687
76590
  Node lastX;
688
134603
  for (size_t i = 0, nsize = node_vec.size(); i < nsize; i++)
689
  {
690
192616
    Node s = StringsEntail::getStringOrEmpty(node_vec[i]);
691
96308
    bool nextX = false;
692
96308
    if (s != lastX)
693
    {
694
92731
      nextX = true;
695
    }
696
697
96308
    if (nextX)
698
    {
699
92731
      std::sort(node_vec.begin() + lastIdx, node_vec.begin() + i);
700
92731
      lastX = s;
701
92731
      lastIdx = i;
702
    }
703
  }
704
38295
  std::sort(node_vec.begin() + lastIdx, node_vec.end());
705
706
76590
  TypeNode tn = node.getType();
707
76590
  Node retNode = utils::mkConcat(node_vec, tn);
708
76590
  Trace("strings-rewrite-debug")
709
38295
      << "Strings::rewriteConcat end " << retNode << std::endl;
710
38295
  if (retNode != node)
711
  {
712
12291
    return returnRewrite(node, retNode, Rewrite::CONCAT_NORM);
713
  }
714
26004
  return node;
715
}
716
717
2350
Node SequencesRewriter::rewriteConcatRegExp(TNode node)
718
{
719
2350
  Assert(node.getKind() == kind::REGEXP_CONCAT);
720
2350
  NodeManager* nm = NodeManager::currentNM();
721
4700
  Trace("strings-rewrite-debug")
722
2350
      << "Strings::rewriteConcatRegExp flatten " << node << std::endl;
723
4700
  Node retNode = node;
724
4700
  std::vector<Node> vec;
725
2350
  bool changed = false;
726
4700
  Node emptyRe;
727
728
  // get the string type that are members of this regular expression
729
4700
  TypeNode rtype = node.getType();
730
4700
  TypeNode stype;
731
2350
  if (rtype.isRegExp())
732
  {
733
    // standard regular expressions are for strings
734
2350
    stype = nm->stringType();
735
  }
736
  else
737
  {
738
    Unimplemented();
739
  }
740
741
9145
  for (const Node& c : node)
742
  {
743
6829
    if (c.getKind() == REGEXP_CONCAT)
744
    {
745
177
      changed = true;
746
567
      for (const Node& cc : c)
747
      {
748
390
        vec.push_back(cc);
749
      }
750
    }
751
16175
    else if (c.getKind() == STRING_TO_REGEXP && c[0].isConst()
752
16125
             && Word::isEmpty(c[0]))
753
    {
754
108
      changed = true;
755
108
      emptyRe = c;
756
    }
757
6544
    else if (c.getKind() == REGEXP_EMPTY)
758
    {
759
      // re.++( ..., empty, ... ) ---> empty
760
68
      std::vector<Node> nvec;
761
68
      Node ret = nm->mkNode(REGEXP_EMPTY, nvec);
762
34
      return returnRewrite(node, ret, Rewrite::RE_CONCAT_EMPTY);
763
    }
764
    else
765
    {
766
6510
      vec.push_back(c);
767
    }
768
  }
769
2316
  if (changed)
770
  {
771
    // flatten
772
    // this handles nested re.++ and elimination or str.to.re(""), e.g.:
773
    // re.++( re.++( R1, R2 ), str.to.re(""), R3 ) ---> re.++( R1, R2, R3 )
774
267
    if (vec.empty())
775
    {
776
2
      Assert(!emptyRe.isNull());
777
2
      retNode = emptyRe;
778
    }
779
    else
780
    {
781
265
      retNode = vec.size() == 1 ? vec[0] : nm->mkNode(REGEXP_CONCAT, vec);
782
    }
783
267
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_FLATTEN);
784
  }
785
4098
  Trace("strings-rewrite-debug")
786
2049
      << "Strings::rewriteConcatRegExp start " << node << std::endl;
787
4098
  std::vector<Node> cvec;
788
  // the current accumulation of constant strings
789
4098
  std::vector<Node> preReStr;
790
  // whether the last component was (_)*
791
2049
  bool lastAllStar = false;
792
4098
  String emptyStr = String("");
793
  // this loop checks to see if components can be combined or dropped
794
10256
  for (unsigned i = 0, size = vec.size(); i <= size; i++)
795
  {
796
16414
    Node curr;
797
8207
    if (i < size)
798
    {
799
6158
      curr = vec[i];
800
6158
      Assert(curr.getKind() != REGEXP_CONCAT);
801
    }
802
    // update preReStr
803
8207
    if (!curr.isNull() && curr.getKind() == STRING_TO_REGEXP)
804
    {
805
2567
      lastAllStar = false;
806
2567
      preReStr.push_back(curr[0]);
807
2567
      curr = Node::null();
808
    }
809
5640
    else if (!preReStr.empty())
810
    {
811
2113
      Assert(!lastAllStar);
812
      // this groups consecutive strings a++b ---> ab
813
4226
      Node acc = nm->mkNode(STRING_TO_REGEXP, utils::mkConcat(preReStr, stype));
814
2113
      cvec.push_back(acc);
815
2113
      preReStr.clear();
816
    }
817
3527
    else if (!curr.isNull() && lastAllStar)
818
    {
819
      // if empty, drop it
820
      // e.g. this ensures we rewrite (_)* ++ (a)* ---> (_)*
821
351
      if (RegExpEntail::isConstRegExp(curr)
822
351
          && RegExpEntail::testConstStringInRegExp(emptyStr, 0, curr))
823
      {
824
49
        curr = Node::null();
825
      }
826
    }
827
8207
    if (!curr.isNull())
828
    {
829
3542
      lastAllStar = false;
830
3542
      if (curr.getKind() == REGEXP_STAR)
831
      {
832
        // we can group stars (a)* ++ (a)* ---> (a)*
833
1798
        if (!cvec.empty() && cvec.back() == curr)
834
        {
835
8
          curr = Node::null();
836
        }
837
1790
        else if (curr[0].getKind() == REGEXP_SIGMA)
838
        {
839
820
          Assert(!lastAllStar);
840
820
          lastAllStar = true;
841
          // go back and remove empty ones from back of cvec
842
          // e.g. this ensures we rewrite (a)* ++ (_)* ---> (_)*
843
2351
          while (!cvec.empty() && RegExpEntail::isConstRegExp(cvec.back())
844
3163
                 && RegExpEntail::testConstStringInRegExp(
845
679
                     emptyStr, 0, cvec.back()))
846
          {
847
8
            cvec.pop_back();
848
          }
849
        }
850
      }
851
    }
852
8207
    if (!curr.isNull())
853
    {
854
3534
      cvec.push_back(curr);
855
    }
856
  }
857
2049
  Assert(!cvec.empty());
858
2049
  retNode = utils::mkConcat(cvec, rtype);
859
2049
  if (retNode != node)
860
  {
861
    // handles all cases where consecutive re constants are combined or
862
    // dropped as described in the loop above.
863
201
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT);
864
  }
865
866
  // flipping adjacent star arguments
867
1848
  changed = false;
868
5174
  for (size_t i = 0, size = cvec.size() - 1; i < size; i++)
869
  {
870
3326
    if (cvec[i].getKind() == REGEXP_STAR && cvec[i][0] == cvec[i + 1])
871
    {
872
      // by convention, flip the order (a*)++a ---> a++(a*)
873
64
      std::swap(cvec[i], cvec[i + 1]);
874
64
      changed = true;
875
    }
876
  }
877
1848
  if (changed)
878
  {
879
48
    retNode = utils::mkConcat(cvec, rtype);
880
48
    return returnRewrite(node, retNode, Rewrite::RE_CONCAT_OPT);
881
  }
882
1800
  return node;
883
}
884
885
884
Node SequencesRewriter::rewriteStarRegExp(TNode node)
886
{
887
884
  Assert(node.getKind() == REGEXP_STAR);
888
884
  NodeManager* nm = NodeManager::currentNM();
889
1768
  Node retNode = node;
890
884
  if (node[0].getKind() == REGEXP_STAR)
891
  {
892
    // ((R)*)* ---> R*
893
4
    return returnRewrite(node, node[0], Rewrite::RE_STAR_NESTED_STAR);
894
  }
895
2982
  else if (node[0].getKind() == STRING_TO_REGEXP && node[0][0].isConst()
896
2956
           && Word::isEmpty(node[0][0]))
897
  {
898
    // ("")* ---> ""
899
14
    return returnRewrite(node, node[0], Rewrite::RE_STAR_EMPTY_STRING);
900
  }
901
866
  else if (node[0].getKind() == REGEXP_EMPTY)
902
  {
903
    // (empty)* ---> ""
904
8
    retNode = nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")));
905
8
    return returnRewrite(node, retNode, Rewrite::RE_STAR_EMPTY);
906
  }
907
858
  else if (node[0].getKind() == REGEXP_UNION)
908
  {
909
    // simplification of unions under star
910
168
    if (RegExpEntail::hasEpsilonNode(node[0]))
911
    {
912
22
      bool changed = false;
913
22
      std::vector<Node> node_vec;
914
66
      for (const Node& nc : node[0])
915
      {
916
130
        if (nc.getKind() == STRING_TO_REGEXP && nc[0].isConst()
917
130
            && Word::isEmpty(nc[0]))
918
        {
919
          // can be removed
920
22
          changed = true;
921
        }
922
        else
923
        {
924
22
          node_vec.push_back(nc);
925
        }
926
      }
927
22
      if (changed)
928
      {
929
22
        retNode = node_vec.size() == 1 ? node_vec[0]
930
                                       : nm->mkNode(REGEXP_UNION, node_vec);
931
22
        retNode = nm->mkNode(REGEXP_STAR, retNode);
932
        // simplification of union beneath star based on loop above
933
        // for example, ( "" | "a" )* ---> ("a")*
934
22
        return returnRewrite(node, retNode, Rewrite::RE_STAR_UNION);
935
      }
936
    }
937
  }
938
836
  return node;
939
}
940
941
731
Node SequencesRewriter::rewriteAndOrRegExp(TNode node)
942
{
943
731
  Kind nk = node.getKind();
944
731
  Assert(nk == REGEXP_UNION || nk == REGEXP_INTER);
945
1462
  Trace("strings-rewrite-debug")
946
731
      << "Strings::rewriteAndOrRegExp start " << node << std::endl;
947
1462
  std::vector<Node> node_vec;
948
1462
  std::vector<Node> polRegExp[2];
949
2331
  for (const Node& ni : node)
950
  {
951
1602
    if (ni.getKind() == nk)
952
    {
953
33
      for (const Node& nic : ni)
954
      {
955
22
        if (std::find(node_vec.begin(), node_vec.end(), nic) == node_vec.end())
956
        {
957
22
          node_vec.push_back(nic);
958
        }
959
      }
960
    }
961
1591
    else if (ni.getKind() == REGEXP_EMPTY)
962
    {
963
39
      if (nk == REGEXP_INTER)
964
      {
965
2
        return returnRewrite(node, ni, Rewrite::RE_AND_EMPTY);
966
      }
967
      // otherwise, can ignore
968
    }
969
1552
    else if (ni.getKind() == REGEXP_STAR && ni[0].getKind() == REGEXP_SIGMA)
970
    {
971
8
      if (nk == REGEXP_UNION)
972
      {
973
        return returnRewrite(node, ni, Rewrite::RE_OR_ALL);
974
      }
975
      // otherwise, can ignore
976
    }
977
1544
    else if (std::find(node_vec.begin(), node_vec.end(), ni) == node_vec.end())
978
    {
979
1525
      node_vec.push_back(ni);
980
1525
      uint32_t pindex = ni.getKind() == REGEXP_COMPLEMENT ? 1 : 0;
981
3050
      Node nia = pindex == 1 ? ni[0] : ni;
982
1525
      polRegExp[pindex].push_back(nia);
983
    }
984
  }
985
729
  NodeManager* nm = NodeManager::currentNM();
986
1458
  std::vector<Node> emptyVec;
987
  // use inclusion tests
988
847
  for (const Node& negMem : polRegExp[1])
989
  {
990
222
    for (const Node& posMem : polRegExp[0])
991
    {
992
192
      Node m1 = nk == REGEXP_INTER ? negMem : posMem;
993
192
      Node m2 = nk == REGEXP_INTER ? posMem : negMem;
994
      // inclusion test for conflicting case m1 contains m2
995
      // (re.inter (re.comp R1) R2) --> re.none where R1 includes R2
996
      // (re.union R1 (re.comp R2)) --> (re.* re.allchar) where R1 includes R2
997
104
      if (RegExpEntail::regExpIncludes(m1, m2))
998
      {
999
32
        Node retNode;
1000
16
        if (nk == REGEXP_INTER)
1001
        {
1002
16
          retNode = nm->mkNode(kind::REGEXP_EMPTY, emptyVec);
1003
        }
1004
        else
1005
        {
1006
          retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
1007
        }
1008
16
        return returnRewrite(node, retNode, Rewrite::RE_ANDOR_INC_CONFLICT);
1009
      }
1010
    }
1011
  }
1012
1426
  Node retNode;
1013
713
  if (node_vec.empty())
1014
  {
1015
    if (nk == REGEXP_INTER)
1016
    {
1017
      retNode = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
1018
    }
1019
    else
1020
    {
1021
      retNode = nm->mkNode(kind::REGEXP_EMPTY, emptyVec);
1022
    }
1023
  }
1024
  else
1025
  {
1026
713
    retNode = node_vec.size() == 1 ? node_vec[0] : nm->mkNode(nk, node_vec);
1027
  }
1028
713
  if (retNode != node)
1029
  {
1030
    // flattening and removing children, based on loop above
1031
65
    return returnRewrite(node, retNode, Rewrite::RE_ANDOR_FLATTEN);
1032
  }
1033
648
  return node;
1034
}
1035
1036
24
Node SequencesRewriter::rewriteLoopRegExp(TNode node)
1037
{
1038
24
  Assert(node.getKind() == REGEXP_LOOP);
1039
48
  Node retNode = node;
1040
48
  Node r = node[0];
1041
24
  if (r.getKind() == REGEXP_STAR)
1042
  {
1043
    return returnRewrite(node, r, Rewrite::RE_LOOP_STAR);
1044
  }
1045
24
  NodeManager* nm = NodeManager::currentNM();
1046
48
  cvc5::Rational rMaxInt(String::maxSize());
1047
24
  uint32_t l = utils::getLoopMinOccurrences(node);
1048
48
  std::vector<Node> vec_nodes;
1049
110
  for (unsigned i = 0; i < l; i++)
1050
  {
1051
86
    vec_nodes.push_back(r);
1052
  }
1053
  Node n =
1054
24
      vec_nodes.size() == 0
1055
24
          ? nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String("")))
1056
48
          : vec_nodes.size() == 1 ? r : nm->mkNode(REGEXP_CONCAT, vec_nodes);
1057
24
  uint32_t u = utils::getLoopMaxOccurrences(node);
1058
24
  if (u < l)
1059
  {
1060
8
    std::vector<Node> nvec;
1061
4
    retNode = nm->mkNode(REGEXP_EMPTY, nvec);
1062
  }
1063
20
  else if (u == l)
1064
  {
1065
10
    retNode = n;
1066
  }
1067
  else
1068
  {
1069
20
    std::vector<Node> vec2;
1070
10
    vec2.push_back(n);
1071
20
    TypeNode rtype = nm->regExpType();
1072
52
    for (uint32_t j = l; j < u; j++)
1073
    {
1074
42
      vec_nodes.push_back(r);
1075
42
      n = utils::mkConcat(vec_nodes, rtype);
1076
42
      vec2.push_back(n);
1077
    }
1078
10
    retNode = nm->mkNode(REGEXP_UNION, vec2);
1079
  }
1080
48
  Trace("strings-lp") << "Strings::lp " << node << " => " << retNode
1081
24
                      << std::endl;
1082
24
  if (retNode != node)
1083
  {
1084
24
    return returnRewrite(node, retNode, Rewrite::RE_LOOP);
1085
  }
1086
  return node;
1087
}
1088
1089
4
Node SequencesRewriter::rewriteRepeatRegExp(TNode node)
1090
{
1091
4
  Assert(node.getKind() == REGEXP_REPEAT);
1092
4
  NodeManager* nm = NodeManager::currentNM();
1093
  // ((_ re.^ n) R) --> ((_ re.loop n n) R)
1094
4
  unsigned r = utils::getRepeatAmount(node);
1095
8
  Node lop = nm->mkConst(RegExpLoop(r, r));
1096
8
  Node retNode = nm->mkNode(REGEXP_LOOP, lop, node[0]);
1097
8
  return returnRewrite(node, retNode, Rewrite::RE_REPEAT_ELIM);
1098
}
1099
1100
25
Node SequencesRewriter::rewriteOptionRegExp(TNode node)
1101
{
1102
25
  Assert(node.getKind() == REGEXP_OPT);
1103
25
  NodeManager* nm = NodeManager::currentNM();
1104
  Node retNode =
1105
      nm->mkNode(REGEXP_UNION,
1106
50
                 nm->mkNode(STRING_TO_REGEXP, nm->mkConst(String(""))),
1107
100
                 node[0]);
1108
50
  return returnRewrite(node, retNode, Rewrite::RE_OPT_ELIM);
1109
}
1110
1111
35
Node SequencesRewriter::rewritePlusRegExp(TNode node)
1112
{
1113
35
  Assert(node.getKind() == REGEXP_PLUS);
1114
35
  NodeManager* nm = NodeManager::currentNM();
1115
  Node retNode =
1116
70
      nm->mkNode(REGEXP_CONCAT, node[0], nm->mkNode(REGEXP_STAR, node[0]));
1117
70
  return returnRewrite(node, retNode, Rewrite::RE_PLUS_ELIM);
1118
}
1119
1120
32
Node SequencesRewriter::rewriteDifferenceRegExp(TNode node)
1121
{
1122
32
  Assert(node.getKind() == REGEXP_DIFF);
1123
32
  NodeManager* nm = NodeManager::currentNM();
1124
  Node retNode =
1125
64
      nm->mkNode(REGEXP_INTER, node[0], nm->mkNode(REGEXP_COMPLEMENT, node[1]));
1126
64
  return returnRewrite(node, retNode, Rewrite::RE_DIFF_ELIM);
1127
}
1128
1129
348
Node SequencesRewriter::rewriteRangeRegExp(TNode node)
1130
{
1131
348
  Assert(node.getKind() == REGEXP_RANGE);
1132
  unsigned ch[2];
1133
1044
  for (size_t i = 0; i < 2; ++i)
1134
  {
1135
696
    if (!node[i].isConst() || node[i].getConst<String>().size() != 1)
1136
    {
1137
      // not applied to characters, it is not handled
1138
      return node;
1139
    }
1140
696
    ch[i] = node[i].getConst<String>().front();
1141
  }
1142
1143
348
  NodeManager* nm = NodeManager::currentNM();
1144
348
  if (node[0] == node[1])
1145
  {
1146
    Node retNode = nm->mkNode(STRING_TO_REGEXP, node[0]);
1147
    // re.range( "A", "A" ) ---> str.to_re( "A" )
1148
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_SINGLE);
1149
  }
1150
1151
348
  if (ch[0] > ch[1])
1152
  {
1153
    // re.range( "B", "A" ) ---> re.none
1154
4
    std::vector<Node> emptyVec;
1155
4
    Node retNode = nm->mkNode(REGEXP_EMPTY, emptyVec);
1156
2
    return returnRewrite(node, retNode, Rewrite::RE_RANGE_EMPTY);
1157
  }
1158
346
  return node;
1159
}
1160
1161
6807
Node SequencesRewriter::rewriteMembership(TNode node)
1162
{
1163
6807
  NodeManager* nm = NodeManager::currentNM();
1164
13614
  Node x = node[0];
1165
13614
  Node r = node[1];
1166
1167
13614
  TypeNode stype = x.getType();
1168
13614
  TypeNode rtype = r.getType();
1169
1170
6807
  if(r.getKind() == kind::REGEXP_EMPTY)
1171
  {
1172
132
    Node retNode = NodeManager::currentNM()->mkConst(false);
1173
66
    return returnRewrite(node, retNode, Rewrite::RE_IN_EMPTY);
1174
  }
1175
6741
  else if (x.isConst() && RegExpEntail::isConstRegExp(r))
1176
  {
1177
    // test whether x in node[1]
1178
1984
    cvc5::String s = x.getConst<String>();
1179
992
    bool test = RegExpEntail::testConstStringInRegExp(s, 0, r);
1180
1984
    Node retNode = NodeManager::currentNM()->mkConst(test);
1181
992
    return returnRewrite(node, retNode, Rewrite::RE_IN_EVAL);
1182
  }
1183
5749
  else if (r.getKind() == kind::REGEXP_SIGMA)
1184
  {
1185
96
    Node one = nm->mkConst(Rational(1));
1186
96
    Node retNode = one.eqNode(nm->mkNode(STRING_LENGTH, x));
1187
48
    return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA);
1188
  }
1189
5701
  else if (r.getKind() == kind::REGEXP_STAR)
1190
  {
1191
2045
    if (x.isConst())
1192
    {
1193
27
      size_t xlen = Word::getLength(x);
1194
27
      if (xlen == 0)
1195
      {
1196
18
        Node retNode = nm->mkConst(true);
1197
        // e.g. (str.in.re "" (re.* (str.to.re x))) ----> true
1198
9
        return returnRewrite(node, retNode, Rewrite::RE_EMPTY_IN_STR_STAR);
1199
      }
1200
18
      else if (xlen == 1)
1201
      {
1202
18
        if (r[0].getKind() == STRING_TO_REGEXP)
1203
        {
1204
4
          Node retNode = r[0][0].eqNode(x);
1205
          // e.g. (str.in.re "A" (re.* (str.to.re x))) ----> "A" = x
1206
2
          return returnRewrite(node, retNode, Rewrite::RE_CHAR_IN_STR_STAR);
1207
        }
1208
      }
1209
    }
1210
2018
    else if (x.getKind() == STRING_CONCAT)
1211
    {
1212
      // (str.in.re (str.++ x1 ... xn) (re.* R)) -->
1213
      //   (str.in.re x1 (re.* R)) AND ... AND (str.in.re xn (re.* R))
1214
      //     if the length of all strings in R is one.
1215
373
      Node flr = RegExpEntail::getFixedLengthForRegexp(r[0]);
1216
247
      if (!flr.isNull())
1217
      {
1218
121
        Node one = nm->mkConst(Rational(1));
1219
121
        if (flr == one)
1220
        {
1221
242
          NodeBuilder nb(AND);
1222
438
          for (const Node& xc : x)
1223
          {
1224
317
            nb << nm->mkNode(STRING_IN_REGEXP, xc, r);
1225
          }
1226
          return returnRewrite(
1227
121
              node, nb.constructNode(), Rewrite::RE_IN_DIST_CHAR_STAR);
1228
        }
1229
      }
1230
    }
1231
1913
    if (r[0].getKind() == kind::REGEXP_SIGMA)
1232
    {
1233
190
      Node retNode = NodeManager::currentNM()->mkConst(true);
1234
95
      return returnRewrite(node, retNode, Rewrite::RE_IN_SIGMA_STAR);
1235
    }
1236
  }
1237
3656
  else if (r.getKind() == kind::REGEXP_CONCAT)
1238
  {
1239
2614
    bool allSigma = true;
1240
2614
    bool allSigmaStrict = true;
1241
2614
    unsigned allSigmaMinSize = 0;
1242
5191
    Node constStr;
1243
2614
    size_t constIdx = 0;
1244
2614
    size_t nchildren = r.getNumChildren();
1245
5866
    for (size_t i = 0; i < nchildren; i++)
1246
    {
1247
8636
      Node rc = r[i];
1248
5384
      Assert(rc.getKind() != kind::REGEXP_EMPTY);
1249
5384
      if (rc.getKind() == kind::REGEXP_SIGMA)
1250
      {
1251
813
        allSigmaMinSize++;
1252
      }
1253
4571
      else if (rc.getKind() == REGEXP_STAR && rc[0].getKind() == REGEXP_SIGMA)
1254
      {
1255
1050
        allSigmaStrict = false;
1256
      }
1257
3521
      else if (rc.getKind() == STRING_TO_REGEXP)
1258
      {
1259
1834
        if (constStr.isNull())
1260
        {
1261
1389
          constStr = rc[0];
1262
1389
          constIdx = i;
1263
        }
1264
        else
1265
        {
1266
445
          allSigma = false;
1267
445
          break;
1268
        }
1269
      }
1270
      else
1271
      {
1272
1687
        allSigma = false;
1273
1687
        break;
1274
      }
1275
    }
1276
2614
    if (allSigma)
1277
    {
1278
482
      if (constStr.isNull())
1279
      {
1280
        // x in re.++(_*, _, _) ---> str.len(x) >= 2
1281
40
        Node num = nm->mkConst(Rational(allSigmaMinSize));
1282
40
        Node lenx = nm->mkNode(STRING_LENGTH, x);
1283
40
        Node retNode = nm->mkNode(allSigmaStrict ? EQUAL : GEQ, lenx, num);
1284
20
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_PURE_ALLCHAR);
1285
      }
1286
462
      else if (allSigmaMinSize == 0 && nchildren >= 3 && constIdx != 0
1287
17
               && constIdx != nchildren - 1)
1288
      {
1289
        // x in re.++(_*, "abc", _*) ---> str.contains(x, "abc")
1290
34
        Node retNode = nm->mkNode(STRING_STRCTN, x, constStr);
1291
17
        return returnRewrite(node, retNode, Rewrite::RE_CONCAT_TO_CONTAINS);
1292
      }
1293
    }
1294
  }
1295
2084
  else if (r.getKind() == kind::REGEXP_INTER
1296
1042
           || r.getKind() == kind::REGEXP_UNION)
1297
  {
1298
450
    std::vector<Node> mvec;
1299
725
    for (unsigned i = 0; i < r.getNumChildren(); i++)
1300
    {
1301
500
      mvec.push_back(
1302
1000
          NodeManager::currentNM()->mkNode(kind::STRING_IN_REGEXP, x, r[i]));
1303
    }
1304
    Node retNode = NodeManager::currentNM()->mkNode(
1305
450
        r.getKind() == kind::REGEXP_INTER ? kind::AND : kind::OR, mvec);
1306
225
    return returnRewrite(node, retNode, Rewrite::RE_IN_ANDOR);
1307
  }
1308
817
  else if (r.getKind() == kind::STRING_TO_REGEXP)
1309
  {
1310
840
    Node retNode = x.eqNode(r[0]);
1311
420
    return returnRewrite(node, retNode, Rewrite::RE_IN_CSTRING);
1312
  }
1313
397
  else if (r.getKind() == REGEXP_RANGE)
1314
  {
1315
    // x in re.range( char_i, char_j ) ---> i <= str.code(x) <= j
1316
    // we do not do this if the arguments are not constant
1317
319
    if (RegExpEntail::isConstRegExp(r))
1318
    {
1319
638
      Node xcode = nm->mkNode(STRING_TO_CODE, x);
1320
      Node retNode =
1321
          nm->mkNode(AND,
1322
638
                     nm->mkNode(LEQ, nm->mkNode(STRING_TO_CODE, r[0]), xcode),
1323
1276
                     nm->mkNode(LEQ, xcode, nm->mkNode(STRING_TO_CODE, r[1])));
1324
319
      return returnRewrite(node, retNode, Rewrite::RE_IN_RANGE);
1325
    }
1326
  }
1327
78
  else if (r.getKind() == REGEXP_COMPLEMENT)
1328
  {
1329
96
    Node retNode = nm->mkNode(STRING_IN_REGEXP, x, r[0]).negate();
1330
48
    return returnRewrite(node, retNode, Rewrite::RE_IN_COMPLEMENT);
1331
  }
1332
1333
  // do simple consumes
1334
8850
  Node retNode = node;
1335
4425
  if (r.getKind() == kind::REGEXP_STAR)
1336
  {
1337
5444
    for (unsigned dir = 0; dir <= 1; dir++)
1338
    {
1339
7262
      std::vector<Node> mchildren;
1340
3636
      utils::getConcat(x, mchildren);
1341
3636
      bool success = true;
1342
10928
      while (success)
1343
      {
1344
3646
        success = false;
1345
7292
        std::vector<Node> children;
1346
3646
        utils::getConcat(r[0], children);
1347
7292
        Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children, dir);
1348
3646
        if (!scn.isNull())
1349
        {
1350
          Trace("regexp-ext-rewrite")
1351
              << "Regexp star : const conflict : " << node << std::endl;
1352
          return returnRewrite(node, scn, Rewrite::RE_CONSUME_S_CCONF);
1353
        }
1354
3646
        else if (children.empty())
1355
        {
1356
          // fully consumed one copy of the STAR
1357
18
          if (mchildren.empty())
1358
          {
1359
            Trace("regexp-ext-rewrite")
1360
                << "Regexp star : full consume : " << node << std::endl;
1361
            Node ret = NodeManager::currentNM()->mkConst(true);
1362
            return returnRewrite(node, ret, Rewrite::RE_CONSUME_S_FULL);
1363
          }
1364
          else
1365
          {
1366
36
            Node prev = retNode;
1367
54
            retNode = nm->mkNode(
1368
36
                STRING_IN_REGEXP, utils::mkConcat(mchildren, stype), r);
1369
            // Iterate again if the node changed. It may not have changed if
1370
            // nothing was consumed from mchildren (e.g. if the body of the
1371
            // re.* accepts the empty string.
1372
18
            success = (retNode != prev);
1373
          }
1374
        }
1375
      }
1376
3636
      if (retNode != node)
1377
      {
1378
20
        Trace("regexp-ext-rewrite") << "Regexp star : rewrite " << node
1379
10
                                    << " -> " << retNode << std::endl;
1380
10
        return returnRewrite(node, retNode, Rewrite::RE_CONSUME_S);
1381
      }
1382
    }
1383
  }
1384
  else
1385
  {
1386
4925
    std::vector<Node> children;
1387
2607
    utils::getConcat(r, children);
1388
4925
    std::vector<Node> mchildren;
1389
2607
    utils::getConcat(x, mchildren);
1390
2607
    unsigned prevSize = children.size() + mchildren.size();
1391
4925
    Node scn = RegExpEntail::simpleRegexpConsume(mchildren, children);
1392
2607
    if (!scn.isNull())
1393
    {
1394
52
      Trace("regexp-ext-rewrite")
1395
26
          << "Regexp : const conflict : " << node << std::endl;
1396
26
      return returnRewrite(node, scn, Rewrite::RE_CONSUME_CCONF);
1397
    }
1398
2581
    else if ((children.size() + mchildren.size()) != prevSize)
1399
    {
1400
      // Given a membership (str.++ x1 ... xn) in (re.++ r1 ... rm),
1401
      // above, we strip components to construct an equivalent membership:
1402
      // (str.++ xi .. xj) in (re.++ rk ... rl).
1403
526
      Node xn = utils::mkConcat(mchildren, stype);
1404
526
      Node emptyStr = Word::mkEmptyWord(stype);
1405
263
      if (children.empty())
1406
      {
1407
        // If we stripped all components on the right, then the left is
1408
        // equal to the empty string.
1409
        // e.g. (str.++ "a" x) in (re.++ (str.to.re "a")) ---> (= x "")
1410
4
        retNode = xn.eqNode(emptyStr);
1411
      }
1412
      else
1413
      {
1414
        // otherwise, construct the updated regular expression
1415
259
        retNode =
1416
518
            nm->mkNode(STRING_IN_REGEXP, xn, utils::mkConcat(children, rtype));
1417
      }
1418
526
      Trace("regexp-ext-rewrite")
1419
263
          << "Regexp : rewrite : " << node << " -> " << retNode << std::endl;
1420
263
      return returnRewrite(node, retNode, Rewrite::RE_SIMPLE_CONSUME);
1421
    }
1422
  }
1423
4126
  return node;
1424
}
1425
1426
381511
RewriteResponse SequencesRewriter::postRewrite(TNode node)
1427
{
1428
763022
  Trace("sequences-postrewrite")
1429
381511
      << "Strings::SequencesRewriter::postRewrite start " << node << std::endl;
1430
763022
  Node retNode = node;
1431
381511
  Kind nk = node.getKind();
1432
381511
  if (nk == kind::STRING_CONCAT)
1433
  {
1434
38295
    retNode = rewriteConcat(node);
1435
  }
1436
343216
  else if (nk == kind::EQUAL)
1437
  {
1438
89083
    retNode = rewriteEquality(node);
1439
  }
1440
254133
  else if (nk == kind::STRING_LENGTH)
1441
  {
1442
153191
    retNode = rewriteLength(node);
1443
  }
1444
100942
  else if (nk == kind::STRING_CHARAT)
1445
  {
1446
107
    retNode = rewriteCharAt(node);
1447
  }
1448
100835
  else if (nk == kind::STRING_SUBSTR)
1449
  {
1450
33018
    retNode = rewriteSubstr(node);
1451
  }
1452
67817
  else if (nk == kind::STRING_UPDATE)
1453
  {
1454
175
    retNode = rewriteUpdate(node);
1455
  }
1456
67642
  else if (nk == kind::STRING_STRCTN)
1457
  {
1458
25255
    retNode = rewriteContains(node);
1459
  }
1460
42387
  else if (nk == kind::STRING_STRIDOF)
1461
  {
1462
4498
    retNode = rewriteIndexof(node);
1463
  }
1464
37889
  else if (nk == kind::STRING_STRREPL)
1465
  {
1466
3104
    retNode = rewriteReplace(node);
1467
  }
1468
34785
  else if (nk == kind::STRING_STRREPLALL)
1469
  {
1470
145
    retNode = rewriteReplaceAll(node);
1471
  }
1472
34640
  else if (nk == kind::STRING_REPLACE_RE)
1473
  {
1474
119
    retNode = rewriteReplaceRe(node);
1475
  }
1476
34521
  else if (nk == kind::STRING_REPLACE_RE_ALL)
1477
  {
1478
28
    retNode = rewriteReplaceReAll(node);
1479
  }
1480
34493
  else if (nk == STRING_REV)
1481
  {
1482
237
    retNode = rewriteStrReverse(node);
1483
  }
1484
34256
  else if (nk == kind::STRING_PREFIX || nk == kind::STRING_SUFFIX)
1485
  {
1486
143
    retNode = rewritePrefixSuffix(node);
1487
  }
1488
34113
  else if (nk == kind::STRING_IN_REGEXP)
1489
  {
1490
6807
    retNode = rewriteMembership(node);
1491
  }
1492
27306
  else if (nk == REGEXP_CONCAT)
1493
  {
1494
2350
    retNode = rewriteConcatRegExp(node);
1495
  }
1496
24956
  else if (nk == REGEXP_UNION || nk == REGEXP_INTER)
1497
  {
1498
731
    retNode = rewriteAndOrRegExp(node);
1499
  }
1500
24225
  else if (nk == REGEXP_DIFF)
1501
  {
1502
32
    retNode = rewriteDifferenceRegExp(node);
1503
  }
1504
24193
  else if (nk == REGEXP_STAR)
1505
  {
1506
884
    retNode = rewriteStarRegExp(node);
1507
  }
1508
23309
  else if (nk == REGEXP_PLUS)
1509
  {
1510
35
    retNode = rewritePlusRegExp(node);
1511
  }
1512
23274
  else if (nk == REGEXP_OPT)
1513
  {
1514
25
    retNode = rewriteOptionRegExp(node);
1515
  }
1516
23249
  else if (nk == REGEXP_RANGE)
1517
  {
1518
348
    retNode = rewriteRangeRegExp(node);
1519
  }
1520
22901
  else if (nk == REGEXP_LOOP)
1521
  {
1522
24
    retNode = rewriteLoopRegExp(node);
1523
  }
1524
22877
  else if (nk == REGEXP_REPEAT)
1525
  {
1526
4
    retNode = rewriteRepeatRegExp(node);
1527
  }
1528
22873
  else if (nk == SEQ_UNIT)
1529
  {
1530
307
    retNode = rewriteSeqUnit(node);
1531
  }
1532
22566
  else if (nk == SEQ_NTH || nk == SEQ_NTH_TOTAL)
1533
  {
1534
330
    retNode = rewriteSeqNth(node);
1535
  }
1536
1537
763022
  Trace("sequences-postrewrite")
1538
381511
      << "Strings::SequencesRewriter::postRewrite returning " << retNode
1539
381511
      << std::endl;
1540
381511
  if (node != retNode)
1541
  {
1542
145246
    Trace("strings-rewrite-debug") << "Strings::SequencesRewriter::postRewrite "
1543
72623
                                   << node << " to " << retNode << std::endl;
1544
72623
    return RewriteResponse(REWRITE_AGAIN_FULL, retNode);
1545
  }
1546
308888
  Trace("strings-rewrite-nf") << "No rewrites for : " << node << std::endl;
1547
308888
  return RewriteResponse(REWRITE_DONE, retNode);
1548
}
1549
1550
236970
RewriteResponse SequencesRewriter::preRewrite(TNode node)
1551
{
1552
236970
  return RewriteResponse(REWRITE_DONE, node);
1553
}
1554
1555
687
TrustNode SequencesRewriter::expandDefinition(Node node)
1556
{
1557
1374
  Trace("strings-exp-def") << "SequencesRewriter::expandDefinition : " << node
1558
687
                           << std::endl;
1559
1560
687
  if (node.getKind() == kind::SEQ_NTH)
1561
  {
1562
2
    NodeManager* nm = NodeManager::currentNM();
1563
4
    Node s = node[0];
1564
4
    Node n = node[1];
1565
    // seq.nth(s, n) --> ite(0 <= n < len(s), seq.nth_total(s,n), Uf(s, n))
1566
    Node cond = nm->mkNode(AND,
1567
4
                           nm->mkNode(LEQ, nm->mkConst(Rational(0)), n),
1568
8
                           nm->mkNode(LT, n, nm->mkNode(STRING_LENGTH, s)));
1569
4
    Node ss = nm->mkNode(SEQ_NTH_TOTAL, s, n);
1570
4
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
1571
4
    Node u = nm->mkNode(APPLY_UF, uf, s, n);
1572
4
    Node ret = nm->mkNode(ITE, cond, ss, u);
1573
2
    Trace("strings-exp-def") << "...return " << ret << std::endl;
1574
2
    return TrustNode::mkTrustRewrite(node, ret, nullptr);
1575
  }
1576
685
  return TrustNode::null();
1577
}
1578
1579
330
Node SequencesRewriter::rewriteSeqNth(Node node)
1580
{
1581
330
  Assert(node.getKind() == SEQ_NTH || node.getKind() == SEQ_NTH_TOTAL);
1582
660
  Node s = node[0];
1583
660
  Node i = node[1];
1584
330
  if (s.isConst() && i.isConst())
1585
  {
1586
26
    size_t len = Word::getLength(s);
1587
26
    size_t pos = i.getConst<Rational>().getNumerator().toUnsignedInt();
1588
26
    if (pos < len)
1589
    {
1590
16
      std::vector<Node> elements = s.getConst<Sequence>().getVec();
1591
8
      const Node& ret = elements[pos];
1592
8
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_EVAL);
1593
    }
1594
18
    else if (node.getKind() == SEQ_NTH_TOTAL)
1595
    {
1596
      // return arbitrary term
1597
4
      Node ret = s.getType().getSequenceElementType().mkGroundValue();
1598
2
      return returnRewrite(node, ret, Rewrite::SEQ_NTH_TOTAL_OOB);
1599
    }
1600
    else
1601
    {
1602
16
      return node;
1603
    }
1604
  }
1605
  else
1606
  {
1607
304
    return node;
1608
  }
1609
}
1610
1611
107
Node SequencesRewriter::rewriteCharAt(Node node)
1612
{
1613
107
  Assert(node.getKind() == STRING_CHARAT);
1614
107
  NodeManager* nm = NodeManager::currentNM();
1615
214
  Node one = nm->mkConst(Rational(1));
1616
214
  Node retNode = nm->mkNode(STRING_SUBSTR, node[0], node[1], one);
1617
214
  return returnRewrite(node, retNode, Rewrite::CHARAT_ELIM);
1618
}
1619
1620
33030
Node SequencesRewriter::rewriteSubstr(Node node)
1621
{
1622
33030
  Assert(node.getKind() == kind::STRING_SUBSTR);
1623
1624
33030
  NodeManager* nm = NodeManager::currentNM();
1625
33030
  if (node[0].isConst())
1626
  {
1627
10030
    if (Word::isEmpty(node[0]))
1628
    {
1629
1586
      Node ret = node[0];
1630
793
      return returnRewrite(node, ret, Rewrite::SS_EMPTYSTR);
1631
    }
1632
    // rewriting for constant arguments
1633
9237
    if (node[1].isConst() && node[2].isConst())
1634
    {
1635
9572
      Node s = node[0];
1636
9572
      cvc5::Rational rMaxInt(String::maxSize());
1637
      uint32_t start;
1638
4786
      if (node[1].getConst<Rational>() > rMaxInt)
1639
      {
1640
        // start beyond the maximum size of strings
1641
        // thus, it must be beyond the end point of this string
1642
8
        Node ret = Word::mkEmptyWord(node.getType());
1643
4
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_MAX_OOB);
1644
      }
1645
4782
      else if (node[1].getConst<Rational>().sgn() < 0)
1646
      {
1647
        // start before the beginning of the string
1648
970
        Node ret = Word::mkEmptyWord(node.getType());
1649
485
        return returnRewrite(node, ret, Rewrite::SS_CONST_START_NEG);
1650
      }
1651
      else
1652
      {
1653
4297
        start = node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1654
4297
        if (start >= Word::getLength(node[0]))
1655
        {
1656
          // start beyond the end of the string
1657
724
          Node ret = Word::mkEmptyWord(node.getType());
1658
362
          return returnRewrite(node, ret, Rewrite::SS_CONST_START_OOB);
1659
        }
1660
      }
1661
3935
      if (node[2].getConst<Rational>() > rMaxInt)
1662
      {
1663
        // take up to the end of the string
1664
1
        size_t lenS = Word::getLength(s);
1665
2
        Node ret = Word::suffix(s, lenS - start);
1666
1
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_MAX_OOB);
1667
      }
1668
3934
      else if (node[2].getConst<Rational>().sgn() <= 0)
1669
      {
1670
590
        Node ret = Word::mkEmptyWord(node.getType());
1671
295
        return returnRewrite(node, ret, Rewrite::SS_CONST_LEN_NON_POS);
1672
      }
1673
      else
1674
      {
1675
        uint32_t len =
1676
3639
            node[2].getConst<Rational>().getNumerator().toUnsignedInt();
1677
3639
        if (start + len > Word::getLength(node[0]))
1678
        {
1679
          // take up to the end of the string
1680
298
          size_t lenS = Word::getLength(s);
1681
596
          Node ret = Word::suffix(s, lenS - start);
1682
298
          return returnRewrite(node, ret, Rewrite::SS_CONST_END_OOB);
1683
        }
1684
        else
1685
        {
1686
          // compute the substr using the constant string
1687
6682
          Node ret = Word::substr(s, start, len);
1688
3341
          return returnRewrite(node, ret, Rewrite::SS_CONST_SS);
1689
        }
1690
      }
1691
    }
1692
  }
1693
54902
  Node zero = nm->mkConst(cvc5::Rational(0));
1694
1695
  // if entailed non-positive length or negative start point
1696
27451
  if (ArithEntail::check(zero, node[1], true))
1697
  {
1698
20
    Node ret = Word::mkEmptyWord(node.getType());
1699
10
    return returnRewrite(node, ret, Rewrite::SS_START_NEG);
1700
  }
1701
27441
  else if (ArithEntail::check(zero, node[2]))
1702
  {
1703
1136
    Node ret = Word::mkEmptyWord(node.getType());
1704
568
    return returnRewrite(node, ret, Rewrite::SS_LEN_NON_POS);
1705
  }
1706
1707
26873
  if (node[0].getKind() == STRING_SUBSTR)
1708
  {
1709
    // (str.substr (str.substr x a b) c d) ---> "" if c >= b
1710
    //
1711
    // Note that this rewrite can be generalized to:
1712
    //
1713
    // (str.substr x a b) ---> "" if a >= (str.len x)
1714
    //
1715
    // This can be done when we generalize our entailment methods to
1716
    // accept an optional context. Then we could conjecture that
1717
    // (str.substr x a b) rewrites to "" and do a case analysis:
1718
    //
1719
    // - a < 0 or b < 0 (the result is trivially empty in these cases)
1720
    // - a >= (str.len x) assuming that { a >= 0, b >= 0 }
1721
    //
1722
    // For example, for (str.substr (str.substr x a a) a a), we could
1723
    // then deduce that under those assumptions, "a" is an
1724
    // over-approximation of the length of (str.substr x a a), which
1725
    // then allows us to reason that the result of the whole term must
1726
    // be empty.
1727
2597
    if (ArithEntail::check(node[1], node[0][2]))
1728
    {
1729
62
      Node ret = Word::mkEmptyWord(node.getType());
1730
31
      return returnRewrite(node, ret, Rewrite::SS_START_GEQ_LEN);
1731
    }
1732
  }
1733
24276
  else if (node[0].getKind() == STRING_STRREPL)
1734
  {
1735
    // (str.substr (str.replace x y z) 0 n)
1736
    // 	 ---> (str.replace (str.substr x 0 n) y z)
1737
    // if (str.len y) = 1 and (str.len z) = 1
1738
333
    if (node[1] == zero)
1739
    {
1740
471
      if (StringsEntail::checkLengthOne(node[0][1], true)
1741
471
          && StringsEntail::checkLengthOne(node[0][2], true))
1742
      {
1743
        Node ret = nm->mkNode(
1744
            kind::STRING_STRREPL,
1745
8
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], node[1], node[2]),
1746
            node[0][1],
1747
16
            node[0][2]);
1748
4
        return returnRewrite(node, ret, Rewrite::SUBSTR_REPL_SWAP);
1749
      }
1750
    }
1751
  }
1752
1753
53676
  std::vector<Node> n1;
1754
26838
  utils::getConcat(node[0], n1);
1755
53676
  TypeNode stype = node.getType();
1756
1757
  // definite inclusion
1758
26838
  if (node[1] == zero)
1759
  {
1760
18505
    Node curr = node[2];
1761
18505
    std::vector<Node> childrenr;
1762
9502
    if (StringsEntail::stripSymbolicLength(n1, childrenr, 1, curr))
1763
    {
1764
499
      if (curr != zero && !n1.empty())
1765
      {
1766
288
        childrenr.push_back(nm->mkNode(
1767
192
            kind::STRING_SUBSTR, utils::mkConcat(n1, stype), node[1], curr));
1768
      }
1769
998
      Node ret = utils::mkConcat(childrenr, stype);
1770
499
      return returnRewrite(node, ret, Rewrite::SS_LEN_INCLUDE);
1771
    }
1772
  }
1773
1774
  // symbolic length analysis
1775
76108
  for (unsigned r = 0; r < 2; r++)
1776
  {
1777
    // the amount of characters we can strip
1778
101057
    Node curr;
1779
51288
    if (r == 0)
1780
    {
1781
26339
      if (node[1] != zero)
1782
      {
1783
        // strip up to start point off the start of the string
1784
17336
        curr = node[1];
1785
      }
1786
    }
1787
24949
    else if (r == 1)
1788
    {
1789
      Node tot_len =
1790
49843
          Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, node[0]));
1791
49843
      Node end_pt = Rewriter::rewrite(nm->mkNode(kind::PLUS, node[1], node[2]));
1792
24949
      if (node[2] != tot_len)
1793
      {
1794
24233
        if (ArithEntail::check(node[2], tot_len))
1795
        {
1796
          // end point beyond end point of string, map to tot_len
1797
64
          Node ret = nm->mkNode(kind::STRING_SUBSTR, node[0], node[1], tot_len);
1798
32
          return returnRewrite(node, ret, Rewrite::SS_END_PT_NORM);
1799
        }
1800
        else
1801
        {
1802
          // strip up to ( str.len(node[0]) - end_pt ) off the end of the string
1803
24201
          curr = Rewriter::rewrite(nm->mkNode(kind::MINUS, tot_len, end_pt));
1804
        }
1805
      }
1806
1807
      // (str.substr s x y) --> "" if x < len(s) |= 0 >= y
1808
      Node n1_lt_tot_len =
1809
49811
          Rewriter::rewrite(nm->mkNode(kind::LT, node[1], tot_len));
1810
24917
      if (ArithEntail::checkWithAssumption(n1_lt_tot_len, zero, node[2], false))
1811
      {
1812
32
        Node ret = Word::mkEmptyWord(node.getType());
1813
16
        return returnRewrite(node, ret, Rewrite::SS_START_ENTAILS_ZERO_LEN);
1814
      }
1815
1816
      // (str.substr s x y) --> "" if 0 < y |= x >= str.len(s)
1817
      Node non_zero_len =
1818
49795
          Rewriter::rewrite(nm->mkNode(kind::LT, zero, node[2]));
1819
24901
      if (ArithEntail::checkWithAssumption(
1820
              non_zero_len, node[1], tot_len, false))
1821
      {
1822
4
        Node ret = Word::mkEmptyWord(node.getType());
1823
2
        return returnRewrite(node, ret, Rewrite::SS_NON_ZERO_LEN_ENTAILS_OOB);
1824
      }
1825
1826
      // (str.substr s x y) --> "" if x >= 0 |= 0 >= str.len(s)
1827
      Node geq_zero_start =
1828
49793
          Rewriter::rewrite(nm->mkNode(kind::GEQ, node[1], zero));
1829
24899
      if (ArithEntail::checkWithAssumption(
1830
              geq_zero_start, zero, tot_len, false))
1831
      {
1832
4
        Node ret = Word::mkEmptyWord(node.getType());
1833
        return returnRewrite(
1834
2
            node, ret, Rewrite::SS_GEQ_ZERO_START_ENTAILS_EMP_S);
1835
      }
1836
1837
      // (str.substr s x x) ---> "" if (str.len s) <= 1
1838
24897
      if (node[1] == node[2] && StringsEntail::checkLengthOne(node[0]))
1839
      {
1840
6
        Node ret = Word::mkEmptyWord(node.getType());
1841
3
        return returnRewrite(node, ret, Rewrite::SS_LEN_ONE_Z_Z);
1842
      }
1843
    }
1844
51233
    if (!curr.isNull())
1845
    {
1846
      // strip off components while quantity is entailed positive
1847
41514
      int dir = r == 0 ? 1 : -1;
1848
81564
      std::vector<Node> childrenr;
1849
41514
      if (StringsEntail::stripSymbolicLength(n1, childrenr, dir, curr))
1850
      {
1851
1464
        if (r == 0)
1852
        {
1853
          Node ret = nm->mkNode(
1854
2780
              kind::STRING_SUBSTR, utils::mkConcat(n1, stype), curr, node[2]);
1855
1390
          return returnRewrite(node, ret, Rewrite::SS_STRIP_START_PT);
1856
        }
1857
        else
1858
        {
1859
          Node ret = nm->mkNode(kind::STRING_SUBSTR,
1860
148
                                utils::mkConcat(n1, stype),
1861
                                node[1],
1862
296
                                node[2]);
1863
74
          return returnRewrite(node, ret, Rewrite::SS_STRIP_END_PT);
1864
        }
1865
      }
1866
    }
1867
  }
1868
  // combine substr
1869
24820
  if (node[0].getKind() == kind::STRING_SUBSTR)
1870
  {
1871
4612
    Node start_inner = node[0][1];
1872
4612
    Node start_outer = node[1];
1873
2486
    if (ArithEntail::check(start_outer) && ArithEntail::check(start_inner))
1874
    {
1875
      // both are positive
1876
      // thus, start point is definitely start_inner+start_outer.
1877
      // We can rewrite if it for certain what the length is
1878
1879
      // the length of a string from the inner substr subtracts the start point
1880
      // of the outer substr
1881
      Node len_from_inner =
1882
2828
          Rewriter::rewrite(nm->mkNode(kind::MINUS, node[0][2], start_outer));
1883
2828
      Node len_from_outer = node[2];
1884
2828
      Node new_len;
1885
      // take quantity that is for sure smaller than the other
1886
1594
      if (len_from_inner == len_from_outer)
1887
      {
1888
9
        new_len = len_from_inner;
1889
      }
1890
1585
      else if (ArithEntail::check(len_from_inner, len_from_outer))
1891
      {
1892
279
        new_len = len_from_outer;
1893
      }
1894
1306
      else if (ArithEntail::check(len_from_outer, len_from_inner))
1895
      {
1896
72
        new_len = len_from_inner;
1897
      }
1898
1594
      if (!new_len.isNull())
1899
      {
1900
720
        Node new_start = nm->mkNode(kind::PLUS, start_inner, start_outer);
1901
        Node ret =
1902
720
            nm->mkNode(kind::STRING_SUBSTR, node[0][0], new_start, new_len);
1903
360
        return returnRewrite(node, ret, Rewrite::SS_COMBINE);
1904
      }
1905
    }
1906
  }
1907
24460
  return node;
1908
}
1909
1910
175
Node SequencesRewriter::rewriteUpdate(Node node)
1911
{
1912
175
  Assert(node.getKind() == kind::STRING_UPDATE);
1913
350
  Node s = node[0];
1914
175
  if (s.isConst())
1915
  {
1916
163
    if (Word::isEmpty(s))
1917
    {
1918
      return returnRewrite(node, s, Rewrite::UPD_EMPTYSTR);
1919
    }
1920
    // rewriting for constant arguments
1921
163
    if (node[1].isConst())
1922
    {
1923
135
      cvc5::Rational rMaxInt(String::maxSize());
1924
119
      if (node[1].getConst<Rational>() > rMaxInt)
1925
      {
1926
        // start beyond the maximum size of strings
1927
        // thus, it must be beyond the end point of this string
1928
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_MAX_OOB);
1929
      }
1930
119
      else if (node[1].getConst<Rational>().sgn() < 0)
1931
      {
1932
        // start before the beginning of the string
1933
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_NEG);
1934
      }
1935
      uint32_t start =
1936
119
          node[1].getConst<Rational>().getNumerator().toUnsignedInt();
1937
119
      size_t len = Word::getLength(s);
1938
119
      if (start >= len)
1939
      {
1940
        // start beyond the end of the string
1941
        return returnRewrite(node, s, Rewrite::UPD_CONST_INDEX_OOB);
1942
      }
1943
119
      if (node[2].isConst())
1944
      {
1945
206
        Node ret = Word::update(s, start, node[2]);
1946
103
        return returnRewrite(node, ret, Rewrite::UPD_EVAL);
1947
      }
1948
    }
1949
  }
1950
1951
72
  return node;
1952
}
1953
1954
192578
Node SequencesRewriter::rewriteContains(Node node)
1955
{
1956
192578
  Assert(node.getKind() == kind::STRING_STRCTN);
1957
192578
  NodeManager* nm = NodeManager::currentNM();
1958
1959
192578
  if (node[0] == node[1])
1960
  {
1961
1078
    Node ret = NodeManager::currentNM()->mkConst(true);
1962
539
    return returnRewrite(node, ret, Rewrite::CTN_EQ);
1963
  }
1964
192039
  if (node[0].isConst())
1965
  {
1966
59313
    if (node[1].isConst())
1967
    {
1968
9618
      Node ret = nm->mkConst(Word::find(node[0], node[1]) != std::string::npos);
1969
4809
      return returnRewrite(node, ret, Rewrite::CTN_CONST);
1970
    }
1971
    else
1972
    {
1973
105038
      Node t = node[1];
1974
54504
      if (Word::isEmpty(node[0]))
1975
      {
1976
        Node len1 =
1977
48976
            NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
1978
24572
        if (ArithEntail::check(len1, true))
1979
        {
1980
          // we handle the false case here since the rewrite for equality
1981
          // uses this function, hence we want to conclude false if possible.
1982
          // len(x)>0 => contains( "", x ) ---> false
1983
336
          Node ret = NodeManager::currentNM()->mkConst(false);
1984
168
          return returnRewrite(node, ret, Rewrite::CTN_LHS_EMPTYSTR);
1985
        }
1986
      }
1987
29932
      else if (StringsEntail::checkLengthOne(t))
1988
      {
1989
7366
        std::vector<Node> vec = Word::getChars(node[0]);
1990
7366
        Node emp = Word::mkEmptyWord(t.getType());
1991
7366
        NodeBuilder nb(OR);
1992
3683
        nb << emp.eqNode(t);
1993
8579
        for (const Node& c : vec)
1994
        {
1995
4896
          Assert(c.getType() == t.getType());
1996
4896
          nb << c.eqNode(t);
1997
        }
1998
1999
        // str.contains("ABCabc", t) --->
2000
        // t = "" v t = "A" v t = "B" v t = "C" v t = "a" v t = "b" v t = "c"
2001
        // if len(t) <= 1
2002
7366
        Node ret = nb;
2003
3683
        return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2004
      }
2005
26249
      else if (node[1].getKind() == kind::STRING_CONCAT)
2006
      {
2007
        int firstc, lastc;
2008
2548
        if (!StringsEntail::canConstantContainConcat(
2009
                node[0], node[1], firstc, lastc))
2010
        {
2011
238
          Node ret = NodeManager::currentNM()->mkConst(false);
2012
119
          return returnRewrite(node, ret, Rewrite::CTN_NCONST_CTN_CONCAT);
2013
        }
2014
      }
2015
    }
2016
  }
2017
183260
  if (node[1].isConst())
2018
  {
2019
50135
    size_t len = Word::getLength(node[1]);
2020
50135
    if (len == 0)
2021
    {
2022
      // contains( x, "" ) ---> true
2023
48672
      Node ret = NodeManager::currentNM()->mkConst(true);
2024
24336
      return returnRewrite(node, ret, Rewrite::CTN_RHS_EMPTYSTR);
2025
    }
2026
25799
    else if (len == 1)
2027
    {
2028
      // The following rewrites are specific to a single character second
2029
      // argument of contains, where we can reason that this character is
2030
      // not split over multiple components in the first argument.
2031
14784
      if (node[0].getKind() == STRING_CONCAT)
2032
      {
2033
2546
        std::vector<Node> nc1;
2034
1273
        utils::getConcat(node[0], nc1);
2035
2546
        NodeBuilder nb(OR);
2036
4605
        for (const Node& ncc : nc1)
2037
        {
2038
3332
          nb << nm->mkNode(STRING_STRCTN, ncc, node[1]);
2039
        }
2040
2546
        Node ret = nb.constructNode();
2041
        // str.contains( x ++ y, "A" ) --->
2042
        //   str.contains( x, "A" ) OR str.contains( y, "A" )
2043
1273
        return returnRewrite(node, ret, Rewrite::CTN_CONCAT_CHAR);
2044
      }
2045
13511
      else if (node[0].getKind() == STRING_STRREPL)
2046
      {
2047
997
        Node rplDomain = d_stringsEntail.checkContains(node[0][1], node[1]);
2048
564
        if (!rplDomain.isNull() && !rplDomain.getConst<bool>())
2049
        {
2050
262
          Node d1 = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2051
          Node d2 =
2052
              nm->mkNode(AND,
2053
262
                         nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2054
524
                         nm->mkNode(STRING_STRCTN, node[0][2], node[1]));
2055
262
          Node ret = nm->mkNode(OR, d1, d2);
2056
          // If str.contains( y, "A" ) ---> false, then:
2057
          // str.contains( str.replace( x, y, z ), "A" ) --->
2058
          //   str.contains( x, "A" ) OR
2059
          //   ( str.contains( x, y ) AND str.contains( z, "A" ) )
2060
131
          return returnRewrite(node, ret, Rewrite::CTN_REPL_CHAR);
2061
        }
2062
      }
2063
    }
2064
  }
2065
315040
  std::vector<Node> nc1;
2066
157520
  utils::getConcat(node[0], nc1);
2067
315040
  std::vector<Node> nc2;
2068
157520
  utils::getConcat(node[1], nc2);
2069
2070
  // component-wise containment
2071
315040
  std::vector<Node> nc1rb;
2072
315040
  std::vector<Node> nc1re;
2073
157520
  if (d_stringsEntail.componentContains(nc1, nc2, nc1rb, nc1re) != -1)
2074
  {
2075
4744
    Node ret = NodeManager::currentNM()->mkConst(true);
2076
2372
    return returnRewrite(node, ret, Rewrite::CTN_COMPONENT);
2077
  }
2078
310296
  TypeNode stype = node[0].getType();
2079
2080
  // strip endpoints
2081
310296
  std::vector<Node> nb;
2082
310296
  std::vector<Node> ne;
2083
155148
  if (StringsEntail::stripConstantEndpoints(nc1, nc2, nb, ne))
2084
  {
2085
    Node ret = NodeManager::currentNM()->mkNode(
2086
408
        kind::STRING_STRCTN, utils::mkConcat(nc1, stype), node[1]);
2087
204
    return returnRewrite(node, ret, Rewrite::CTN_STRIP_ENDPT);
2088
  }
2089
2090
385299
  for (const Node& n : nc2)
2091
  {
2092
230506
    if (n.getKind() == kind::STRING_STRREPL)
2093
    {
2094
      // (str.contains x (str.replace y z w)) --> false
2095
      // if (str.contains x y) = false and (str.contains x w) = false
2096
      //
2097
      // Reasoning: (str.contains x y) checks that x does not contain y if the
2098
      // replacement does not change y. (str.contains x w) checks that if the
2099
      // replacement changes anything in y, the w makes it impossible for it to
2100
      // occur in x.
2101
7045
      Node ctnConst = d_stringsEntail.checkContains(node[0], n[0]);
2102
3598
      if (!ctnConst.isNull() && !ctnConst.getConst<bool>())
2103
      {
2104
442
        Node ctnConst2 = d_stringsEntail.checkContains(node[0], n[2]);
2105
225
        if (!ctnConst2.isNull() && !ctnConst2.getConst<bool>())
2106
        {
2107
16
          Node res = nm->mkConst(false);
2108
8
          return returnRewrite(node, res, Rewrite::CTN_RPL_NON_CTN);
2109
        }
2110
      }
2111
2112
      // (str.contains x (str.++ w (str.replace x y x) z)) --->
2113
      //   (and (= w "") (= x (str.replace x y x)) (= z ""))
2114
      //
2115
      // TODO: Remove with under-/over-approximation
2116
3590
      if (node[0] == n[0] && node[0] == n[2])
2117
      {
2118
286
        Node ret;
2119
143
        if (nc2.size() > 1)
2120
        {
2121
16
          Node emp = Word::mkEmptyWord(stype);
2122
16
          NodeBuilder nb2(kind::AND);
2123
24
          for (const Node& n2 : nc2)
2124
          {
2125
16
            if (n2 == n)
2126
            {
2127
8
              nb2 << nm->mkNode(kind::EQUAL, node[0], node[1]);
2128
            }
2129
            else
2130
            {
2131
8
              nb2 << nm->mkNode(kind::EQUAL, emp, n2);
2132
            }
2133
          }
2134
8
          ret = nb2.constructNode();
2135
        }
2136
        else
2137
        {
2138
135
          ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2139
        }
2140
143
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SELF);
2141
      }
2142
    }
2143
  }
2144
2145
  // length entailment
2146
309586
  Node len_n1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2147
309586
  Node len_n2 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2148
154793
  if (ArithEntail::check(len_n2, len_n1, true))
2149
  {
2150
    // len( n2 ) > len( n1 ) => contains( n1, n2 ) ---> false
2151
212
    Node ret = NodeManager::currentNM()->mkConst(false);
2152
106
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ);
2153
  }
2154
2155
  // multi-set reasoning
2156
  //   For example, contains( str.++( x, "b" ), str.++( "a", x ) ) ---> false
2157
  //   since the number of a's in the second argument is greater than the number
2158
  //   of a's in the first argument
2159
154687
  if (StringsEntail::checkMultisetSubset(node[0], node[1]))
2160
  {
2161
88
    Node ret = nm->mkConst(false);
2162
44
    return returnRewrite(node, ret, Rewrite::CTN_MSET_NSS);
2163
  }
2164
2165
154643
  if (ArithEntail::check(len_n2, len_n1, false))
2166
  {
2167
    // len( n2 ) >= len( n1 ) => contains( n1, n2 ) ---> n1 = n2
2168
62764
    Node ret = node[0].eqNode(node[1]);
2169
31382
    return returnRewrite(node, ret, Rewrite::CTN_LEN_INEQ_NSTRICT);
2170
  }
2171
2172
  // splitting
2173
123261
  if (node[0].getKind() == kind::STRING_CONCAT)
2174
  {
2175
37144
    if (node[1].isConst())
2176
    {
2177
3960
      Node t = node[1];
2178
      // Below, we are looking for a constant component of node[0]
2179
      // has no overlap with node[1], which means we can split.
2180
      // Notice that if the first or last components had no
2181
      // overlap, these would have been removed by strip
2182
      // constant endpoints above.
2183
      // Hence, we consider only the inner children.
2184
4036
      for (unsigned i = 1; i < (node[0].getNumChildren() - 1); i++)
2185
      {
2186
        // constant contains
2187
2095
        if (node[0][i].isConst())
2188
        {
2189
          // if no overlap, we can split into disjunction
2190
243
          if (Word::noOverlapWith(node[0][i], node[1]))
2191
          {
2192
156
            std::vector<Node> nc0;
2193
78
            utils::getConcat(node[0], nc0);
2194
156
            std::vector<Node> spl[2];
2195
78
            spl[0].insert(spl[0].end(), nc0.begin(), nc0.begin() + i);
2196
78
            Assert(i < nc0.size() - 1);
2197
78
            spl[1].insert(spl[1].end(), nc0.begin() + i + 1, nc0.end());
2198
            Node ret = NodeManager::currentNM()->mkNode(
2199
                kind::OR,
2200
312
                NodeManager::currentNM()->mkNode(kind::STRING_STRCTN,
2201
156
                                                 utils::mkConcat(spl[0], stype),
2202
                                                 node[1]),
2203
312
                NodeManager::currentNM()->mkNode(kind::STRING_STRCTN,
2204
156
                                                 utils::mkConcat(spl[1], stype),
2205
390
                                                 node[1]));
2206
78
            return returnRewrite(node, ret, Rewrite::CTN_SPLIT);
2207
          }
2208
        }
2209
      }
2210
    }
2211
  }
2212
86117
  else if (node[0].getKind() == kind::STRING_SUBSTR)
2213
  {
2214
    // (str.contains (str.substr x n (str.len y)) y) --->
2215
    //   (= (str.substr x n (str.len y)) y)
2216
    //
2217
    // TODO: Remove with under-/over-approximation
2218
8612
    if (node[0][2] == nm->mkNode(kind::STRING_LENGTH, node[1]))
2219
    {
2220
64
      Node ret = nm->mkNode(kind::EQUAL, node[0], node[1]);
2221
32
      return returnRewrite(node, ret, Rewrite::CTN_SUBSTR);
2222
    }
2223
  }
2224
77505
  else if (node[0].getKind() == kind::STRING_STRREPL)
2225
  {
2226
1754
    if (node[1].isConst() && node[0][1].isConst() && node[0][2].isConst())
2227
    {
2228
460
      if (Word::noOverlapWith(node[1], node[0][1])
2229
460
          && Word::noOverlapWith(node[1], node[0][2]))
2230
      {
2231
        // (str.contains (str.replace x c1 c2) c3) ---> (str.contains x c3)
2232
        // if there is no overlap between c1 and c3 and none between c2 and c3
2233
4
        Node ret = nm->mkNode(STRING_STRCTN, node[0][0], node[1]);
2234
2
        return returnRewrite(node, ret, Rewrite::CTN_REPL_CNSTS_TO_CTN);
2235
      }
2236
    }
2237
2238
1752
    if (node[0][0] == node[0][2])
2239
    {
2240
      // (str.contains (str.replace x y x) y) ---> (str.contains x y)
2241
275
      if (node[0][1] == node[1])
2242
      {
2243
20
        Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2244
10
        return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN);
2245
      }
2246
2247
      // (str.contains (str.replace x y x) z) ---> (str.contains x z)
2248
      // if (str.len z) <= 1
2249
265
      if (StringsEntail::checkLengthOne(node[1]))
2250
      {
2251
24
        Node ret = nm->mkNode(kind::STRING_STRCTN, node[0][0], node[1]);
2252
12
        return returnRewrite(node, ret, Rewrite::CTN_REPL_LEN_ONE_TO_CTN);
2253
      }
2254
    }
2255
2256
    // (str.contains (str.replace x y z) z) --->
2257
    //   (or (str.contains x y) (str.contains x z))
2258
1730
    if (node[0][2] == node[1])
2259
    {
2260
      Node ret = nm->mkNode(OR,
2261
178
                            nm->mkNode(STRING_STRCTN, node[0][0], node[0][1]),
2262
356
                            nm->mkNode(STRING_STRCTN, node[0][0], node[0][2]));
2263
89
      return returnRewrite(node, ret, Rewrite::CTN_REPL_TO_CTN_DISJ);
2264
    }
2265
2266
    // (str.contains (str.replace x y z) w) --->
2267
    //   (str.contains (str.replace x y "") w)
2268
    // if (str.contains z w) ---> false and (str.len w) = 1
2269
1641
    if (StringsEntail::checkLengthOne(node[1]))
2270
    {
2271
461
      Node ctn = d_stringsEntail.checkContains(node[0][2], node[1]);
2272
268
      if (!ctn.isNull() && !ctn.getConst<bool>())
2273
      {
2274
150
        Node empty = Word::mkEmptyWord(stype);
2275
        Node ret = nm->mkNode(
2276
            kind::STRING_STRCTN,
2277
150
            nm->mkNode(kind::STRING_STRREPL, node[0][0], node[0][1], empty),
2278
300
            node[1]);
2279
75
        return returnRewrite(node, ret, Rewrite::CTN_REPL_SIMP_REPL);
2280
      }
2281
    }
2282
  }
2283
2284
122963
  if (node[1].getKind() == kind::STRING_STRREPL)
2285
  {
2286
    // (str.contains x (str.replace y x y)) --->
2287
    //   (str.contains x y)
2288
1858
    if (node[0] == node[1][1] && node[1][0] == node[1][2])
2289
    {
2290
12
      Node ret = nm->mkNode(kind::STRING_STRCTN, node[0], node[1][0]);
2291
6
      return returnRewrite(node, ret, Rewrite::CTN_REPL);
2292
    }
2293
2294
    // (str.contains x (str.replace "" x y)) --->
2295
    //   (= "" (str.replace "" x y))
2296
    //
2297
    // Note: Length-based reasoning is not sufficient to get this rewrite. We
2298
    // can neither show that str.len(str.replace("", x, y)) - str.len(x) >= 0
2299
    // nor str.len(x) - str.len(str.replace("", x, y)) >= 0
2300
3702
    Node emp = Word::mkEmptyWord(stype);
2301
1852
    if (node[0] == node[1][1] && node[1][0] == emp)
2302
    {
2303
4
      Node ret = nm->mkNode(kind::EQUAL, emp, node[1]);
2304
2
      return returnRewrite(node, ret, Rewrite::CTN_REPL_EMPTY);
2305
    }
2306
  }
2307
2308
122955
  return node;
2309
}
2310
2311
4498
Node SequencesRewriter::rewriteIndexof(Node node)
2312
{
2313
4498
  Assert(node.getKind() == kind::STRING_STRIDOF);
2314
4498
  NodeManager* nm = NodeManager::currentNM();
2315
2316
4498
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() < 0)
2317
  {
2318
    // z<0  implies  str.indexof( x, y, z ) --> -1
2319
322
    Node negone = nm->mkConst(Rational(-1));
2320
161
    return returnRewrite(node, negone, Rewrite::IDOF_NEG);
2321
  }
2322
2323
  // the string type
2324
8674
  TypeNode stype = node[0].getType();
2325
2326
  // evaluation and simple cases
2327
8674
  std::vector<Node> children0;
2328
4337
  utils::getConcat(node[0], children0);
2329
4337
  if (children0[0].isConst() && node[1].isConst() && node[2].isConst())
2330
  {
2331
1064
    cvc5::Rational rMaxInt(cvc5::String::maxSize());
2332
894
    if (node[2].getConst<Rational>() > rMaxInt)
2333
    {
2334
      // We know that, due to limitations on the size of string constants
2335
      // in our implementation, that accessing a position greater than
2336
      // rMaxInt is guaranteed to be out of bounds.
2337
      Node negone = nm->mkConst(Rational(-1));
2338
      return returnRewrite(node, negone, Rewrite::IDOF_MAX);
2339
    }
2340
894
    Assert(node[2].getConst<Rational>().sgn() >= 0);
2341
1064
    Node s = children0[0];
2342
1064
    Node t = node[1];
2343
    uint32_t start =
2344
894
        node[2].getConst<Rational>().getNumerator().toUnsignedInt();
2345
894
    std::size_t ret = Word::find(s, t, start);
2346
894
    if (ret != std::string::npos)
2347
    {
2348
682
      Node retv = nm->mkConst(Rational(static_cast<unsigned>(ret)));
2349
341
      return returnRewrite(node, retv, Rewrite::IDOF_FIND);
2350
    }
2351
553
    else if (children0.size() == 1)
2352
    {
2353
766
      Node negone = nm->mkConst(Rational(-1));
2354
383
      return returnRewrite(node, negone, Rewrite::IDOF_NFIND);
2355
    }
2356
  }
2357
2358
3613
  if (node[0] == node[1])
2359
  {
2360
88
    if (node[2].isConst())
2361
    {
2362
14
      if (node[2].getConst<Rational>().sgn() == 0)
2363
      {
2364
        // indexof( x, x, 0 ) --> 0
2365
28
        Node zero = nm->mkConst(Rational(0));
2366
14
        return returnRewrite(node, zero, Rewrite::IDOF_EQ_CST_START);
2367
      }
2368
    }
2369
74
    if (ArithEntail::check(node[2], true))
2370
    {
2371
      // y>0  implies  indexof( x, x, y ) --> -1
2372
      Node negone = nm->mkConst(Rational(-1));
2373
      return returnRewrite(node, negone, Rewrite::IDOF_EQ_NSTART);
2374
    }
2375
128
    Node emp = Word::mkEmptyWord(stype);
2376
74
    if (node[0] != emp)
2377
    {
2378
      // indexof( x, x, z ) ---> indexof( "", "", z )
2379
40
      Node ret = nm->mkNode(STRING_STRIDOF, emp, emp, node[2]);
2380
20
      return returnRewrite(node, ret, Rewrite::IDOF_EQ_NORM);
2381
    }
2382
  }
2383
2384
7158
  Node len0 = nm->mkNode(STRING_LENGTH, node[0]);
2385
7158
  Node len1 = nm->mkNode(STRING_LENGTH, node[1]);
2386
7158
  Node len0m2 = nm->mkNode(MINUS, len0, node[2]);
2387
2388
3579
  if (node[1].isConst())
2389
  {
2390
3081
    if (Word::isEmpty(node[1]))
2391
    {
2392
80
      if (ArithEntail::check(len0, node[2]) && ArithEntail::check(node[2]))
2393
      {
2394
        // len(x)>=z ^ z >=0 implies indexof( x, "", z ) ---> z
2395
10
        return returnRewrite(node, node[2], Rewrite::IDOF_EMP_IDOF);
2396
      }
2397
    }
2398
  }
2399
2400
3569
  if (ArithEntail::check(len1, len0m2, true))
2401
  {
2402
    // len(x)-z < len(y)  implies  indexof( x, y, z ) ----> -1
2403
12
    Node negone = nm->mkConst(Rational(-1));
2404
6
    return returnRewrite(node, negone, Rewrite::IDOF_LEN);
2405
  }
2406
2407
7126
  Node fstr = node[0];
2408
3563
  if (!node[2].isConst() || node[2].getConst<Rational>().sgn() != 0)
2409
  {
2410
543
    fstr = nm->mkNode(kind::STRING_SUBSTR, node[0], node[2], len0);
2411
543
    fstr = Rewriter::rewrite(fstr);
2412
  }
2413
2414
7126
  Node cmp_conr = d_stringsEntail.checkContains(fstr, node[1]);
2415
7126
  Trace("strings-rewrite-debug") << "For " << node << ", check contains("
2416
3563
                                 << fstr << ", " << node[1] << ")" << std::endl;
2417
3563
  Trace("strings-rewrite-debug") << "...got " << cmp_conr << std::endl;
2418
7126
  std::vector<Node> children1;
2419
3563
  utils::getConcat(node[1], children1);
2420
3563
  if (!cmp_conr.isNull())
2421
  {
2422
284
    if (cmp_conr.getConst<bool>())
2423
    {
2424
279
      if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2425
      {
2426
        // past the first position in node[0] that contains node[1], we can drop
2427
345
        std::vector<Node> nb;
2428
345
        std::vector<Node> ne;
2429
203
        int cc = d_stringsEntail.componentContains(
2430
203
            children0, children1, nb, ne, true, 1);
2431
203
        if (cc != -1 && !ne.empty())
2432
        {
2433
          // For example:
2434
          // str.indexof(str.++(x,y,z),y,0) ---> str.indexof(str.++(x,y),y,0)
2435
98
          Node nn = utils::mkConcat(children0, stype);
2436
98
          Node ret = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2437
49
          return returnRewrite(node, ret, Rewrite::IDOF_DEF_CTN);
2438
        }
2439
2440
        // Strip components from the beginning that are guaranteed not to match
2441
154
        if (StringsEntail::stripConstantEndpoints(
2442
                children0, children1, nb, ne, 1))
2443
        {
2444
          // str.indexof(str.++("AB", x, "C"), "C", 0) --->
2445
          // 2 + str.indexof(str.++(x, "C"), "C", 0)
2446
          Node ret = nm->mkNode(
2447
              kind::PLUS,
2448
24
              nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(nb, stype)),
2449
48
              nm->mkNode(kind::STRING_STRIDOF,
2450
24
                         utils::mkConcat(children0, stype),
2451
                         node[1],
2452
48
                         node[2]));
2453
12
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_CNST_ENDPTS);
2454
        }
2455
      }
2456
      // To show that the first argument definitely contains the second, the
2457
      // index must be a valid index in the first argument. This ensures that
2458
      // (str.indexof t "" n) is not rewritten to something other than -1 when n
2459
      // is beyond the length of t. This is not required for the above rewrites,
2460
      // which only apply when n=0.
2461
218
      if (ArithEntail::check(node[2]) && ArithEntail::check(len0, node[2]))
2462
      {
2463
        // strip symbolic length
2464
290
        Node new_len = node[2];
2465
290
        std::vector<Node> nr;
2466
148
        if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2467
        {
2468
          // For example:
2469
          // z>=0 and z>str.len( x1 ) and str.contains( x2, y )-->true
2470
          // implies
2471
          // str.indexof( str.++( x1, x2 ), y, z ) --->
2472
          // str.len( x1 ) + str.indexof( x2, y, z-str.len(x1) )
2473
12
          Node nn = utils::mkConcat(children0, stype);
2474
          Node ret =
2475
              nm->mkNode(PLUS,
2476
12
                         nm->mkNode(MINUS, node[2], new_len),
2477
24
                         nm->mkNode(STRING_STRIDOF, nn, node[1], new_len));
2478
6
          return returnRewrite(node, ret, Rewrite::IDOF_STRIP_SYM_LEN);
2479
        }
2480
      }
2481
    }
2482
    else
2483
    {
2484
      // str.contains( x, y ) --> false  implies  str.indexof(x,y,z) --> -1
2485
10
      Node negone = nm->mkConst(Rational(-1));
2486
5
      return returnRewrite(node, negone, Rewrite::IDOF_NCTN);
2487
    }
2488
  }
2489
  else
2490
  {
2491
6528
    Node new_len = node[2];
2492
6528
    std::vector<Node> nr;
2493
3279
    if (StringsEntail::stripSymbolicLength(children0, nr, 1, new_len))
2494
    {
2495
      // Normalize the string before the start index.
2496
      //
2497
      // For example:
2498
      // str.indexof(str.++("ABCD", x), y, 3) --->
2499
      // str.indexof(str.++("AAAD", x), y, 3)
2500
114
      Node nodeNr = utils::mkConcat(nr, stype);
2501
114
      Node normNr = lengthPreserveRewrite(nodeNr);
2502
72
      if (normNr != nodeNr)
2503
      {
2504
60
        std::vector<Node> normNrChildren;
2505
30
        utils::getConcat(normNr, normNrChildren);
2506
60
        std::vector<Node> children(normNrChildren);
2507
30
        children.insert(children.end(), children0.begin(), children0.end());
2508
60
        Node nn = utils::mkConcat(children, stype);
2509
60
        Node res = nm->mkNode(kind::STRING_STRIDOF, nn, node[1], node[2]);
2510
30
        return returnRewrite(node, res, Rewrite::IDOF_NORM_PREFIX);
2511
      }
2512
    }
2513
  }
2514
2515
3461
  if (node[2].isConst() && node[2].getConst<Rational>().sgn() == 0)
2516
  {
2517
5905
    std::vector<Node> cb;
2518
5905
    std::vector<Node> ce;
2519
2959
    if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce, -1))
2520
    {
2521
26
      Node ret = utils::mkConcat(children0, stype);
2522
13
      ret = nm->mkNode(STRING_STRIDOF, ret, node[1], node[2]);
2523
      // For example:
2524
      // str.indexof( str.++( x, "A" ), "B", 0 ) ---> str.indexof( x, "B", 0 )
2525
13
      return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2526
    }
2527
  }
2528
2529
3448
  return node;
2530
}
2531
2532
3104
Node SequencesRewriter::rewriteReplace(Node node)
2533
{
2534
3104
  Assert(node.getKind() == kind::STRING_STRREPL);
2535
3104
  NodeManager* nm = NodeManager::currentNM();
2536
2537
3104
  if (node[1].isConst() && Word::isEmpty(node[1]))
2538
  {
2539
808
    Node ret = nm->mkNode(STRING_CONCAT, node[2], node[0]);
2540
404
    return returnRewrite(node, ret, Rewrite::RPL_RPL_EMPTY);
2541
  }
2542
  // the string type
2543
5400
  TypeNode stype = node.getType();
2544
2545
5400
  std::vector<Node> children0;
2546
2700
  utils::getConcat(node[0], children0);
2547
2548
2700
  if (node[1].isConst() && children0[0].isConst())
2549
  {
2550
825
    Node s = children0[0];
2551
825
    Node t = node[1];
2552
816
    std::size_t p = Word::find(s, t);
2553
816
    if (p == std::string::npos)
2554
    {
2555
554
      if (children0.size() == 1)
2556
      {
2557
545
        return returnRewrite(node, node[0], Rewrite::RPL_CONST_NFIND);
2558
      }
2559
    }
2560
    else
2561
    {
2562
524
      Node s1 = Word::substr(s, 0, p);
2563
524
      Node s3 = Word::substr(s, p + Word::getLength(t));
2564
524
      std::vector<Node> children;
2565
262
      if (!Word::isEmpty(s1))
2566
      {
2567
49
        children.push_back(s1);
2568
      }
2569
262
      children.push_back(node[2]);
2570
262
      if (!Word::isEmpty(s3))
2571
      {
2572
85
        children.push_back(s3);
2573
      }
2574
262
      children.insert(children.end(), children0.begin() + 1, children0.end());
2575
524
      Node ret = utils::mkConcat(children, stype);
2576
262
      return returnRewrite(node, ret, Rewrite::RPL_CONST_FIND);
2577
    }
2578
  }
2579
2580
  // rewrites that apply to both replace and replaceall
2581
3786
  Node rri = rewriteReplaceInternal(node);
2582
1893
  if (!rri.isNull())
2583
  {
2584
    // printing of the rewrite managed by the call above
2585
131
    return rri;
2586
  }
2587
2588
1762
  if (node[0] == node[2])
2589
  {
2590
    // ( len( y )>=len(x) ) => str.replace( x, y, x ) ---> x
2591
482
    Node l0 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[0]);
2592
482
    Node l1 = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, node[1]);
2593
259
    if (ArithEntail::check(l1, l0))
2594
    {
2595
34
      return returnRewrite(node, node[0], Rewrite::RPL_RPL_LEN_ID);
2596
    }
2597
2598
    // (str.replace x y x) ---> (str.replace x (str.++ y1 ... yn) x)
2599
    // if 1 >= (str.len x) and (= y "") ---> (= y1 "") ... (= yn "")
2600
225
    if (StringsEntail::checkLengthOne(node[0]))
2601
    {
2602
110
      Node empty = Word::mkEmptyWord(stype);
2603
      Node rn1 = Rewriter::rewrite(
2604
110
          rewriteEqualityExt(nm->mkNode(EQUAL, node[1], empty)));
2605
56
      if (rn1 != node[1])
2606
      {
2607
110
        std::vector<Node> emptyNodes;
2608
        bool allEmptyEqs;
2609
56
        std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(rn1);
2610
2611
56
        if (allEmptyEqs)
2612
        {
2613
62
          Node nn1 = utils::mkConcat(emptyNodes, stype);
2614
32
          if (node[1] != nn1)
2615
          {
2616
4
            Node ret = nm->mkNode(STRING_STRREPL, node[0], nn1, node[2]);
2617
2
            return returnRewrite(node, ret, Rewrite::RPL_X_Y_X_SIMP);
2618
          }
2619
        }
2620
      }
2621
    }
2622
  }
2623
2624
3452
  std::vector<Node> children1;
2625
1726
  utils::getConcat(node[1], children1);
2626
2627
  // check if contains definitely does (or does not) hold
2628
3452
  Node cmp_con = nm->mkNode(kind::STRING_STRCTN, node[0], node[1]);
2629
3452
  Node cmp_conr = Rewriter::rewrite(cmp_con);
2630
1726
  if (cmp_conr.isConst())
2631
  {
2632
175
    if (cmp_conr.getConst<bool>())
2633
    {
2634
      // component-wise containment
2635
289
      std::vector<Node> cb;
2636
289
      std::vector<Node> ce;
2637
165
      int cc = d_stringsEntail.componentContains(
2638
165
          children0, children1, cb, ce, true, 1);
2639
165
      if (cc != -1)
2640
      {
2641
101
        if (cc == 0 && children0[0] == children1[0])
2642
        {
2643
          // definitely a prefix, can do the replace
2644
          // for example,
2645
          //   str.replace( str.++( x, "ab" ), str.++( x, "a" ), y )  --->
2646
          //   str.++( y, "b" )
2647
52
          std::vector<Node> cres;
2648
26
          cres.push_back(node[2]);
2649
26
          cres.insert(cres.end(), ce.begin(), ce.end());
2650
52
          Node ret = utils::mkConcat(cres, stype);
2651
26
          return returnRewrite(node, ret, Rewrite::RPL_CCTN_RPL);
2652
        }
2653
75
        else if (!ce.empty())
2654
        {
2655
          // we can pull remainder past first definite containment
2656
          // for example,
2657
          //   str.replace( str.++( x, "ab" ), "a", y ) --->
2658
          //   str.++( str.replace( str.++( x, "a" ), "a", y ), "b" )
2659
          // this is independent of whether the second argument may be empty
2660
30
          std::vector<Node> scc;
2661
45
          scc.push_back(NodeManager::currentNM()->mkNode(
2662
              kind::STRING_STRREPL,
2663
30
              utils::mkConcat(children0, stype),
2664
              node[1],
2665
              node[2]));
2666
15
          scc.insert(scc.end(), ce.begin(), ce.end());
2667
30
          Node ret = utils::mkConcat(scc, stype);
2668
15
          return returnRewrite(node, ret, Rewrite::RPL_CCTN);
2669
        }
2670
      }
2671
    }
2672
    else
2673
    {
2674
      // ~contains( t, s ) => ( replace( t, s, r ) ----> t )
2675
10
      return returnRewrite(node, node[0], Rewrite::RPL_NCTN);
2676
    }
2677
  }
2678
1551
  else if (cmp_conr.getKind() == kind::EQUAL || cmp_conr.getKind() == kind::AND)
2679
  {
2680
    // Rewriting the str.contains may return equalities of the form (= x "").
2681
    // In that case, we can substitute the variables appearing in those
2682
    // equalities with the empty string in the third argument of the
2683
    // str.replace. For example:
2684
    //
2685
    // (str.replace x (str.++ x y) y) --> (str.replace x (str.++ x y) "")
2686
    //
2687
    // This can be done because str.replace changes x iff (str.++ x y) is in x
2688
    // but that means that y must be empty in that case. Thus, we can
2689
    // substitute y with "" in the third argument. Note that the third argument
2690
    // does not matter when the str.replace does not apply.
2691
    //
2692
542
    Node empty = Word::mkEmptyWord(stype);
2693
2694
542
    std::vector<Node> emptyNodes;
2695
    bool allEmptyEqs;
2696
280
    std::tie(allEmptyEqs, emptyNodes) = utils::collectEmptyEqs(cmp_conr);
2697
2698
280
    if (emptyNodes.size() > 0)
2699
    {
2700
      // Perform the substitutions
2701
306
      std::vector<TNode> substs(emptyNodes.size(), TNode(empty));
2702
      Node nn2 = node[2].substitute(
2703
306
          emptyNodes.begin(), emptyNodes.end(), substs.begin(), substs.end());
2704
2705
      // If the contains rewrites to a conjunction of empty-string equalities
2706
      // and we are doing the replacement in an empty string, we can rewrite
2707
      // the string-to-replace with a concatenation of all the terms that must
2708
      // be empty:
2709
      //
2710
      // (str.replace "" y z) ---> (str.replace "" (str.++ y1 ... yn)  z)
2711
      // if (str.contains "" y) ---> (and (= y1 "") ... (= yn ""))
2712
162
      if (node[0] == empty && allEmptyEqs)
2713
      {
2714
192
        std::vector<Node> emptyNodesList(emptyNodes.begin(), emptyNodes.end());
2715
192
        Node nn1 = utils::mkConcat(emptyNodesList, stype);
2716
100
        if (nn1 != node[1] || nn2 != node[2])
2717
        {
2718
16
          Node res = nm->mkNode(kind::STRING_STRREPL, node[0], nn1, nn2);
2719
8
          return returnRewrite(node, res, Rewrite::RPL_EMP_CNTS_SUBSTS);
2720
        }
2721
      }
2722
2723
154
      if (nn2 != node[2])
2724
      {
2725
20
        Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1], nn2);
2726
10
        return returnRewrite(node, res, Rewrite::RPL_CNTS_SUBSTS);
2727
      }
2728
    }
2729
  }
2730
2731
1657
  if (cmp_conr != cmp_con)
2732
  {
2733
503
    if (StringsEntail::checkNonEmpty(node[1]))
2734
    {
2735
      // pull endpoints that can be stripped
2736
      // for example,
2737
      //   str.replace( str.++( "b", x, "b" ), "a", y ) --->
2738
      //   str.++( "b", str.replace( x, "a", y ), "b" )
2739
549
      std::vector<Node> cb;
2740
549
      std::vector<Node> ce;
2741
283
      if (StringsEntail::stripConstantEndpoints(children0, children1, cb, ce))
2742
      {
2743
34
        std::vector<Node> cc;
2744
17
        cc.insert(cc.end(), cb.begin(), cb.end());
2745
17
        cc.push_back(
2746
68
            NodeManager::currentNM()->mkNode(kind::STRING_STRREPL,
2747
34
                                             utils::mkConcat(children0, stype),
2748
                                             node[1],
2749
                                             node[2]));
2750
17
        cc.insert(cc.end(), ce.begin(), ce.end());
2751
34
        Node ret = utils::mkConcat(cc, stype);
2752
17
        return returnRewrite(node, ret, Rewrite::RPL_PULL_ENDPT);
2753
      }
2754
    }
2755
  }
2756
2757
1640
  children1.clear();
2758
1640
  utils::getConcat(node[1], children1);
2759
3280
  Node lastChild1 = children1[children1.size() - 1];
2760
1640
  if (lastChild1.getKind() == kind::STRING_SUBSTR)
2761
  {
2762
    // (str.replace x (str.++ t (str.substr y i j)) z) --->
2763
    // (str.replace x (str.++ t
2764
    //                  (str.substr y i (+ (str.len x) 1 (- (str.len t))))) z)
2765
    // if j > len(x)
2766
    //
2767
    // Reasoning: If the string to be replaced is longer than x, then it does
2768
    // not matter how much longer it is, the result is always x. Thus, it is
2769
    // fine to only look at the prefix of length len(x) + 1 - len(t).
2770
2771
150
    children1.pop_back();
2772
    // Length of the non-substr components in the second argument
2773
    Node partLen1 =
2774
292
        nm->mkNode(kind::STRING_LENGTH, utils::mkConcat(children1, stype));
2775
292
    Node maxLen1 = nm->mkNode(kind::PLUS, partLen1, lastChild1[2]);
2776
2777
292
    Node zero = nm->mkConst(Rational(0));
2778
292
    Node one = nm->mkConst(Rational(1));
2779
292
    Node len0 = nm->mkNode(kind::STRING_LENGTH, node[0]);
2780
292
    Node len0_1 = nm->mkNode(kind::PLUS, len0, one);
2781
    // Check len(t) + j > len(x) + 1
2782
150
    if (ArithEntail::check(maxLen1, len0_1, true))
2783
    {
2784
8
      children1.push_back(nm->mkNode(
2785
          kind::STRING_SUBSTR,
2786
          lastChild1[0],
2787
          lastChild1[1],
2788
16
          nm->mkNode(
2789
16
              kind::PLUS, len0, one, nm->mkNode(kind::UMINUS, partLen1))));
2790
      Node res = nm->mkNode(kind::STRING_STRREPL,
2791
                            node[0],
2792
16
                            utils::mkConcat(children1, stype),
2793
32
                            node[2]);
2794
8
      return returnRewrite(node, res, Rewrite::REPL_SUBST_IDX);
2795
    }
2796
  }
2797
2798
1632
  if (node[0].getKind() == STRING_STRREPL)
2799
  {
2800
302
    Node x = node[0];
2801
302
    Node y = node[1];
2802
302
    Node z = node[2];
2803
156
    if (x[0] == x[2] && x[0] == y)
2804
    {
2805
      // (str.replace (str.replace y w y) y z) -->
2806
      //   (str.replace (str.replace y w z) y z)
2807
      // if (str.len w) >= (str.len z) and w != z
2808
      //
2809
      // Reasoning: There are two cases: (1) w does not appear in y and (2) w
2810
      // does appear in y.
2811
      //
2812
      // Case (1): In this case, the reasoning is trivial. The
2813
      // inner replace does not do anything, so we can just replace its third
2814
      // argument with any string.
2815
      //
2816
      // Case (2): After the inner replace, we are guaranteed to have a string
2817
      // that contains y at the index of w in the original string y. The outer
2818
      // replace then replaces that y with z, so we can short-circuit that
2819
      // replace by directly replacing w with z in the inner replace. We can
2820
      // only do that if the result of the new inner replace does not contain
2821
      // y, otherwise we end up doing two replaces that are different from the
2822
      // original expression. We enforce that by requiring that the length of w
2823
      // has to be greater or equal to the length of z and that w and z have to
2824
      // be different. This makes sure that an inner replace changes a string
2825
      // to a string that is shorter than y, making it impossible for the outer
2826
      // replace to match.
2827
86
      Node w = x[1];
2828
2829
      // (str.len w) >= (str.len z)
2830
86
      Node wlen = nm->mkNode(kind::STRING_LENGTH, w);
2831
86
      Node zlen = nm->mkNode(kind::STRING_LENGTH, z);
2832
48
      if (ArithEntail::check(wlen, zlen))
2833
      {
2834
        // w != z
2835
38
        Node wEqZ = Rewriter::rewrite(nm->mkNode(kind::EQUAL, w, z));
2836
24
        if (wEqZ.isConst() && !wEqZ.getConst<bool>())
2837
        {
2838
          Node ret = nm->mkNode(kind::STRING_STRREPL,
2839
20
                                nm->mkNode(kind::STRING_STRREPL, y, w, z),
2840
                                y,
2841
40
                                z);
2842
10
          return returnRewrite(node, ret, Rewrite::REPL_REPL_SHORT_CIRCUIT);
2843
        }
2844
      }
2845
    }
2846
  }
2847
2848
1622
  if (node[1].getKind() == STRING_STRREPL)
2849
  {
2850
122
    if (node[1][0] == node[0])
2851
    {
2852
56
      if (node[1][0] == node[1][2] && node[1][0] == node[2])
2853
      {
2854
        // str.replace( x, str.replace( x, y, x ), x ) ---> x
2855
        return returnRewrite(node, node[0], Rewrite::REPL_REPL2_INV_ID);
2856
      }
2857
56
      bool dualReplIteSuccess = false;
2858
112
      Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2859
56
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2860
      {
2861
        // str.contains( x, z ) ---> false
2862
        //   implies
2863
        // str.replace( x, str.replace( x, y, z ), w ) --->
2864
        // ite( str.contains( x, y ), x, w )
2865
        dualReplIteSuccess = true;
2866
      }
2867
      else
2868
      {
2869
        // str.contains( y, z ) ---> false and str.contains( z, y ) ---> false
2870
        //   implies
2871
        // str.replace( x, str.replace( x, y, z ), w ) --->
2872
        // ite( str.contains( x, y ), x, w )
2873
56
        cmp_con2 = d_stringsEntail.checkContains(node[1][1], node[1][2]);
2874
56
        if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2875
        {
2876
10
          cmp_con2 = d_stringsEntail.checkContains(node[1][2], node[1][1]);
2877
10
          if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2878
          {
2879
            dualReplIteSuccess = true;
2880
          }
2881
        }
2882
      }
2883
56
      if (dualReplIteSuccess)
2884
      {
2885
        Node res = nm->mkNode(ITE,
2886
                              nm->mkNode(STRING_STRCTN, node[0], node[1][1]),
2887
                              node[0],
2888
                              node[2]);
2889
        return returnRewrite(node, res, Rewrite::REPL_DUAL_REPL_ITE);
2890
      }
2891
    }
2892
2893
122
    bool invSuccess = false;
2894
122
    if (node[1][1] == node[0])
2895
    {
2896
46
      if (node[1][0] == node[1][2])
2897
      {
2898
        // str.replace(x, str.replace(y, x, y), w) ---> str.replace(x, y, w)
2899
        invSuccess = true;
2900
      }
2901
46
      else if (node[1][1] == node[2] || node[1][0] == node[2])
2902
      {
2903
        // str.contains(y, z) ----> false and ( y == w or x == w ) implies
2904
        //   implies
2905
        // str.replace(x, str.replace(y, x, z), w) ---> str.replace(x, y, w)
2906
40
        Node cmp_con2 = d_stringsEntail.checkContains(node[1][0], node[1][2]);
2907
20
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2908
      }
2909
    }
2910
    else
2911
    {
2912
      // str.contains(x, z) ----> false and str.contains(x, w) ----> false
2913
      //   implies
2914
      // str.replace(x, str.replace(y, z, w), u) ---> str.replace(x, y, u)
2915
152
      Node cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][1]);
2916
76
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2917
      {
2918
8
        cmp_con2 = d_stringsEntail.checkContains(node[0], node[1][2]);
2919
8
        invSuccess = !cmp_con2.isNull() && !cmp_con2.getConst<bool>();
2920
      }
2921
    }
2922
122
    if (invSuccess)
2923
    {
2924
      Node res = nm->mkNode(kind::STRING_STRREPL, node[0], node[1][0], node[2]);
2925
      return returnRewrite(node, res, Rewrite::REPL_REPL2_INV);
2926
    }
2927
  }
2928
1622
  if (node[2].getKind() == STRING_STRREPL)
2929
  {
2930
48
    if (node[2][1] == node[0])
2931
    {
2932
      // str.contains( z, w ) ----> false implies
2933
      // str.replace( x, w, str.replace( z, x, y ) ) ---> str.replace( x, w, z )
2934
24
      Node cmp_con2 = d_stringsEntail.checkContains(node[2][0], node[1]);
2935
12
      if (!cmp_con2.isNull() && !cmp_con2.getConst<bool>())
2936
      {
2937
        Node res =
2938
            nm->mkNode(kind::STRING_STRREPL, node[0], node[1], node[2][0]);
2939
        return returnRewrite(node, res, Rewrite::REPL_REPL3_INV);
2940
      }
2941
    }
2942
48
    if (node[2][0] == node[1])
2943
    {
2944
4
      bool success = false;
2945
4
      if (node[2][0] == node[2][2] && node[2][1] == node[0])
2946
      {
2947
        // str.replace( x, y, str.replace( y, x, y ) ) ---> x
2948
        success = true;
2949
      }
2950
      else
2951
      {
2952
        // str.contains( x, z ) ----> false implies
2953
        // str.replace( x, y, str.replace( y, z, w ) ) ---> x
2954
4
        cmp_con = d_stringsEntail.checkContains(node[0], node[2][1]);
2955
4
        success = !cmp_con.isNull() && !cmp_con.getConst<bool>();
2956
      }
2957
4
      if (success)
2958
      {
2959
        return returnRewrite(node, node[0], Rewrite::REPL_REPL3_INV_ID);
2960
      }
2961
    }
2962
  }
2963
  // miniscope based on components that do not contribute to contains
2964
  // for example,
2965
  //   str.replace( x ++ y ++ x ++ y, "A", z ) -->
2966
  //   str.replace( x ++ y, "A", z ) ++ x ++ y
2967
  // since if "A" occurs in x ++ y ++ x ++ y, then it must occur in x ++ y.
2968
1622
  if (StringsEntail::checkLengthOne(node[1]))
2969
  {
2970
1256
    Node lastLhs;
2971
632
    unsigned lastCheckIndex = 0;
2972
644
    for (unsigned i = 1, iend = children0.size(); i < iend; i++)
2973
    {
2974
102
      unsigned checkIndex = children0.size() - i;
2975
114
      std::vector<Node> checkLhs;
2976
102
      checkLhs.insert(
2977
204
          checkLhs.end(), children0.begin(), children0.begin() + checkIndex);
2978
114
      Node lhs = utils::mkConcat(checkLhs, stype);
2979
114
      Node rhs = children0[checkIndex];
2980
114
      Node ctn = d_stringsEntail.checkContains(lhs, rhs);
2981
102
      if (!ctn.isNull() && ctn.getConst<bool>())
2982
      {
2983
12
        lastLhs = lhs;
2984
12
        lastCheckIndex = checkIndex;
2985
      }
2986
      else
2987
      {
2988
90
        break;
2989
      }
2990
    }
2991
632
    if (!lastLhs.isNull())
2992
    {
2993
16
      std::vector<Node> remc(children0.begin() + lastCheckIndex,
2994
32
                             children0.end());
2995
16
      Node rem = utils::mkConcat(remc, stype);
2996
      Node ret =
2997
          nm->mkNode(STRING_CONCAT,
2998
16
                     nm->mkNode(STRING_STRREPL, lastLhs, node[1], node[2]),
2999
32
                     rem);
3000
      // for example:
3001
      //   str.replace( x ++ x, "A", y ) ---> str.replace( x, "A", y ) ++ x
3002
      // Since we know that the first occurrence of "A" cannot be in the
3003
      // second occurrence of x. Notice this is specific to single characters
3004
      // due to complications with finds that span multiple components for
3005
      // non-characters.
3006
8
      return returnRewrite(node, ret, Rewrite::REPL_CHAR_NCONTRIB_FIND);
3007
    }
3008
  }
3009
3010
  // TODO (#1180) incorporate these?
3011
  // contains( t, s ) =>
3012
  //   replace( replace( x, t, s ), s, r ) ----> replace( x, t, r )
3013
  // contains( t, s ) =>
3014
  //   contains( replace( t, s, r ), r ) ----> true
3015
3016
1614
  return node;
3017
}
3018
3019
145
Node SequencesRewriter::rewriteReplaceAll(Node node)
3020
{
3021
145
  Assert(node.getKind() == STRING_STRREPLALL);
3022
3023
290
  TypeNode stype = node.getType();
3024
3025
145
  if (node[0].isConst() && node[1].isConst())
3026
  {
3027
54
    std::vector<Node> children;
3028
54
    Node s = node[0];
3029
54
    Node t = node[1];
3030
27
    if (Word::isEmpty(s) || Word::isEmpty(t))
3031
    {
3032
6
      return returnRewrite(node, node[0], Rewrite::REPLALL_EMPTY_FIND);
3033
    }
3034
21
    std::size_t sizeS = Word::getLength(s);
3035
21
    std::size_t sizeT = Word::getLength(t);
3036
21
    std::size_t index = 0;
3037
21
    std::size_t curr = 0;
3038
56
    do
3039
    {
3040
77
      curr = Word::find(s, t, index);
3041
77
      if (curr != std::string::npos)
3042
      {
3043
56
        if (curr > index)
3044
        {
3045
34
          children.push_back(Word::substr(s, index, curr - index));
3046
        }
3047
56
        children.push_back(node[2]);
3048
56
        index = curr + sizeT;
3049
      }
3050
      else
3051
      {
3052
21
        children.push_back(Word::substr(s, index, sizeS - index));
3053
      }
3054
77
    } while (curr != std::string::npos && curr < sizeS);
3055
    // constant evaluation
3056
42
    Node res = utils::mkConcat(children, stype);
3057
21
    return returnRewrite(node, res, Rewrite::REPLALL_CONST);
3058
  }
3059
3060
  // rewrites that apply to both replace and replaceall
3061
236
  Node rri = rewriteReplaceInternal(node);
3062
118
  if (!rri.isNull())
3063
  {
3064
    // printing of the rewrite managed by the call above
3065
4
    return rri;
3066
  }
3067
3068
114
  return node;
3069
}
3070
3071
2011
Node SequencesRewriter::rewriteReplaceInternal(Node node)
3072
{
3073
2011
  Kind nk = node.getKind();
3074
2011
  Assert(nk == STRING_STRREPL || nk == STRING_STRREPLALL);
3075
3076
2011
  if (node[1] == node[2])
3077
  {
3078
59
    return returnRewrite(node, node[0], Rewrite::RPL_ID);
3079
  }
3080
3081
1952
  if (node[0] == node[1])
3082
  {
3083
    // only holds for replaceall if non-empty
3084
96
    if (nk == STRING_STRREPL || StringsEntail::checkNonEmpty(node[1]))
3085
    {
3086
76
      return returnRewrite(node, node[2], Rewrite::RPL_REPLACE);
3087
    }
3088
  }
3089
3090
1876
  return Node::null();
3091
}
3092
3093
119
Node SequencesRewriter::rewriteReplaceRe(Node node)
3094
{
3095
119
  Assert(node.getKind() == STRING_REPLACE_RE);
3096
119
  NodeManager* nm = NodeManager::currentNM();
3097
238
  Node x = node[0];
3098
238
  Node y = node[1];
3099
238
  Node z = node[2];
3100
3101
119
  if (RegExpEntail::isConstRegExp(y))
3102
  {
3103
117
    if (x.isConst())
3104
    {
3105
      // str.replace_re("ZABCZ", re.++("A", _*, "C"), y) ---> "Z" ++ y ++ "Z"
3106
37
      std::pair<size_t, size_t> match = firstMatch(x, y);
3107
37
      if (match.first != string::npos)
3108
      {
3109
38
        String s = x.getConst<String>();
3110
        Node ret = nm->mkNode(STRING_CONCAT,
3111
38
                              nm->mkConst(s.substr(0, match.first)),
3112
                              z,
3113
76
                              nm->mkConst(s.substr(match.second)));
3114
19
        return returnRewrite(node, ret, Rewrite::REPLACE_RE_EVAL);
3115
      }
3116
      else
3117
      {
3118
18
        return returnRewrite(node, x, Rewrite::REPLACE_RE_EVAL);
3119
      }
3120
    }
3121
    // str.replace_re( x, y, z ) ---> z ++ x if "" in y ---> true
3122
156
    String emptyStr("");
3123
80
    if (RegExpEntail::testConstStringInRegExp(emptyStr, 0, y))
3124
    {
3125
8
      Node ret = nm->mkNode(STRING_CONCAT, z, x);
3126
4
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_EMP_RE);
3127
    }
3128
  }
3129
78
  return node;
3130
}
3131
3132
28
Node SequencesRewriter::rewriteReplaceReAll(Node node)
3133
{
3134
28
  Assert(node.getKind() == STRING_REPLACE_RE_ALL);
3135
28
  NodeManager* nm = NodeManager::currentNM();
3136
56
  Node x = node[0];
3137
56
  Node y = node[1];
3138
56
  Node z = node[2];
3139
3140
28
  if (RegExpEntail::isConstRegExp(y))
3141
  {
3142
28
    if (x.isConst())
3143
    {
3144
      // str.replace_re_all("ZABCZAB", re.++("A", _*, "C"), y) --->
3145
      //   "Z" ++ y ++ "Z" ++ y
3146
28
      TypeNode t = x.getType();
3147
28
      Node emp = Word::mkEmptyWord(t);
3148
      Node yp = Rewriter::rewrite(
3149
28
          nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)));
3150
28
      std::vector<Node> res;
3151
28
      String rem = x.getConst<String>();
3152
14
      std::pair<size_t, size_t> match(0, 0);
3153
42
      while (rem.size() >= 0)
3154
      {
3155
28
        match = firstMatch(nm->mkConst(rem), yp);
3156
28
        if (match.first == string::npos)
3157
        {
3158
14
          break;
3159
        }
3160
14
        res.push_back(nm->mkConst(rem.substr(0, match.first)));
3161
14
        res.push_back(z);
3162
14
        rem = rem.substr(match.second);
3163
      }
3164
14
      res.push_back(nm->mkConst(rem));
3165
28
      Node ret = utils::mkConcat(res, t);
3166
14
      return returnRewrite(node, ret, Rewrite::REPLACE_RE_ALL_EVAL);
3167
    }
3168
  }
3169
3170
14
  return node;
3171
}
3172
3173
65
std::pair<size_t, size_t> SequencesRewriter::firstMatch(Node n, Node r)
3174
{
3175
65
  Assert(n.isConst() && n.getType().isStringLike());
3176
65
  Assert(r.getType().isRegExp());
3177
65
  NodeManager* nm = NodeManager::currentNM();
3178
3179
130
  std::vector<Node> emptyVec;
3180
130
  Node sigmaStar = nm->mkNode(REGEXP_STAR, nm->mkNode(REGEXP_SIGMA, emptyVec));
3181
130
  Node re = nm->mkNode(REGEXP_CONCAT, r, sigmaStar);
3182
130
  String s = n.getConst<String>();
3183
3184
65
  if (s.size() == 0)
3185
  {
3186
23
    if (RegExpEntail::testConstStringInRegExp(s, 0, r))
3187
    {
3188
2
      return std::make_pair(0, 0);
3189
    }
3190
    else
3191
    {
3192
21
      return std::make_pair(string::npos, string::npos);
3193
    }
3194
  }
3195
3196
76
  for (size_t i = 0, size = s.size(); i < size; i++)
3197
  {
3198
65
    if (RegExpEntail::testConstStringInRegExp(s, i, re))
3199
    {
3200
110
      for (size_t j = i; j <= size; j++)
3201
      {
3202
189
        String substr = s.substr(i, j - i);
3203
110
        if (RegExpEntail::testConstStringInRegExp(substr, 0, r))
3204
        {
3205
31
          return std::make_pair(i, j);
3206
        }
3207
      }
3208
    }
3209
  }
3210
3211
11
  return std::make_pair(string::npos, string::npos);
3212
}
3213
3214
237
Node SequencesRewriter::rewriteStrReverse(Node node)
3215
{
3216
237
  Assert(node.getKind() == STRING_REV);
3217
237
  NodeManager* nm = NodeManager::currentNM();
3218
474
  Node x = node[0];
3219
237
  if (x.isConst())
3220
  {
3221
    // reverse the characters in the constant
3222
130
    Node retNode = Word::reverse(x);
3223
65
    return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST);
3224
  }
3225
172
  else if (x.getKind() == STRING_CONCAT)
3226
  {
3227
74
    std::vector<Node> children;
3228
113
    for (const Node& nc : x)
3229
    {
3230
76
      children.push_back(nm->mkNode(STRING_REV, nc));
3231
    }
3232
37
    std::reverse(children.begin(), children.end());
3233
    // rev( x1 ++ x2 ) --> rev( x2 ) ++ rev( x1 )
3234
74
    Node retNode = nm->mkNode(STRING_CONCAT, children);
3235
37
    return returnRewrite(node, retNode, Rewrite::STR_REV_MINSCOPE_CONCAT);
3236
  }
3237
135
  else if (x.getKind() == STRING_REV)
3238
  {
3239
    // rev( rev( x ) ) --> x
3240
10
    Node retNode = x[0];
3241
5
    return returnRewrite(node, retNode, Rewrite::STR_REV_IDEM);
3242
  }
3243
130
  return node;
3244
}
3245
3246
143
Node SequencesRewriter::rewritePrefixSuffix(Node n)
3247
{
3248
143
  Assert(n.getKind() == kind::STRING_PREFIX
3249
         || n.getKind() == kind::STRING_SUFFIX);
3250
143
  bool isPrefix = n.getKind() == kind::STRING_PREFIX;
3251
143
  if (n[0] == n[1])
3252
  {
3253
16
    Node ret = NodeManager::currentNM()->mkConst(true);
3254
8
    return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EQ);
3255
  }
3256
135
  if (n[0].isConst())
3257
  {
3258
56
    if (Word::isEmpty(n[0]))
3259
    {
3260
8
      Node ret = NodeManager::currentNM()->mkConst(true);
3261
4
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY_CONST);
3262
    }
3263
  }
3264
131
  if (n[1].isConst())
3265
  {
3266
39
    Node s = n[1];
3267
39
    size_t lenS = Word::getLength(s);
3268
39
    if (n[0].isConst())
3269
    {
3270
      Node ret = NodeManager::currentNM()->mkConst(false);
3271
      Node t = n[0];
3272
      size_t lenT = Word::getLength(t);
3273
      if (lenS >= lenT)
3274
      {
3275
        if ((isPrefix && t == Word::prefix(s, lenT))
3276
            || (!isPrefix && t == Word::suffix(s, lenT)))
3277
        {
3278
          ret = NodeManager::currentNM()->mkConst(true);
3279
        }
3280
      }
3281
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CONST);
3282
    }
3283
39
    else if (lenS == 0)
3284
    {
3285
4
      Node ret = n[0].eqNode(n[1]);
3286
2
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_EMPTY);
3287
    }
3288
37
    else if (lenS == 1)
3289
    {
3290
      // (str.prefix x "A") and (str.suffix x "A") are equivalent to
3291
      // (str.contains "A" x )
3292
      Node ret =
3293
74
          NodeManager::currentNM()->mkNode(kind::STRING_STRCTN, n[1], n[0]);
3294
37
      return returnRewrite(n, ret, Rewrite::SUF_PREFIX_CTN);
3295
    }
3296
  }
3297
184
  Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[0]);
3298
184
  Node lent = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, n[1]);
3299
184
  Node val;
3300
92
  if (isPrefix)
3301
  {
3302
67
    val = NodeManager::currentNM()->mkConst(::cvc5::Rational(0));
3303
  }
3304
  else
3305
  {
3306
25
    val = NodeManager::currentNM()->mkNode(kind::MINUS, lent, lens);
3307
  }
3308
3309
  // Check if we can turn the prefix/suffix into equalities by showing that the
3310
  // prefix/suffix is at least as long as the string
3311
184
  Node eqs = StringsEntail::inferEqsFromContains(n[1], n[0]);
3312
92
  if (!eqs.isNull())
3313
  {
3314
8
    return returnRewrite(n, eqs, Rewrite::SUF_PREFIX_TO_EQS);
3315
  }
3316
3317
  // general reduction to equality + substr
3318
  Node retNode = n[0].eqNode(
3319
168
      NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, n[1], val, lens));
3320
3321
84
  return returnRewrite(n, retNode, Rewrite::SUF_PREFIX_ELIM);
3322
}
3323
3324
76
Node SequencesRewriter::lengthPreserveRewrite(Node n)
3325
{
3326
76
  NodeManager* nm = NodeManager::currentNM();
3327
152
  Node len = Rewriter::rewrite(nm->mkNode(kind::STRING_LENGTH, n));
3328
152
  Node res = canonicalStrForSymbolicLength(len, n.getType());
3329
152
  return res.isNull() ? n : res;
3330
}
3331
3332
88
Node SequencesRewriter::canonicalStrForSymbolicLength(Node len, TypeNode stype)
3333
{
3334
88
  NodeManager* nm = NodeManager::currentNM();
3335
3336
176
  Node res;
3337
88
  if (len.getKind() == CONST_RATIONAL)
3338
  {
3339
    // c -> "A" repeated c times
3340
152
    Rational ratLen = len.getConst<Rational>();
3341
76
    Assert(ratLen.getDenominator() == 1);
3342
152
    Integer intLen = ratLen.getNumerator();
3343
76
    uint32_t u = intLen.getUnsignedInt();
3344
76
    if (stype.isString())  // string-only
3345
    {
3346
76
      res = nm->mkConst(String(std::string(u, 'A')));
3347
    }
3348
    // we could do this for sequences, but we need to be careful: some
3349
    // sorts do not permit values that the solver can handle (e.g. uninterpreted
3350
    // sorts and arrays).
3351
  }
3352
12
  else if (len.getKind() == PLUS)
3353
  {
3354
    // x + y -> norm(x) + norm(y)
3355
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3356
12
    for (const auto& n : len)
3357
    {
3358
16
      Node sn = canonicalStrForSymbolicLength(n, stype);
3359
8
      if (sn.isNull())
3360
      {
3361
        return Node::null();
3362
      }
3363
16
      std::vector<Node> snChildren;
3364
8
      utils::getConcat(sn, snChildren);
3365
8
      concatBuilder.append(snChildren);
3366
    }
3367
4
    res = concatBuilder.constructNode();
3368
  }
3369
20
  else if (len.getKind() == MULT && len.getNumChildren() == 2
3370
20
           && len[0].isConst())
3371
  {
3372
    // c * x -> norm(x) repeated c times
3373
8
    Rational ratReps = len[0].getConst<Rational>();
3374
4
    Assert(ratReps.getDenominator() == 1);
3375
8
    Integer intReps = ratReps.getNumerator();
3376
3377
8
    Node nRep = canonicalStrForSymbolicLength(len[1], stype);
3378
4
    if (nRep.isNull())
3379
    {
3380
      return Node::null();
3381
    }
3382
8
    std::vector<Node> nRepChildren;
3383
4
    utils::getConcat(nRep, nRepChildren);
3384
8
    NodeBuilder concatBuilder(STRING_CONCAT);
3385
12
    for (size_t i = 0, reps = intReps.getUnsignedInt(); i < reps; i++)
3386
    {
3387
8
      concatBuilder.append(nRepChildren);
3388
    }
3389
4
    res = concatBuilder.constructNode();
3390
  }
3391
4
  else if (len.getKind() == STRING_LENGTH)
3392
  {
3393
    // len(x) -> x
3394
4
    res = len[0];
3395
  }
3396
88
  return res;
3397
}
3398
3399
307
Node SequencesRewriter::rewriteSeqUnit(Node node)
3400
{
3401
307
  NodeManager* nm = NodeManager::currentNM();
3402
307
  if (node[0].isConst())
3403
  {
3404
234
    std::vector<Node> seq;
3405
117
    seq.push_back(node[0]);
3406
234
    TypeNode stype = node[0].getType();
3407
234
    Node ret = nm->mkConst(Sequence(stype, seq));
3408
117
    return returnRewrite(node, ret, Rewrite::SEQ_UNIT_EVAL);
3409
  }
3410
190
  return node;
3411
}
3412
3413
146191
Node SequencesRewriter::returnRewrite(Node node, Node ret, Rewrite r)
3414
{
3415
292382
  Trace("strings-rewrite") << "Rewrite " << node << " to " << ret << " by " << r
3416
146191
                           << "." << std::endl;
3417
3418
146191
  NodeManager* nm = NodeManager::currentNM();
3419
3420
146191
  if (d_statistics != nullptr)
3421
  {
3422
146121
    (*d_statistics) << r;
3423
  }
3424
3425
  // standard post-processing
3426
  // We rewrite (string) equalities immediately here. This allows us to forego
3427
  // the standard invariant on equality rewrites (that s=t must rewrite to one
3428
  // of { s=t, t=s, true, false } ).
3429
146191
  Kind retk = ret.getKind();
3430
146191
  if (retk == OR || retk == AND)
3431
  {
3432
14108
    std::vector<Node> children;
3433
7054
    bool childChanged = false;
3434
23726
    for (const Node& cret : ret)
3435
    {
3436
33344
      Node creter = cret;
3437
16672
      if (cret.getKind() == EQUAL)
3438
      {
3439
10909
        creter = rewriteEqualityExt(cret);
3440
      }
3441
5763
      else if (cret.getKind() == NOT && cret[0].getKind() == EQUAL)
3442
      {
3443
347
        creter = nm->mkNode(NOT, rewriteEqualityExt(cret[0]));
3444
      }
3445
16672
      childChanged = childChanged || cret != creter;
3446
16672
      children.push_back(creter);
3447
    }
3448
7054
    if (childChanged)
3449
    {
3450
2200
      ret = nm->mkNode(retk, children);
3451
7054
    }
3452
  }
3453
139137
  else if (retk == NOT && ret[0].getKind() == EQUAL)
3454
  {
3455
    ret = nm->mkNode(NOT, rewriteEqualityExt(ret[0]));
3456
  }
3457
139137
  else if (retk == EQUAL && node.getKind() != EQUAL)
3458
  {
3459
64254
    Trace("strings-rewrite")
3460
32127
        << "Apply extended equality rewrite on " << ret << std::endl;
3461
32127
    ret = rewriteEqualityExt(ret);
3462
  }
3463
146191
  return ret;
3464
}
3465
3466
}  // namespace strings
3467
}  // namespace theory
3468
27735
}  // namespace cvc5