GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_preprocess.cpp Lines: 522 522 100.0 %
Date: 2021-11-07 Branches: 1574 3078 51.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Strings Preprocess.
14
 */
15
16
#include "theory/strings/theory_strings_preprocess.h"
17
18
#include "expr/kind.h"
19
#include "expr/skolem_manager.h"
20
#include "options/smt_options.h"
21
#include "options/strings_options.h"
22
#include "smt/logic_exception.h"
23
#include "theory/strings/arith_entail.h"
24
#include "theory/strings/sequences_rewriter.h"
25
#include "theory/strings/theory_strings_utils.h"
26
#include "theory/strings/word.h"
27
#include "util/rational.h"
28
#include "util/statistics_registry.h"
29
#include "util/string.h"
30
31
using namespace cvc5;
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
38
15315
StringsPreprocess::StringsPreprocess(SkolemCache* sc,
39
15315
                                     HistogramStat<Kind>* statReductions)
40
15315
    : d_sc(sc), d_statReductions(statReductions)
41
{
42
15315
}
43
44
15310
StringsPreprocess::~StringsPreprocess(){
45
46
15310
}
47
48
4560
Node StringsPreprocess::reduce(Node t,
49
                               std::vector<Node>& asserts,
50
                               SkolemCache* sc)
51
{
52
9120
  Trace("strings-preprocess-debug")
53
4560
      << "StringsPreprocess::reduce: " << t << std::endl;
54
4560
  Node retNode = t;
55
4560
  NodeManager* nm = NodeManager::currentNM();
56
9120
  Node zero = nm->mkConst(Rational(0));
57
9120
  Node one = nm->mkConst(Rational(1));
58
9120
  Node negOne = nm->mkConst(Rational(-1));
59
60
4560
  if( t.getKind() == kind::STRING_SUBSTR ) {
61
    // processing term:  substr( s, n, m )
62
3646
    Node s = t[0];
63
3646
    Node n = t[1];
64
3646
    Node m = t[2];
65
3646
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
66
3646
    Node t12 = nm->mkNode(PLUS, n, m);
67
3646
    Node lt0 = nm->mkNode(STRING_LENGTH, s);
68
    //start point is greater than or equal zero
69
3646
    Node c1 = nm->mkNode(GEQ, n, zero);
70
    //start point is less than end of string
71
3646
    Node c2 = nm->mkNode(GT, lt0, n);
72
    //length is positive
73
3646
    Node c3 = nm->mkNode(GT, m, zero);
74
3646
    Node cond = nm->mkNode(AND, c1, c2, c3);
75
76
3646
    Node emp = Word::mkEmptyWord(t.getType());
77
78
3646
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
79
3646
    Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
80
3646
    Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2));
81
    //length of first skolem is second argument
82
3646
    Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
83
3646
    Node lsk2 = nm->mkNode(STRING_LENGTH, sk2);
84
    // Length of the suffix is either 0 (in the case where m + n > len(s)) or
85
    // len(s) - n -m
86
    Node b13 = nm->mkNode(
87
        OR,
88
3646
        nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, nm->mkNode(PLUS, n, m))),
89
7292
        nm->mkNode(EQUAL, lsk2, zero));
90
    // Length of the result is at most m
91
3646
    Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m);
92
93
3646
    Node b1 = nm->mkNode(AND, {b11, b12, b13, b14});
94
3646
    Node b2 = skt.eqNode(emp);
95
3646
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
96
97
    // assert:
98
    // IF    n >=0 AND len( s ) > n AND m > 0
99
    // THEN: s = sk1 ++ skt ++ sk2 AND
100
    //       len( sk1 ) = n AND
101
    //       ( len( sk2 ) = len( s )-(n+m) OR len( sk2 ) = 0 ) AND
102
    //       len( skt ) <= m
103
    // ELSE: skt = ""
104
    //
105
    // Note: The length of sk2 is either 0 (if the n + m is greater or equal to
106
    // the length of s) or len(s) - (n + m) otherwise. If n + m is greater or
107
    // equal to the length of s, then len(s) - (n + m) is negative and in
108
    // conflict with lengths always being positive, so only len(sk2) = 0 may be
109
    // satisfied. If n + m is less than the length of s, then len(sk2) = 0
110
    // cannot be satisfied because we have the constraint that len(skt) <= m,
111
    // so sk2 must be greater than 0.
112
1823
    asserts.push_back(lemma);
113
114
    // Thus, substr( s, n, m ) = skt
115
1823
    retNode = skt;
116
  }
117
2737
  else if (t.getKind() == kind::STRING_UPDATE)
118
  {
119
    // processing term:  update( s, n, r )
120
48
    Node s = t[0];
121
48
    Node n = t[1];
122
48
    Node r = t[2];
123
48
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
124
48
    Node ls = nm->mkNode(STRING_LENGTH, s);
125
    // start point is greater than or equal zero
126
48
    Node c1 = nm->mkNode(GEQ, n, zero);
127
    // start point is less than end of string
128
48
    Node c2 = nm->mkNode(GT, ls, n);
129
48
    Node cond = nm->mkNode(AND, c1, c2);
130
131
    // substr(r,0,|s|-n)
132
48
    Node lens = nm->mkNode(STRING_LENGTH, s);
133
48
    Node rs;
134
24
    if (r.isConst() && Word::getLength(r) == 1)
135
    {
136
      // optimization: don't need to take substring for single characters, due
137
      // to guard on where it is used in the reduction below.
138
9
      rs = r;
139
    }
140
    else
141
    {
142
15
      rs = nm->mkNode(STRING_SUBSTR, r, zero, nm->mkNode(MINUS, lens, n));
143
    }
144
48
    Node rslen = nm->mkNode(STRING_LENGTH, rs);
145
48
    Node nsuf = nm->mkNode(PLUS, n, rslen);
146
    // substr(s, n, len(substr(r,0,|s|-n))), which is used for formalizing the
147
    // purification variable sk3 below.
148
48
    Node ssubstr = nm->mkNode(STRING_SUBSTR, s, n, rslen);
149
150
48
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
151
    Node sk2 =
152
48
        sc->mkSkolemCached(s, nsuf, SkolemCache::SK_SUFFIX_REM, "sssufr");
153
48
    Node sk3 = sc->mkSkolemCached(ssubstr, SkolemCache::SK_PURIFY, "ssubstr");
154
48
    Node a1 = skt.eqNode(nm->mkNode(STRING_CONCAT, sk1, rs, sk2));
155
48
    Node a2 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk3, sk2));
156
    // length of first skolem is second argument
157
48
    Node a3 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
158
48
    Node a4 = nm->mkNode(STRING_LENGTH, rs).eqNode(nm->mkNode(STRING_LENGTH, sk3));
159
160
48
    Node b1 = nm->mkNode(AND, {a1, a2, a3, a4});
161
48
    Node b2 = skt.eqNode(s);
162
48
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
163
164
    // assert:
165
    // IF    n >=0 AND n < len( s )
166
    // THEN: skt = sk1 ++ substr(r,0,len(s)-n) ++ sk2 AND
167
    //       s = sk1 ++ sk3 ++ sk2 AND
168
    //       len( sk1 ) = n AND
169
    //       len( substr(r,0,len(s)-n) ) = len( sk3 )
170
    // ELSE: skt = s
171
    // We use an optimization where r is used instead of substr(r,0,len(s)-n)
172
    // if r is a constant of length one.
173
24
    asserts.push_back(lemma);
174
175
    // Thus, str.update( s, n, r ) = skt
176
24
    retNode = skt;
177
  }
178
2713
  else if (t.getKind() == kind::STRING_INDEXOF)
179
  {
180
    // processing term:  indexof( x, y, n )
181
288
    Node x = t[0];
182
288
    Node y = t[1];
183
288
    Node n = t[2];
184
    Node skk = sc->mkTypedSkolemCached(
185
288
        nm->integerType(), t, SkolemCache::SK_PURIFY, "iok");
186
187
288
    Node negone = nm->mkConst(Rational(-1));
188
189
    // substr( x, n, len( x ) - n )
190
    Node st = nm->mkNode(STRING_SUBSTR,
191
                         x,
192
                         n,
193
288
                         nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n));
194
    Node io2 =
195
288
        sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre");
196
    Node io4 =
197
288
        sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost");
198
199
    // ~contains( substr( x, n, len( x ) - n ), y )
200
288
    Node c11 = nm->mkNode(STRING_CONTAINS, st, y).negate();
201
    // n > len( x )
202
288
    Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x));
203
    // 0 > n
204
288
    Node c13 = nm->mkNode(GT, zero, n);
205
288
    Node cond1 = nm->mkNode(OR, c11, c12, c13);
206
    // skk = -1
207
288
    Node cc1 = skk.eqNode(negone);
208
209
    // y = ""
210
288
    Node emp = Word::mkEmptyWord(x.getType());
211
288
    Node cond2 = y.eqNode(emp);
212
    // skk = n
213
288
    Node cc2 = skk.eqNode(t[2]);
214
215
    // substr( x, n, len( x ) - n ) = str.++( io2, y, io4 )
216
288
    Node c31 = st.eqNode(nm->mkNode(STRING_CONCAT, io2, y, io4));
217
    // ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y )
218
    Node c32 =
219
576
        nm->mkNode(
220
              STRING_CONTAINS,
221
288
              nm->mkNode(
222
                  STRING_CONCAT,
223
                  io2,
224
288
                  nm->mkNode(
225
                      STRING_SUBSTR,
226
                      y,
227
                      zero,
228
288
                      nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), one))),
229
              y)
230
288
            .negate();
231
    // skk = n + len( io2 )
232
288
    Node c33 = skk.eqNode(nm->mkNode(PLUS, n, nm->mkNode(STRING_LENGTH, io2)));
233
288
    Node cc3 = nm->mkNode(AND, c31, c32, c33);
234
235
    // assert:
236
    // IF:   ~contains( substr( x, n, len( x ) - n ), y ) OR n > len(x) OR 0 > n
237
    // THEN: skk = -1
238
    // ELIF: y = ""
239
    // THEN: skk = n
240
    // ELSE: substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) ^
241
    //       ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) ^
242
    //       skk = n + len( io2 )
243
    // for fresh io2, io4.
244
288
    Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3));
245
144
    asserts.push_back(rr);
246
247
    // Thus, indexof( x, y, n ) = skk.
248
144
    retNode = skk;
249
  }
250
2569
  else if (t.getKind() == kind::STRING_INDEXOF_RE)
251
  {
252
    // processing term:  indexof_re(s, r, n)
253
200
    Node s = t[0];
254
200
    Node r = t[1];
255
200
    Node n = t[2];
256
    Node skk = sc->mkTypedSkolemCached(
257
200
        nm->integerType(), t, SkolemCache::SK_PURIFY, "iork");
258
200
    Node sLen = nm->mkNode(STRING_LENGTH, s);
259
260
    // n > len(x)
261
200
    Node ub = nm->mkNode(GT, n, sLen);
262
    // 0 > n
263
200
    Node lb = nm->mkNode(GT, zero, n);
264
    // n > len(x) OR 0 > n
265
200
    Node condNegOne = nm->mkNode(OR, ub, lb);
266
    // skk = -1
267
200
    Node retNegOne = skk.eqNode(negOne);
268
269
200
    Node emp = Word::mkEmptyWord(s.getType());
270
    // in_re("", r)
271
200
    Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r);
272
    // skk = n
273
200
    Node retN = skk.eqNode(n);
274
275
200
    Node i = SkolemCache::mkIndexVar(t);
276
200
    Node l = SkolemCache::mkLengthVar(t);
277
200
    Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l);
278
    Node bound =
279
        nm->mkNode(AND,
280
                   {
281
200
                       nm->mkNode(GEQ, i, n),
282
200
                       nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)),
283
200
                       nm->mkNode(GT, l, zero),
284
200
                       nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)),
285
500
                   });
286
    Node body = nm->mkNode(
287
        OR,
288
200
        bound.negate(),
289
200
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r)
290
400
            .negate());
291
    // forall il.
292
    //   n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
293
    //     ~in_re(substr(s, i, l), r)
294
200
    Node firstMatch = utils::mkForallInternal(bvl, body);
295
200
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
296
    Node validLen =
297
        nm->mkNode(AND,
298
200
                   nm->mkNode(GEQ, l, zero),
299
400
                   nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk)));
300
    Node matchBody = nm->mkNode(
301
        AND,
302
        validLen,
303
200
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r));
304
    // skk != -1 =>
305
    //   exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r))
306
    Node match =
307
        nm->mkNode(OR,
308
                   retNegOne,
309
200
                   utils::mkForallInternal(bvll, matchBody.negate()).negate());
310
311
    // assert:
312
    // IF:   n > len(s) OR 0 > n
313
    // THEN: skk = -1
314
    // ELIF: in_re("", r)
315
    // THEN: skk = n
316
    // ELSE: (forall il.
317
    //         n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
318
    //           ~in_re(substr(s, i, l), r)) ^
319
    //       (skk != -1 =>
320
    //          exists l. 0 <= l <= len(s) - skk ^ in_re(substr(s, skk, l), r))
321
    //
322
    // Note that this reduction relies on eager reduction lemmas being sent to
323
    // properly limit the range of skk.
324
    Node rr = nm->mkNode(
325
        ITE,
326
        condNegOne,
327
        retNegOne,
328
200
        nm->mkNode(
329
400
            ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match)));
330
100
    asserts.push_back(rr);
331
332
    // Thus, indexof_re(s, r, n) = skk.
333
100
    retNode = skk;
334
  }
335
2469
  else if (t.getKind() == STRING_ITOS)
336
  {
337
    // processing term:  int.to.str( n )
338
76
    Node n = t[0];
339
76
    Node itost = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost");
340
76
    Node leni = nm->mkNode(STRING_LENGTH, itost);
341
342
76
    std::vector<Node> conc;
343
76
    std::vector< TypeNode > argTypes;
344
38
    argTypes.push_back(nm->integerType());
345
76
    TypeNode itosResType = nm->mkFunctionType(argTypes, nm->integerType());
346
76
    Node u = sc->mkSkolemFun(SkolemFunId::STRINGS_ITOS_RESULT, itosResType, t);
347
348
76
    Node lem = nm->mkNode(GEQ, leni, one);
349
38
    conc.push_back(lem);
350
351
38
    lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni));
352
38
    conc.push_back(lem);
353
354
38
    lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero));
355
38
    conc.push_back(lem);
356
357
76
    Node x = SkolemCache::mkIndexVar(t);
358
76
    Node xPlusOne = nm->mkNode(PLUS, x, one);
359
76
    Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
360
76
    Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni));
361
76
    Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one);
362
76
    Node ux = nm->mkNode(APPLY_UF, u, x);
363
76
    Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne);
364
76
    Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
365
76
    Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0);
366
367
76
    Node ten = nm->mkConst(Rational(10));
368
76
    Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
369
    Node leadingZeroPos =
370
76
        nm->mkNode(AND, x.eqNode(zero), nm->mkNode(GT, leni, one));
371
    Node cb = nm->mkNode(
372
        AND,
373
76
        nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, one, zero)),
374
152
        nm->mkNode(LT, c, ten));
375
376
76
    Node ux1lem = nm->mkNode(GEQ, n, ux1);
377
378
38
    lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem));
379
38
    lem = utils::mkForallInternal(xbv, lem);
380
38
    conc.push_back(lem);
381
382
76
    Node nonneg = nm->mkNode(GEQ, n, zero);
383
384
76
    Node emp = Word::mkEmptyWord(t.getType());
385
38
    lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp));
386
38
    asserts.push_back(lem);
387
    // assert:
388
    // IF n>=0
389
    // THEN:
390
    //   len( itost ) >= 1 ^
391
    //   n = U( len( itost ) ) ^ U( 0 ) = 0 ^
392
    //   forall x. (x>=0 ^ x < str.len(itost)) =>
393
    //     U( x+1 ) = (str.code( str.at(itost, x) )-48) + 10*U( x ) ^
394
    //     ite( x=0 AND str.len(itost)>1, 49, 48 ) <=
395
    //       str.code( str.at(itost, x) ) < 58 ^
396
    //     n >= U(x + 1)
397
    // ELSE
398
    //   itost = ""
399
    // thus:
400
    //   int.to.str( n ) = itost
401
    //
402
    // Note: The conjunct `n >= U(x + 1)` is not needed for correctness but is
403
    // just an optimization.
404
405
    // The function U is an accumulator, where U( x ) for x>0 is the value of
406
    // str.to.int( str.substr( int.to.str( n ), 0, x ) ). For example, for
407
    // n=345, we have that U(1), U(2), U(3) = 3, 34, 345.
408
    // Above, we use str.code to map characters to their integer value, where
409
    // note that str.code( "0" ) = 48. Further notice that
410
    //   ite( x=0 AND str.len(itost)>1, 49, 48 )
411
    // enforces that int.to.str( n ) has no leading zeroes.
412
38
    retNode = itost;
413
2431
  } else if( t.getKind() == kind::STRING_STOI ) {
414
106
    Node s = t[0];
415
    Node stoit = sc->mkTypedSkolemCached(
416
106
        nm->integerType(), t, SkolemCache::SK_PURIFY, "stoit");
417
106
    Node lens = nm->mkNode(STRING_LENGTH, s);
418
419
106
    std::vector<Node> conc1;
420
106
    Node lem = stoit.eqNode(negOne);
421
53
    conc1.push_back(lem);
422
423
106
    Node emp = Word::mkEmptyWord(s.getType());
424
106
    Node sEmpty = s.eqNode(emp);
425
    Node k = sc->mkSkolemFun(
426
106
        SkolemFunId::STRINGS_STOI_NON_DIGIT, nm->integerType(), t);
427
106
    Node kc1 = nm->mkNode(GEQ, k, zero);
428
106
    Node kc2 = nm->mkNode(LT, k, lens);
429
106
    Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
430
    Node codeSk = nm->mkNode(
431
        MINUS,
432
106
        nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)),
433
212
        c0);
434
106
    Node ten = nm->mkConst(Rational(10));
435
    Node kc3 = nm->mkNode(
436
106
        OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten));
437
53
    conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3)));
438
439
106
    std::vector<Node> conc2;
440
106
    std::vector< TypeNode > argTypes;
441
53
    argTypes.push_back(nm->integerType());
442
106
    TypeNode stoiResultType = nm->mkFunctionType(argTypes, nm->integerType());
443
    Node u =
444
106
        sc->mkSkolemFun(SkolemFunId::STRINGS_STOI_RESULT, stoiResultType, t);
445
446
53
    lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens));
447
53
    conc2.push_back(lem);
448
449
53
    lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero));
450
53
    conc2.push_back(lem);
451
452
53
    lem = nm->mkNode(GT, lens, zero);
453
53
    conc2.push_back(lem);
454
455
106
    Node x = SkolemCache::mkIndexVar(t);
456
106
    Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
457
106
    Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens));
458
106
    Node sx = nm->mkNode(STRING_SUBSTR, s, x, one);
459
106
    Node ux = nm->mkNode(APPLY_UF, u, x);
460
106
    Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, one));
461
106
    Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0);
462
463
106
    Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
464
106
    Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten));
465
466
106
    Node ux1lem = nm->mkNode(GEQ, stoit, ux1);
467
468
53
    lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem));
469
53
    lem = utils::mkForallInternal(xbv, lem);
470
53
    conc2.push_back(lem);
471
472
106
    Node sneg = nm->mkNode(LT, stoit, zero);
473
53
    lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2));
474
53
    asserts.push_back(lem);
475
476
    // assert:
477
    // IF stoit < 0
478
    // THEN:
479
    //   stoit = -1 ^
480
    //   ( s = "" OR
481
    //     ( k>=0 ^ k<len( s ) ^ ( str.code( str.at(s, k) ) < 48 OR
482
    //                             str.code( str.at(s, k) ) >= 58 )))
483
    // ELSE:
484
    //   stoit = U( len( s ) ) ^ U( 0 ) = 0 ^
485
    //   str.len( s ) > 0 ^
486
    //   forall x. (x>=0 ^ x < str.len(s)) =>
487
    //     U( x+1 ) = ( str.code( str.at(s, x) ) - 48 ) + 10*U( x ) ^
488
    //     48 <= str.code( str.at(s, x) ) < 58 ^
489
    //     stoit >= U( x+1 )
490
    // Thus, str.to.int( s ) = stoit
491
    //
492
    // Note: The conjunct `stoit >= U( x+1 )` is not needed for correctness but
493
    // is just an optimization.
494
495
53
    retNode = stoit;
496
  }
497
2378
  else if (t.getKind() == kind::SEQ_NTH)
498
  {
499
    // processing term:  str.nth( s, n)
500
    // similar to substr.
501
108
    Node s = t[0];
502
108
    Node n = t[1];
503
108
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
504
108
    Node t12 = nm->mkNode(PLUS, n, one);
505
108
    Node lt0 = nm->mkNode(STRING_LENGTH, s);
506
    // start point is greater than or equal zero
507
108
    Node c1 = nm->mkNode(GEQ, n, zero);
508
    // start point is less than end of string
509
108
    Node c2 = nm->mkNode(GT, lt0, n);
510
    // check whether this application of seq.nth is defined.
511
108
    Node cond = nm->mkNode(AND, c1, c2);
512
513
    // nodes for the case where `seq.nth` is defined.
514
108
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
515
108
    Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
516
108
    Node unit = nm->mkNode(SEQ_UNIT, skt);
517
108
    Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2));
518
    // length of first skolem is second argument
519
108
    Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
520
108
    Node lsk2 = nm->mkNode(STRING_LENGTH, sk2);
521
108
    Node b13 = nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, t12));
522
108
    Node b1 = nm->mkNode(AND, b11, b12, b13);
523
524
    // nodes for the case where `seq.nth` is undefined.
525
108
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
526
108
    Node b2 = nm->mkNode(EQUAL, skt, nm->mkNode(APPLY_UF, uf, s, n));
527
528
    // the full ite, split on definedness of `seq.nth`
529
108
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
530
531
    // assert:
532
    // IF    n >=0 AND n < len( s )
533
    // THEN: s = sk1 ++ unit(skt) ++ sk2 AND
534
    //       len( sk1 ) = n AND
535
    //       ( len( sk2 ) = len( s )- (n+1)
536
    // ELSE: skt = Uf(s, n), where Uf is a cached skolem function.
537
54
    asserts.push_back(lemma);
538
54
    retNode = skt;
539
  }
540
2324
  else if (t.getKind() == kind::STRING_REPLACE)
541
  {
542
    // processing term: replace( x, y, z )
543
452
    Node x = t[0];
544
452
    Node y = t[1];
545
452
    Node z = t[2];
546
452
    TypeNode tn = t[0].getType();
547
    Node rp1 =
548
452
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre");
549
    Node rp2 =
550
452
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost");
551
452
    Node rpw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw");
552
553
    // y = ""
554
452
    Node emp = Word::mkEmptyWord(tn);
555
452
    Node cond1 = y.eqNode(emp);
556
    // rpw = str.++( z, x )
557
452
    Node c1 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, z, x));
558
559
    // contains( x, y )
560
452
    Node cond2 = nm->mkNode(kind::STRING_CONTAINS, x, y);
561
    // x = str.++( rp1, y, rp2 )
562
452
    Node c21 = x.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, y, rp2));
563
    // rpw = str.++( rp1, z, rp2 )
564
452
    Node c22 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, z, rp2));
565
    // ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y )
566
    Node c23 =
567
904
        nm->mkNode(kind::STRING_CONTAINS,
568
452
                   nm->mkNode(
569
                       kind::STRING_CONCAT,
570
                       rp1,
571
452
                       nm->mkNode(kind::STRING_SUBSTR,
572
                                  y,
573
                                  zero,
574
904
                                  nm->mkNode(kind::MINUS,
575
452
                                             nm->mkNode(kind::STRING_LENGTH, y),
576
                                             one))),
577
                   y)
578
452
            .negate();
579
580
    // assert:
581
    //   IF    y=""
582
    //   THEN: rpw = str.++( z, x )
583
    //   ELIF: contains( x, y )
584
    //   THEN: x = str.++( rp1, y, rp2 ) ^
585
    //         rpw = str.++( rp1, z, rp2 ) ^
586
    //         ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ),
587
    //   ELSE: rpw = x
588
    // for fresh rp1, rp2, rpw
589
    Node rr = nm->mkNode(kind::ITE,
590
                         cond1,
591
                         c1,
592
904
                         nm->mkNode(kind::ITE,
593
                                    cond2,
594
452
                                    nm->mkNode(kind::AND, c21, c22, c23),
595
904
                                    rpw.eqNode(x)));
596
226
    asserts.push_back(rr);
597
598
    // Thus, replace( x, y, z ) = rpw.
599
226
    retNode = rpw;
600
  }
601
2098
  else if (t.getKind() == kind::STRING_REPLACE_ALL)
602
  {
603
    // processing term: replaceall( x, y, z )
604
32
    Node x = t[0];
605
32
    Node y = t[1];
606
32
    Node z = t[2];
607
32
    Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw");
608
609
    Node numOcc = sc->mkSkolemFun(
610
32
        SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y);
611
32
    std::vector<TypeNode> argTypes;
612
16
    argTypes.push_back(nm->integerType());
613
32
    TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType());
614
    Node us = sc->mkSkolemFun(
615
32
        SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t);
616
32
    TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
617
32
    Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y);
618
619
32
    Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
620
32
    Node usno = nm->mkNode(APPLY_UF, us, numOcc);
621
32
    Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
622
623
32
    std::vector<Node> lem;
624
16
    lem.push_back(nm->mkNode(GEQ, numOcc, zero));
625
16
    lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, zero)));
626
16
    lem.push_back(usno.eqNode(rem));
627
16
    lem.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
628
16
    lem.push_back(nm->mkNode(STRING_INDEXOF, x, y, ufno).eqNode(negOne));
629
630
32
    Node i = SkolemCache::mkIndexVar(t);
631
32
    Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
632
    Node bound =
633
32
        nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
634
32
    Node ufi = nm->mkNode(APPLY_UF, uf, i);
635
32
    Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, one));
636
32
    Node ii = nm->mkNode(STRING_INDEXOF, x, y, ufi);
637
    Node cc = nm->mkNode(
638
        STRING_CONCAT,
639
32
        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)),
640
        z,
641
64
        nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, one)));
642
643
32
    std::vector<Node> flem;
644
16
    flem.push_back(ii.eqNode(negOne).negate());
645
16
    flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc));
646
16
    flem.push_back(
647
32
        ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y))));
648
649
32
    Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
650
32
    Node q = utils::mkForallInternal(bvli, body);
651
16
    lem.push_back(q);
652
653
    // assert:
654
    //   IF y=""
655
    //   THEN: rpaw = x
656
    //   ELSE:
657
    //     numOcc >= 0 ^
658
    //     rpaw = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc), len(x)) ^
659
    //     Uf(0) = 0 ^ indexof( x, y, Uf(numOcc) ) = -1 ^
660
    //     forall i. 0 <= i < numOcc =>
661
    //        ii != -1 ^
662
    //        Us( i ) = str.substr( x, Uf(i), ii - Uf(i) ) ++ z ++ Us(i+1) ^
663
    //        Uf( i+1 ) = ii + len(y)
664
    //        where ii == indexof( x, y, Uf(i) )
665
666
    // Conceptually, numOcc is the number of occurrences of y in x, Uf( i ) is
667
    // the index to begin searching in x for y after the i^th occurrence of y in
668
    // x, and Us( i ) is the result of processing the remainder after processing
669
    // the i^th occurrence of y in x.
670
32
    Node emp = Word::mkEmptyWord(t.getType());
671
    Node assert =
672
32
        nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem));
673
16
    asserts.push_back(assert);
674
675
    // Thus, replaceall( x, y, z ) = rpaw
676
16
    retNode = rpaw;
677
  }
678
2082
  else if (t.getKind() == STRING_REPLACE_RE)
679
  {
680
    // processing term: replace_re( x, y, z )
681
100
    Node x = t[0];
682
100
    Node y = t[1];
683
100
    Node z = t[2];
684
100
    Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
685
686
    // indexof_re(x, y, 0)
687
100
    Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero);
688
689
    // in_re("", y)
690
    Node matchesEmpty =
691
100
        nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y);
692
    // k = z ++ x
693
100
    Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x));
694
695
100
    TypeNode ktype = t.getType();
696
100
    Node k1 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_PRE, ktype, x, y);
697
100
    Node k2 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH, ktype, x, y);
698
100
    Node k3 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_POST, ktype, x, y);
699
100
    Node k2Len = nm->mkNode(STRING_LENGTH, k2);
700
    // x = k1 ++ k2 ++ k3
701
100
    Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
702
    // len(k1) = indexof_re(x, y, 0)
703
100
    Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx);
704
100
    Node l = SkolemCache::mkLengthVar(t);
705
100
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
706
    Node bound =
707
100
        nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len));
708
    Node body = nm->mkNode(
709
        OR,
710
100
        bound.negate(),
711
100
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y)
712
200
            .negate());
713
    // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)
714
100
    Node shortestMatch = utils::mkForallInternal(bvll, body);
715
    // in_re(k2, y)
716
100
    Node match = nm->mkNode(STRING_IN_REGEXP, k2, y);
717
    // k = k1 ++ z ++ k3
718
100
    Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
719
720
    // IF: indexof_re(x, y, 0) = -1
721
    // THEN: k = x
722
    // ELSE:
723
    //   x = k1 ++ k2 ++ k3 ^
724
    //   len(k1) = indexof_re(x, y, 0) ^
725
    //   (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^
726
    //   in_re(k2, y) ^
727
    //   k = k1 ++ z ++ k3
728
50
    asserts.push_back(
729
300
        nm->mkNode(ITE,
730
100
                   idx.eqNode(negOne),
731
100
                   k.eqNode(x),
732
100
                   nm->mkNode(AND, {split, k1Len, shortestMatch, match, res})));
733
50
    retNode = k;
734
  }
735
2032
  else if (t.getKind() == STRING_REPLACE_RE_ALL)
736
  {
737
52
    Node x = t[0];
738
52
    Node y = t[1];
739
52
    Node z = t[2];
740
52
    Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
741
742
    Node numOcc = sc->mkSkolemFun(
743
52
        SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y);
744
52
    std::vector<TypeNode> argTypes;
745
26
    argTypes.push_back(nm->integerType());
746
52
    TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType());
747
    Node us = sc->mkSkolemFun(
748
52
        SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t);
749
52
    TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
750
52
    Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y);
751
52
    Node ul = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_LEN, ufType, x, y);
752
753
52
    Node emp = Word::mkEmptyWord(t.getType());
754
755
52
    Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
756
    // indexof_re(x, y', 0) = -1
757
52
    Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne);
758
759
52
    Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
760
52
    Node usno = nm->mkNode(APPLY_UF, us, numOcc);
761
52
    Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
762
763
52
    std::vector<Node> lemmas;
764
    // numOcc > 0
765
26
    lemmas.push_back(nm->mkNode(GT, numOcc, zero));
766
    // k = Us(0)
767
26
    lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero)));
768
    // Us(numOcc) = substr(x, Uf(numOcc))
769
26
    lemmas.push_back(usno.eqNode(rem));
770
    // Uf(0) = 0
771
26
    lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
772
    // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1
773
26
    lemmas.push_back(
774
52
        nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne));
775
776
52
    Node i = SkolemCache::mkIndexVar(t);
777
52
    Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
778
    Node bound =
779
52
        nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
780
52
    Node ip1 = nm->mkNode(PLUS, i, one);
781
52
    Node ufi = nm->mkNode(APPLY_UF, uf, i);
782
52
    Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
783
52
    Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1);
784
    // ii = Uf(i + 1) - Ul(i + 1)
785
52
    Node ii = nm->mkNode(MINUS, ufip1, ulip1);
786
787
52
    std::vector<Node> flem;
788
    // Ul(i + 1) > 0
789
26
    flem.push_back(nm->mkNode(GT, ulip1, zero));
790
    // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1)
791
26
    flem.push_back(ufip1.eqNode(
792
52
        nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1)));
793
    // in_re(substr(x, ii, Ul(i + 1)), y')
794
78
    flem.push_back(nm->mkNode(
795
52
        STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp));
796
52
    Node l = SkolemCache::mkLengthVar(t);
797
52
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
798
    Node lenBound =
799
52
        nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1));
800
    Node shortestMatchBody = nm->mkNode(
801
        OR,
802
52
        lenBound.negate(),
803
52
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y)
804
104
            .negate());
805
    // forall l. 0 < l < Ul(i + 1) =>
806
    //   ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
807
26
    flem.push_back(utils::mkForallInternal(bvll, shortestMatchBody));
808
    Node pfxMatch =
809
52
        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
810
    // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
811
26
    flem.push_back(
812
52
        nm->mkNode(APPLY_UF, us, i)
813
52
            .eqNode(nm->mkNode(
814
52
                STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
815
52
    Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
816
52
    Node forall = utils::mkForallInternal(bvli, body);
817
26
    lemmas.push_back(forall);
818
819
    // IF indexof(x, y', 0) = -1
820
    // THEN: k = x
821
    // ELSE:
822
    //   numOcc > 0 ^
823
    //   k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
824
    //   Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^
825
    //   forall i. 0 <= i < nummOcc =>
826
    //     Ul(i + 1) > 0 ^
827
    //     Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^
828
    //     in_re(substr(x, ii, Ul(i + 1)), y') ^
829
    //     forall l. 0 < l < Ul(i + 1) =>
830
    //       ~in_re(substr(x, ii, l), y')
831
    //     Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
832
    //     where ii = Uf(i + 1) - Ul(i + 1)
833
    // where y' = re.diff(y, "")
834
    //
835
    // Conceptually, y' is the regex y but excluding the empty string (because
836
    // we do not want to match empty strings), numOcc is the number of shortest
837
    // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
838
    // is the length of the i^th match, and Us(i) is the result of processing
839
    // the remainder after processing the i^th occurrence of y in x.
840
    //
841
    // Visualization of Uf(i) and Ul(i):
842
    //
843
    // x = |------------| match 1 |-----------| match 2   |---|
844
    //     |                      |                       |
845
    //     Uf(0)                  Uf(1)                   Uf(2)
846
    //
847
    //                  |---------|           |-----------|
848
    //                    Ul(1)                 Ul(2)
849
26
    asserts.push_back(
850
52
        nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas)));
851
26
    retNode = k;
852
  }
853
2006
  else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
854
  {
855
12
    Node x = t[0];
856
12
    Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
857
858
12
    Node lenx = nm->mkNode(STRING_LENGTH, x);
859
12
    Node lenr = nm->mkNode(STRING_LENGTH, r);
860
12
    Node eqLenA = lenx.eqNode(lenr);
861
862
12
    Node i = SkolemCache::mkIndexVar(t);
863
12
    Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
864
865
12
    Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one));
866
12
    Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one));
867
868
12
    Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65));
869
12
    Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90));
870
    Node offset =
871
12
        nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? -32 : 32));
872
873
    Node res = nm->mkNode(
874
        ITE,
875
12
        nm->mkNode(AND, nm->mkNode(LEQ, lb, ci), nm->mkNode(LEQ, ci, ub)),
876
12
        nm->mkNode(PLUS, ci, offset),
877
36
        ci);
878
879
    Node bound =
880
12
        nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr));
881
12
    Node body = nm->mkNode(OR, bound.negate(), ri.eqNode(res));
882
12
    Node rangeA = utils::mkForallInternal(bvi, body);
883
884
    // upper 65 ... 90
885
    // lower 97 ... 122
886
    // assert:
887
    //   len(r) = len(x) ^
888
    //   forall i. 0 <= i < len(r) =>
889
    //     str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci)
890
    // where ci = str.code( str.substr(x,i,1) )
891
12
    Node assert = nm->mkNode(AND, eqLenA, rangeA);
892
6
    asserts.push_back(assert);
893
894
    // Thus, toLower( x ) = r
895
6
    retNode = r;
896
  }
897
2000
  else if (t.getKind() == STRING_REV)
898
  {
899
36
    Node x = t[0];
900
36
    Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
901
902
36
    Node lenx = nm->mkNode(STRING_LENGTH, x);
903
36
    Node lenr = nm->mkNode(STRING_LENGTH, r);
904
36
    Node eqLenA = lenx.eqNode(lenr);
905
906
36
    Node i = SkolemCache::mkIndexVar(t);
907
36
    Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
908
909
    Node revi = nm->mkNode(
910
36
        MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, one));
911
36
    Node ssr = nm->mkNode(STRING_SUBSTR, r, i, one);
912
36
    Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, one);
913
914
    Node bound =
915
36
        nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr));
916
36
    Node body = nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx));
917
36
    Node rangeA = utils::mkForallInternal(bvi, body);
918
    // assert:
919
    //   len(r) = len(x) ^
920
    //   forall i. 0 <= i < len(r) =>
921
    //     substr(r,i,1) = substr(x,len(x)-(i+1),1)
922
36
    Node assert = nm->mkNode(AND, eqLenA, rangeA);
923
18
    asserts.push_back(assert);
924
925
    // Thus, (str.rev x) = r
926
18
    retNode = r;
927
  }
928
1982
  else if (t.getKind() == kind::STRING_CONTAINS)
929
  {
930
260
    Node x = t[0];
931
260
    Node s = t[1];
932
    //negative contains reduces to existential
933
260
    Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x);
934
260
    Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
935
260
    Node b1 = SkolemCache::mkIndexVar(t);
936
260
    Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
937
    Node body = NodeManager::currentNM()->mkNode(
938
        kind::AND,
939
260
        NodeManager::currentNM()->mkNode(kind::LEQ, zero, b1),
940
390
        NodeManager::currentNM()->mkNode(
941
            kind::LEQ,
942
            b1,
943
260
            NodeManager::currentNM()->mkNode(kind::MINUS, lenx, lens)),
944
520
        NodeManager::currentNM()->mkNode(
945
            kind::EQUAL,
946
260
            NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens),
947
910
            s));
948
130
    retNode = utils::mkForallInternal(b1v, body.negate()).negate();
949
  }
950
1852
  else if (t.getKind() == kind::STRING_LEQ)
951
  {
952
    Node ltp = sc->mkTypedSkolemCached(
953
92
        nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp");
954
92
    Node k = SkolemCache::mkIndexVar(t);
955
956
92
    std::vector<Node> conj;
957
46
    conj.push_back(nm->mkNode(GEQ, k, zero));
958
92
    Node substr[2];
959
92
    Node code[2];
960
138
    for (unsigned r = 0; r < 2; r++)
961
    {
962
184
      Node ta = t[r];
963
184
      Node tb = t[1 - r];
964
92
      substr[r] = nm->mkNode(STRING_SUBSTR, ta, zero, k);
965
92
      code[r] =
966
184
          nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, one));
967
92
      conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta)));
968
    }
969
46
    conj.push_back(substr[0].eqNode(substr[1]));
970
92
    std::vector<Node> ite_ch;
971
46
    ite_ch.push_back(ltp);
972
138
    for (unsigned r = 0; r < 2; r++)
973
    {
974
92
      ite_ch.push_back(nm->mkNode(LT, code[r], code[1 - r]));
975
    }
976
46
    conj.push_back(nm->mkNode(ITE, ite_ch));
977
978
92
    Node conjn = utils::mkForallInternal(nm->mkNode(BOUND_VAR_LIST, k),
979
92
                                         nm->mkNode(AND, conj).negate())
980
92
                     .negate();
981
    // Intuitively, the reduction says either x and y are equal, or they have
982
    // some (maximal) common prefix after which their characters at position k
983
    // are distinct, and the comparison of their code matches the return value
984
    // of the overall term.
985
    // Notice the below reduction relies on the semantics of str.code being -1
986
    // for the empty string. In particular, say x = "a" and y = "ab", then the
987
    // reduction below is satisfied for k = 1, since
988
    //   str.code(substr( "a", 1, 1 )) = str.code( "" ) = -1 <
989
    //   str.code(substr( "ab", 1, 1 )) = str.code( "b" ) = 66
990
991
    // assert:
992
    //  IF x=y
993
    //  THEN: ltp
994
    //  ELSE: exists k.
995
    //        k >= 0 AND k <= len( x ) AND k <= len( y ) AND
996
    //        substr( x, 0, k ) = substr( y, 0, k ) AND
997
    //        IF    ltp
998
    //        THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 ))
999
    //        ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 ))
1000
92
    Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, conjn);
1001
46
    asserts.push_back(assert);
1002
1003
    // Thus, str.<=( x, y ) = ltp
1004
46
    retNode = ltp;
1005
  }
1006
9120
  return retNode;
1007
}
1008
1009
4073
Node StringsPreprocess::simplify(Node t, std::vector<Node>& asserts)
1010
{
1011
4073
  size_t prev_asserts = asserts.size();
1012
  // call the static reduce routine
1013
4073
  Node retNode = reduce(t, asserts, d_sc);
1014
4073
  if( t!=retNode ){
1015
2267
    Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl;
1016
2267
    if (!asserts.empty())
1017
    {
1018
4312
      Trace("strings-preprocess")
1019
4312
          << " ... new nodes (" << (asserts.size() - prev_asserts)
1020
2156
          << "):" << std::endl;
1021
4293
      for (size_t i = prev_asserts; i < asserts.size(); ++i)
1022
      {
1023
2137
        Trace("strings-preprocess") << "   " << asserts[i] << std::endl;
1024
      }
1025
    }
1026
2267
    if (d_statReductions != nullptr)
1027
    {
1028
2167
      (*d_statReductions) << t.getKind();
1029
    }
1030
  }
1031
  else
1032
  {
1033
3612
    Trace("strings-preprocess-debug")
1034
1806
        << "Return " << retNode << " unchanged" << std::endl;
1035
  }
1036
4073
  return retNode;
1037
}
1038
1039
3174
Node StringsPreprocess::simplifyRec(Node t, std::vector<Node>& asserts)
1040
{
1041
3174
  std::map<Node, Node>::iterator it = d_visited.find(t);
1042
3174
  if (it != d_visited.end())
1043
  {
1044
1260
    return it->second;
1045
  }else{
1046
3828
    Node retNode = t;
1047
1914
    if( t.getNumChildren()==0 ){
1048
472
      retNode = simplify(t, asserts);
1049
    }
1050
1442
    else if (!t.isClosure())
1051
    {
1052
1434
      bool changed = false;
1053
2868
      std::vector< Node > cc;
1054
1434
      if( t.getMetaKind() == kind::metakind::PARAMETERIZED ){
1055
20
        cc.push_back( t.getOperator() );
1056
      }
1057
4431
      for(unsigned i=0; i<t.getNumChildren(); i++) {
1058
5994
        Node s = simplifyRec(t[i], asserts);
1059
2997
        cc.push_back( s );
1060
2997
        if( s!=t[i] ) {
1061
243
          changed = true;
1062
        }
1063
      }
1064
2868
      Node tmp = t;
1065
1434
      if( changed ){
1066
210
        tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc );
1067
      }
1068
1434
      retNode = simplify(tmp, asserts);
1069
    }
1070
1914
    d_visited[t] = retNode;
1071
1914
    return retNode;
1072
  }
1073
}
1074
1075
107
Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts)
1076
{
1077
214
  std::vector<Node> asserts_curr;
1078
107
  Node ret = simplifyRec(n, asserts_curr);
1079
247
  while (!asserts_curr.empty())
1080
  {
1081
140
    Node curr = asserts_curr.back();
1082
70
    asserts_curr.pop_back();
1083
140
    std::vector<Node> asserts_tmp;
1084
70
    curr = simplifyRec(curr, asserts_tmp);
1085
70
    asserts_curr.insert(
1086
140
        asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end());
1087
70
    asserts.push_back(curr);
1088
  }
1089
214
  return ret;
1090
}
1091
1092
}  // namespace strings
1093
}  // namespace theory
1094
31137
}  // namespace cvc5