GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_preprocess.cpp Lines: 518 518 100.0 %
Date: 2021-08-16 Branches: 1566 3062 51.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Strings Preprocess.
14
 */
15
16
#include "theory/strings/theory_strings_preprocess.h"
17
18
#include "expr/kind.h"
19
#include "expr/skolem_manager.h"
20
#include "options/smt_options.h"
21
#include "options/strings_options.h"
22
#include "smt/logic_exception.h"
23
#include "theory/strings/arith_entail.h"
24
#include "theory/strings/sequences_rewriter.h"
25
#include "theory/strings/theory_strings_utils.h"
26
#include "theory/strings/word.h"
27
#include "util/rational.h"
28
#include "util/statistics_registry.h"
29
#include "util/string.h"
30
31
using namespace cvc5;
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
38
9893
StringsPreprocess::StringsPreprocess(SkolemCache* sc,
39
9893
                                     HistogramStat<Kind>* statReductions)
40
9893
    : d_sc(sc), d_statReductions(statReductions)
41
{
42
9893
}
43
44
9893
StringsPreprocess::~StringsPreprocess(){
45
46
9893
}
47
48
4278
Node StringsPreprocess::reduce(Node t,
49
                               std::vector<Node>& asserts,
50
                               SkolemCache* sc)
51
{
52
8556
  Trace("strings-preprocess-debug")
53
4278
      << "StringsPreprocess::reduce: " << t << std::endl;
54
4278
  Node retNode = t;
55
4278
  NodeManager* nm = NodeManager::currentNM();
56
4278
  SkolemManager* sm = nm->getSkolemManager();
57
8556
  Node zero = nm->mkConst(Rational(0));
58
8556
  Node one = nm->mkConst(Rational(1));
59
8556
  Node negOne = nm->mkConst(Rational(-1));
60
61
4278
  if( t.getKind() == kind::STRING_SUBSTR ) {
62
    // processing term:  substr( s, n, m )
63
3328
    Node s = t[0];
64
3328
    Node n = t[1];
65
3328
    Node m = t[2];
66
3328
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
67
3328
    Node t12 = nm->mkNode(PLUS, n, m);
68
3328
    Node lt0 = nm->mkNode(STRING_LENGTH, s);
69
    //start point is greater than or equal zero
70
3328
    Node c1 = nm->mkNode(GEQ, n, zero);
71
    //start point is less than end of string
72
3328
    Node c2 = nm->mkNode(GT, lt0, n);
73
    //length is positive
74
3328
    Node c3 = nm->mkNode(GT, m, zero);
75
3328
    Node cond = nm->mkNode(AND, c1, c2, c3);
76
77
3328
    Node emp = Word::mkEmptyWord(t.getType());
78
79
3328
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
80
3328
    Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
81
3328
    Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2));
82
    //length of first skolem is second argument
83
3328
    Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
84
3328
    Node lsk2 = nm->mkNode(STRING_LENGTH, sk2);
85
    // Length of the suffix is either 0 (in the case where m + n > len(s)) or
86
    // len(s) - n -m
87
    Node b13 = nm->mkNode(
88
        OR,
89
3328
        nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, nm->mkNode(PLUS, n, m))),
90
6656
        nm->mkNode(EQUAL, lsk2, zero));
91
    // Length of the result is at most m
92
3328
    Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m);
93
94
3328
    Node b1 = nm->mkNode(AND, {b11, b12, b13, b14});
95
3328
    Node b2 = skt.eqNode(emp);
96
3328
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
97
98
    // assert:
99
    // IF    n >=0 AND len( s ) > n AND m > 0
100
    // THEN: s = sk1 ++ skt ++ sk2 AND
101
    //       len( sk1 ) = n AND
102
    //       ( len( sk2 ) = len( s )-(n+m) OR len( sk2 ) = 0 ) AND
103
    //       len( skt ) <= m
104
    // ELSE: skt = ""
105
    //
106
    // Note: The length of sk2 is either 0 (if the n + m is greater or equal to
107
    // the length of s) or len(s) - (n + m) otherwise. If n + m is greater or
108
    // equal to the length of s, then len(s) - (n + m) is negative and in
109
    // conflict with lengths always being positive, so only len(sk2) = 0 may be
110
    // satisfied. If n + m is less than the length of s, then len(sk2) = 0
111
    // cannot be satisfied because we have the constraint that len(skt) <= m,
112
    // so sk2 must be greater than 0.
113
1664
    asserts.push_back(lemma);
114
115
    // Thus, substr( s, n, m ) = skt
116
1664
    retNode = skt;
117
  }
118
2614
  else if (t.getKind() == kind::STRING_UPDATE)
119
  {
120
    // processing term:  update( s, n, r )
121
34
    Node s = t[0];
122
34
    Node n = t[1];
123
34
    Node r = t[2];
124
34
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
125
34
    Node ls = nm->mkNode(STRING_LENGTH, s);
126
    // start point is greater than or equal zero
127
34
    Node c1 = nm->mkNode(GEQ, n, zero);
128
    // start point is less than end of string
129
34
    Node c2 = nm->mkNode(GT, ls, n);
130
34
    Node cond = nm->mkNode(AND, c1, c2);
131
132
    // substr(r,0,|s|-n)
133
34
    Node lens = nm->mkNode(STRING_LENGTH, s);
134
34
    Node rs;
135
17
    if (r.isConst() && Word::getLength(r) == 1)
136
    {
137
      // optimization: don't need to take substring for single characters, due
138
      // to guard on where it is used in the reduction below.
139
2
      rs = r;
140
    }
141
    else
142
    {
143
15
      rs = nm->mkNode(STRING_SUBSTR, r, zero, nm->mkNode(MINUS, lens, n));
144
    }
145
34
    Node rslen = nm->mkNode(STRING_LENGTH, rs);
146
34
    Node nsuf = nm->mkNode(PLUS, n, rslen);
147
    // substr(s, n, len(substr(r,0,|s|-n))), which is used for formalizing the
148
    // purification variable sk3 below.
149
34
    Node ssubstr = nm->mkNode(STRING_SUBSTR, s, n, rslen);
150
151
34
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
152
    Node sk2 =
153
34
        sc->mkSkolemCached(s, nsuf, SkolemCache::SK_SUFFIX_REM, "sssufr");
154
34
    Node sk3 = sc->mkSkolemCached(ssubstr, SkolemCache::SK_PURIFY, "ssubstr");
155
34
    Node a1 = skt.eqNode(nm->mkNode(STRING_CONCAT, sk1, rs, sk2));
156
34
    Node a2 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk3, sk2));
157
    // length of first skolem is second argument
158
34
    Node a3 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
159
34
    Node a4 = nm->mkNode(STRING_LENGTH, rs).eqNode(nm->mkNode(STRING_LENGTH, sk3));
160
161
34
    Node b1 = nm->mkNode(AND, {a1, a2, a3, a4});
162
34
    Node b2 = skt.eqNode(s);
163
34
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
164
165
    // assert:
166
    // IF    n >=0 AND n < len( s )
167
    // THEN: skt = sk1 ++ substr(r,0,len(s)-n) ++ sk2 AND
168
    //       s = sk1 ++ sk3 ++ sk2 AND
169
    //       len( sk1 ) = n AND
170
    //       len( substr(r,0,len(s)-n) ) = len( sk3 )
171
    // ELSE: skt = s
172
    // We use an optimization where r is used instead of substr(r,0,len(s)-n)
173
    // if r is a constant of length one.
174
17
    asserts.push_back(lemma);
175
176
    // Thus, str.update( s, n, r ) = skt
177
17
    retNode = skt;
178
  }
179
2597
  else if (t.getKind() == kind::STRING_INDEXOF)
180
  {
181
    // processing term:  indexof( x, y, n )
182
284
    Node x = t[0];
183
284
    Node y = t[1];
184
284
    Node n = t[2];
185
    Node skk = sc->mkTypedSkolemCached(
186
284
        nm->integerType(), t, SkolemCache::SK_PURIFY, "iok");
187
188
284
    Node negone = nm->mkConst(Rational(-1));
189
190
    // substr( x, n, len( x ) - n )
191
    Node st = nm->mkNode(STRING_SUBSTR,
192
                         x,
193
                         n,
194
284
                         nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n));
195
    Node io2 =
196
284
        sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre");
197
    Node io4 =
198
284
        sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost");
199
200
    // ~contains( substr( x, n, len( x ) - n ), y )
201
284
    Node c11 = nm->mkNode(STRING_CONTAINS, st, y).negate();
202
    // n > len( x )
203
284
    Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x));
204
    // 0 > n
205
284
    Node c13 = nm->mkNode(GT, zero, n);
206
284
    Node cond1 = nm->mkNode(OR, c11, c12, c13);
207
    // skk = -1
208
284
    Node cc1 = skk.eqNode(negone);
209
210
    // y = ""
211
284
    Node emp = Word::mkEmptyWord(x.getType());
212
284
    Node cond2 = y.eqNode(emp);
213
    // skk = n
214
284
    Node cc2 = skk.eqNode(t[2]);
215
216
    // substr( x, n, len( x ) - n ) = str.++( io2, y, io4 )
217
284
    Node c31 = st.eqNode(nm->mkNode(STRING_CONCAT, io2, y, io4));
218
    // ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y )
219
    Node c32 =
220
568
        nm->mkNode(
221
              STRING_CONTAINS,
222
284
              nm->mkNode(
223
                  STRING_CONCAT,
224
                  io2,
225
284
                  nm->mkNode(
226
                      STRING_SUBSTR,
227
                      y,
228
                      zero,
229
284
                      nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), one))),
230
              y)
231
284
            .negate();
232
    // skk = n + len( io2 )
233
284
    Node c33 = skk.eqNode(nm->mkNode(PLUS, n, nm->mkNode(STRING_LENGTH, io2)));
234
284
    Node cc3 = nm->mkNode(AND, c31, c32, c33);
235
236
    // assert:
237
    // IF:   ~contains( substr( x, n, len( x ) - n ), y ) OR n > len(x) OR 0 > n
238
    // THEN: skk = -1
239
    // ELIF: y = ""
240
    // THEN: skk = n
241
    // ELSE: substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) ^
242
    //       ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) ^
243
    //       skk = n + len( io2 )
244
    // for fresh io2, io4.
245
284
    Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3));
246
142
    asserts.push_back(rr);
247
248
    // Thus, indexof( x, y, n ) = skk.
249
142
    retNode = skk;
250
  }
251
2455
  else if (t.getKind() == kind::STRING_INDEXOF_RE)
252
  {
253
    // processing term:  indexof_re(s, r, n)
254
236
    Node s = t[0];
255
236
    Node r = t[1];
256
236
    Node n = t[2];
257
    Node skk = sc->mkTypedSkolemCached(
258
236
        nm->integerType(), t, SkolemCache::SK_PURIFY, "iork");
259
236
    Node sLen = nm->mkNode(STRING_LENGTH, s);
260
261
    // n > len(x)
262
236
    Node ub = nm->mkNode(GT, n, sLen);
263
    // 0 > n
264
236
    Node lb = nm->mkNode(GT, zero, n);
265
    // n > len(x) OR 0 > n
266
236
    Node condNegOne = nm->mkNode(OR, ub, lb);
267
    // skk = -1
268
236
    Node retNegOne = skk.eqNode(negOne);
269
270
236
    Node emp = Word::mkEmptyWord(s.getType());
271
    // in_re("", r)
272
236
    Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r);
273
    // skk = n
274
236
    Node retN = skk.eqNode(n);
275
276
236
    Node i = SkolemCache::mkIndexVar(t);
277
236
    Node l = SkolemCache::mkLengthVar(t);
278
236
    Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l);
279
    Node bound =
280
        nm->mkNode(AND,
281
                   {
282
236
                       nm->mkNode(GEQ, i, n),
283
236
                       nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)),
284
236
                       nm->mkNode(GT, l, zero),
285
236
                       nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)),
286
590
                   });
287
    Node body = nm->mkNode(
288
        OR,
289
236
        bound.negate(),
290
236
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r)
291
472
            .negate());
292
    // forall il.
293
    //   n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
294
    //     ~in_re(substr(s, i, l), r)
295
236
    Node firstMatch = utils::mkForallInternal(bvl, body);
296
236
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
297
    Node validLen =
298
        nm->mkNode(AND,
299
236
                   nm->mkNode(GEQ, l, zero),
300
472
                   nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk)));
301
    Node matchBody = nm->mkNode(
302
        AND,
303
        validLen,
304
236
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r));
305
    // skk != -1 =>
306
    //   exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r))
307
    Node match =
308
        nm->mkNode(OR,
309
                   retNegOne,
310
236
                   utils::mkForallInternal(bvll, matchBody.negate()).negate());
311
312
    // assert:
313
    // IF:   n > len(s) OR 0 > n
314
    // THEN: skk = -1
315
    // ELIF: in_re("", r)
316
    // THEN: skk = n
317
    // ELSE: (forall il.
318
    //         n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i =>
319
    //           ~in_re(substr(s, i, l), r)) ^
320
    //       (skk != -1 =>
321
    //          exists l. 0 <= l <= len(s) - skk ^ in_re(substr(s, skk, l), r))
322
    //
323
    // Note that this reduction relies on eager reduction lemmas being sent to
324
    // properly limit the range of skk.
325
    Node rr = nm->mkNode(
326
        ITE,
327
        condNegOne,
328
        retNegOne,
329
236
        nm->mkNode(
330
472
            ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match)));
331
118
    asserts.push_back(rr);
332
333
    // Thus, indexof_re(s, r, n) = skk.
334
118
    retNode = skk;
335
  }
336
2337
  else if (t.getKind() == STRING_ITOS)
337
  {
338
    // processing term:  int.to.str( n )
339
54
    Node n = t[0];
340
54
    Node itost = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost");
341
54
    Node leni = nm->mkNode(STRING_LENGTH, itost);
342
343
54
    std::vector<Node> conc;
344
54
    std::vector< TypeNode > argTypes;
345
27
    argTypes.push_back(nm->integerType());
346
    Node u =
347
54
        sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
348
349
54
    Node lem = nm->mkNode(GEQ, leni, one);
350
27
    conc.push_back(lem);
351
352
27
    lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni));
353
27
    conc.push_back(lem);
354
355
27
    lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero));
356
27
    conc.push_back(lem);
357
358
54
    Node x = SkolemCache::mkIndexVar(t);
359
54
    Node xPlusOne = nm->mkNode(PLUS, x, one);
360
54
    Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
361
54
    Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni));
362
54
    Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one);
363
54
    Node ux = nm->mkNode(APPLY_UF, u, x);
364
54
    Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne);
365
54
    Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
366
54
    Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0);
367
368
54
    Node ten = nm->mkConst(Rational(10));
369
54
    Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
370
    Node leadingZeroPos =
371
54
        nm->mkNode(AND, x.eqNode(zero), nm->mkNode(GT, leni, one));
372
    Node cb = nm->mkNode(
373
        AND,
374
54
        nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, one, zero)),
375
108
        nm->mkNode(LT, c, ten));
376
377
54
    Node ux1lem = nm->mkNode(GEQ, n, ux1);
378
379
27
    lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem));
380
27
    lem = utils::mkForallInternal(xbv, lem);
381
27
    conc.push_back(lem);
382
383
54
    Node nonneg = nm->mkNode(GEQ, n, zero);
384
385
54
    Node emp = Word::mkEmptyWord(t.getType());
386
27
    lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp));
387
27
    asserts.push_back(lem);
388
    // assert:
389
    // IF n>=0
390
    // THEN:
391
    //   len( itost ) >= 1 ^
392
    //   n = U( len( itost ) ) ^ U( 0 ) = 0 ^
393
    //   forall x. (x>=0 ^ x < str.len(itost)) =>
394
    //     U( x+1 ) = (str.code( str.at(itost, x) )-48) + 10*U( x ) ^
395
    //     ite( x=0 AND str.len(itost)>1, 49, 48 ) <=
396
    //       str.code( str.at(itost, x) ) < 58 ^
397
    //     n >= U(x + 1)
398
    // ELSE
399
    //   itost = ""
400
    // thus:
401
    //   int.to.str( n ) = itost
402
    //
403
    // Note: The conjunct `n >= U(x + 1)` is not needed for correctness but is
404
    // just an optimization.
405
406
    // The function U is an accumulator, where U( x ) for x>0 is the value of
407
    // str.to.int( str.substr( int.to.str( n ), 0, x ) ). For example, for
408
    // n=345, we have that U(1), U(2), U(3) = 3, 34, 345.
409
    // Above, we use str.code to map characters to their integer value, where
410
    // note that str.code( "0" ) = 48. Further notice that
411
    //   ite( x=0 AND str.len(itost)>1, 49, 48 )
412
    // enforces that int.to.str( n ) has no leading zeroes.
413
27
    retNode = itost;
414
2310
  } else if( t.getKind() == kind::STRING_STOI ) {
415
54
    Node s = t[0];
416
    Node stoit = sc->mkTypedSkolemCached(
417
54
        nm->integerType(), t, SkolemCache::SK_PURIFY, "stoit");
418
54
    Node lens = nm->mkNode(STRING_LENGTH, s);
419
420
54
    std::vector<Node> conc1;
421
54
    Node lem = stoit.eqNode(negOne);
422
27
    conc1.push_back(lem);
423
424
54
    Node emp = Word::mkEmptyWord(s.getType());
425
54
    Node sEmpty = s.eqNode(emp);
426
54
    Node k = sm->mkDummySkolem("k", nm->integerType());
427
54
    Node kc1 = nm->mkNode(GEQ, k, zero);
428
54
    Node kc2 = nm->mkNode(LT, k, lens);
429
54
    Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0")));
430
    Node codeSk = nm->mkNode(
431
        MINUS,
432
54
        nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)),
433
108
        c0);
434
54
    Node ten = nm->mkConst(Rational(10));
435
    Node kc3 = nm->mkNode(
436
54
        OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten));
437
27
    conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3)));
438
439
54
    std::vector<Node> conc2;
440
54
    std::vector< TypeNode > argTypes;
441
27
    argTypes.push_back(nm->integerType());
442
    Node u =
443
54
        sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType()));
444
445
27
    lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens));
446
27
    conc2.push_back(lem);
447
448
27
    lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero));
449
27
    conc2.push_back(lem);
450
451
27
    lem = nm->mkNode(GT, lens, zero);
452
27
    conc2.push_back(lem);
453
454
54
    Node x = SkolemCache::mkIndexVar(t);
455
54
    Node xbv = nm->mkNode(BOUND_VAR_LIST, x);
456
54
    Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens));
457
54
    Node sx = nm->mkNode(STRING_SUBSTR, s, x, one);
458
54
    Node ux = nm->mkNode(APPLY_UF, u, x);
459
54
    Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, one));
460
54
    Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0);
461
462
54
    Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux)));
463
54
    Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten));
464
465
54
    Node ux1lem = nm->mkNode(GEQ, stoit, ux1);
466
467
27
    lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem));
468
27
    lem = utils::mkForallInternal(xbv, lem);
469
27
    conc2.push_back(lem);
470
471
54
    Node sneg = nm->mkNode(LT, stoit, zero);
472
27
    lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2));
473
27
    asserts.push_back(lem);
474
475
    // assert:
476
    // IF stoit < 0
477
    // THEN:
478
    //   stoit = -1 ^
479
    //   ( s = "" OR
480
    //     ( k>=0 ^ k<len( s ) ^ ( str.code( str.at(s, k) ) < 48 OR
481
    //                             str.code( str.at(s, k) ) >= 58 )))
482
    // ELSE:
483
    //   stoit = U( len( s ) ) ^ U( 0 ) = 0 ^
484
    //   str.len( s ) > 0 ^
485
    //   forall x. (x>=0 ^ x < str.len(s)) =>
486
    //     U( x+1 ) = ( str.code( str.at(s, x) ) - 48 ) + 10*U( x ) ^
487
    //     48 <= str.code( str.at(s, x) ) < 58 ^
488
    //     stoit >= U( x+1 )
489
    // Thus, str.to.int( s ) = stoit
490
    //
491
    // Note: The conjunct `stoit >= U( x+1 )` is not needed for correctness but
492
    // is just an optimization.
493
494
27
    retNode = stoit;
495
  }
496
2283
  else if (t.getKind() == kind::SEQ_NTH)
497
  {
498
    // processing term:  str.nth( s, n)
499
    // similar to substr.
500
112
    Node s = t[0];
501
112
    Node n = t[1];
502
112
    Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst");
503
112
    Node t12 = nm->mkNode(PLUS, n, one);
504
112
    Node lt0 = nm->mkNode(STRING_LENGTH, s);
505
    // start point is greater than or equal zero
506
112
    Node c1 = nm->mkNode(GEQ, n, zero);
507
    // start point is less than end of string
508
112
    Node c2 = nm->mkNode(GT, lt0, n);
509
    // check whether this application of seq.nth is defined.
510
112
    Node cond = nm->mkNode(AND, c1, c2);
511
512
    // nodes for the case where `seq.nth` is defined.
513
112
    Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre");
514
112
    Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr");
515
112
    Node unit = nm->mkNode(SEQ_UNIT, skt);
516
112
    Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2));
517
    // length of first skolem is second argument
518
112
    Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n);
519
112
    Node lsk2 = nm->mkNode(STRING_LENGTH, sk2);
520
112
    Node b13 = nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, t12));
521
112
    Node b1 = nm->mkNode(AND, b11, b12, b13);
522
523
    // nodes for the case where `seq.nth` is undefined.
524
112
    Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf");
525
112
    Node b2 = nm->mkNode(EQUAL, skt, nm->mkNode(APPLY_UF, uf, s, n));
526
527
    // the full ite, split on definedness of `seq.nth`
528
112
    Node lemma = nm->mkNode(ITE, cond, b1, b2);
529
530
    // assert:
531
    // IF    n >=0 AND n < len( s )
532
    // THEN: s = sk1 ++ unit(skt) ++ sk2 AND
533
    //       len( sk1 ) = n AND
534
    //       ( len( sk2 ) = len( s )- (n+1)
535
    // ELSE: skt = Uf(s, n), where Uf is a cached skolem function.
536
56
    asserts.push_back(lemma);
537
56
    retNode = skt;
538
  }
539
2227
  else if (t.getKind() == kind::STRING_REPLACE)
540
  {
541
    // processing term: replace( x, y, z )
542
502
    Node x = t[0];
543
502
    Node y = t[1];
544
502
    Node z = t[2];
545
502
    TypeNode tn = t[0].getType();
546
    Node rp1 =
547
502
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre");
548
    Node rp2 =
549
502
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost");
550
502
    Node rpw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw");
551
552
    // y = ""
553
502
    Node emp = Word::mkEmptyWord(tn);
554
502
    Node cond1 = y.eqNode(emp);
555
    // rpw = str.++( z, x )
556
502
    Node c1 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, z, x));
557
558
    // contains( x, y )
559
502
    Node cond2 = nm->mkNode(kind::STRING_CONTAINS, x, y);
560
    // x = str.++( rp1, y, rp2 )
561
502
    Node c21 = x.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, y, rp2));
562
    // rpw = str.++( rp1, z, rp2 )
563
502
    Node c22 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, z, rp2));
564
    // ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y )
565
    Node c23 =
566
1004
        nm->mkNode(kind::STRING_CONTAINS,
567
502
                   nm->mkNode(
568
                       kind::STRING_CONCAT,
569
                       rp1,
570
502
                       nm->mkNode(kind::STRING_SUBSTR,
571
                                  y,
572
                                  zero,
573
1004
                                  nm->mkNode(kind::MINUS,
574
502
                                             nm->mkNode(kind::STRING_LENGTH, y),
575
                                             one))),
576
                   y)
577
502
            .negate();
578
579
    // assert:
580
    //   IF    y=""
581
    //   THEN: rpw = str.++( z, x )
582
    //   ELIF: contains( x, y )
583
    //   THEN: x = str.++( rp1, y, rp2 ) ^
584
    //         rpw = str.++( rp1, z, rp2 ) ^
585
    //         ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ),
586
    //   ELSE: rpw = x
587
    // for fresh rp1, rp2, rpw
588
    Node rr = nm->mkNode(kind::ITE,
589
                         cond1,
590
                         c1,
591
1004
                         nm->mkNode(kind::ITE,
592
                                    cond2,
593
502
                                    nm->mkNode(kind::AND, c21, c22, c23),
594
1004
                                    rpw.eqNode(x)));
595
251
    asserts.push_back(rr);
596
597
    // Thus, replace( x, y, z ) = rpw.
598
251
    retNode = rpw;
599
  }
600
1976
  else if (t.getKind() == kind::STRING_REPLACE_ALL)
601
  {
602
    // processing term: replaceall( x, y, z )
603
28
    Node x = t[0];
604
28
    Node y = t[1];
605
28
    Node z = t[2];
606
28
    Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw");
607
608
    Node numOcc = sc->mkTypedSkolemCached(
609
28
        nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
610
28
    std::vector<TypeNode> argTypes;
611
14
    argTypes.push_back(nm->integerType());
612
    Node us =
613
28
        sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, nm->stringType()));
614
28
    TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
615
    Node uf = sc->mkTypedSkolemCached(
616
28
        ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
617
618
28
    Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
619
28
    Node usno = nm->mkNode(APPLY_UF, us, numOcc);
620
28
    Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
621
622
28
    std::vector<Node> lem;
623
14
    lem.push_back(nm->mkNode(GEQ, numOcc, zero));
624
14
    lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, zero)));
625
14
    lem.push_back(usno.eqNode(rem));
626
14
    lem.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
627
14
    lem.push_back(nm->mkNode(STRING_INDEXOF, x, y, ufno).eqNode(negOne));
628
629
28
    Node i = SkolemCache::mkIndexVar(t);
630
28
    Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
631
    Node bound =
632
28
        nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
633
28
    Node ufi = nm->mkNode(APPLY_UF, uf, i);
634
28
    Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, one));
635
28
    Node ii = nm->mkNode(STRING_INDEXOF, x, y, ufi);
636
    Node cc = nm->mkNode(
637
        STRING_CONCAT,
638
28
        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)),
639
        z,
640
56
        nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, one)));
641
642
28
    std::vector<Node> flem;
643
14
    flem.push_back(ii.eqNode(negOne).negate());
644
14
    flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc));
645
14
    flem.push_back(
646
28
        ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y))));
647
648
28
    Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
649
28
    Node q = utils::mkForallInternal(bvli, body);
650
14
    lem.push_back(q);
651
652
    // assert:
653
    //   IF y=""
654
    //   THEN: rpaw = x
655
    //   ELSE:
656
    //     numOcc >= 0 ^
657
    //     rpaw = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc), len(x)) ^
658
    //     Uf(0) = 0 ^ indexof( x, y, Uf(numOcc) ) = -1 ^
659
    //     forall i. 0 <= i < numOcc =>
660
    //        ii != -1 ^
661
    //        Us( i ) = str.substr( x, Uf(i), ii - Uf(i) ) ++ z ++ Us(i+1) ^
662
    //        Uf( i+1 ) = ii + len(y)
663
    //        where ii == indexof( x, y, Uf(i) )
664
665
    // Conceptually, numOcc is the number of occurrences of y in x, Uf( i ) is
666
    // the index to begin searching in x for y after the i^th occurrence of y in
667
    // x, and Us( i ) is the result of processing the remainder after processing
668
    // the i^th occurrence of y in x.
669
28
    Node emp = Word::mkEmptyWord(t.getType());
670
    Node assert =
671
28
        nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem));
672
14
    asserts.push_back(assert);
673
674
    // Thus, replaceall( x, y, z ) = rpaw
675
14
    retNode = rpaw;
676
  }
677
1962
  else if (t.getKind() == STRING_REPLACE_RE)
678
  {
679
    // processing term: replace_re( x, y, z )
680
78
    Node x = t[0];
681
78
    Node y = t[1];
682
78
    Node z = t[2];
683
78
    Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
684
685
    // indexof_re(x, y, 0)
686
78
    Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero);
687
688
    // in_re("", y)
689
    Node matchesEmpty =
690
78
        nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y);
691
    // k = z ++ x
692
78
    Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x));
693
694
    Node k1 =
695
78
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre");
696
    Node k2 =
697
78
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match");
698
    Node k3 =
699
78
        sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post");
700
78
    Node k2Len = nm->mkNode(STRING_LENGTH, k2);
701
    // x = k1 ++ k2 ++ k3
702
78
    Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3));
703
    // len(k1) = indexof_re(x, y, 0)
704
78
    Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx);
705
78
    Node l = SkolemCache::mkLengthVar(t);
706
78
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
707
    Node bound =
708
78
        nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len));
709
    Node body = nm->mkNode(
710
        OR,
711
78
        bound.negate(),
712
78
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y)
713
156
            .negate());
714
    // forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)
715
78
    Node shortestMatch = utils::mkForallInternal(bvll, body);
716
    // in_re(k2, y)
717
78
    Node match = nm->mkNode(STRING_IN_REGEXP, k2, y);
718
    // k = k1 ++ z ++ k3
719
78
    Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3));
720
721
    // IF: indexof_re(x, y, 0) = -1
722
    // THEN: k = x
723
    // ELSE:
724
    //   x = k1 ++ k2 ++ k3 ^
725
    //   len(k1) = indexof_re(x, y, 0) ^
726
    //   (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^
727
    //   in_re(k2, y) ^
728
    //   k = k1 ++ z ++ k3
729
39
    asserts.push_back(
730
234
        nm->mkNode(ITE,
731
78
                   idx.eqNode(negOne),
732
78
                   k.eqNode(x),
733
78
                   nm->mkNode(AND, {split, k1Len, shortestMatch, match, res})));
734
39
    retNode = k;
735
  }
736
1923
  else if (t.getKind() == STRING_REPLACE_RE_ALL)
737
  {
738
50
    Node x = t[0];
739
50
    Node y = t[1];
740
50
    Node z = t[2];
741
50
    Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k");
742
743
    Node numOcc = sc->mkTypedSkolemCached(
744
50
        nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc");
745
50
    std::vector<TypeNode> argTypes;
746
25
    argTypes.push_back(nm->integerType());
747
    Node us =
748
50
        sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, t.getType()));
749
50
    TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType());
750
    Node uf = sc->mkTypedSkolemCached(
751
50
        ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf");
752
    Node ul =
753
50
        sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul");
754
755
50
    Node emp = Word::mkEmptyWord(t.getType());
756
757
50
    Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp));
758
    // indexof_re(x, y', 0) = -1
759
50
    Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne);
760
761
50
    Node ufno = nm->mkNode(APPLY_UF, uf, numOcc);
762
50
    Node usno = nm->mkNode(APPLY_UF, us, numOcc);
763
50
    Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x));
764
765
50
    std::vector<Node> lemmas;
766
    // numOcc > 0
767
25
    lemmas.push_back(nm->mkNode(GT, numOcc, zero));
768
    // k = Us(0)
769
25
    lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero)));
770
    // Us(numOcc) = substr(x, Uf(numOcc))
771
25
    lemmas.push_back(usno.eqNode(rem));
772
    // Uf(0) = 0
773
25
    lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero));
774
    // indexof_re(substr(x, Uf(numOcc)), y', 0) = -1
775
25
    lemmas.push_back(
776
50
        nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne));
777
778
50
    Node i = SkolemCache::mkIndexVar(t);
779
50
    Node bvli = nm->mkNode(BOUND_VAR_LIST, i);
780
    Node bound =
781
50
        nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc));
782
50
    Node ip1 = nm->mkNode(PLUS, i, one);
783
50
    Node ufi = nm->mkNode(APPLY_UF, uf, i);
784
50
    Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1);
785
50
    Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1);
786
    // ii = Uf(i + 1) - Ul(i + 1)
787
50
    Node ii = nm->mkNode(MINUS, ufip1, ulip1);
788
789
50
    std::vector<Node> flem;
790
    // Ul(i + 1) > 0
791
25
    flem.push_back(nm->mkNode(GT, ulip1, zero));
792
    // Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1)
793
25
    flem.push_back(ufip1.eqNode(
794
50
        nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1)));
795
    // in_re(substr(x, ii, Ul(i + 1)), y')
796
75
    flem.push_back(nm->mkNode(
797
50
        STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp));
798
50
    Node l = SkolemCache::mkLengthVar(t);
799
50
    Node bvll = nm->mkNode(BOUND_VAR_LIST, l);
800
    Node lenBound =
801
50
        nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1));
802
    Node shortestMatchBody = nm->mkNode(
803
        OR,
804
50
        lenBound.negate(),
805
50
        nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y)
806
100
            .negate());
807
    // forall l. 0 < l < Ul(i + 1) =>
808
    //   ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y')
809
25
    flem.push_back(utils::mkForallInternal(bvll, shortestMatchBody));
810
    Node pfxMatch =
811
50
        nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi));
812
    // Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
813
25
    flem.push_back(
814
50
        nm->mkNode(APPLY_UF, us, i)
815
50
            .eqNode(nm->mkNode(
816
50
                STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1))));
817
50
    Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem));
818
50
    Node forall = utils::mkForallInternal(bvli, body);
819
25
    lemmas.push_back(forall);
820
821
    // IF indexof(x, y', 0) = -1
822
    // THEN: k = x
823
    // ELSE:
824
    //   numOcc > 0 ^
825
    //   k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^
826
    //   Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^
827
    //   forall i. 0 <= i < nummOcc =>
828
    //     Ul(i + 1) > 0 ^
829
    //     Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^
830
    //     in_re(substr(x, ii, Ul(i + 1)), y') ^
831
    //     forall l. 0 < l < Ul(i + 1) =>
832
    //       ~in_re(substr(x, ii, l), y')
833
    //     Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1)
834
    //     where ii = Uf(i + 1) - Ul(i + 1)
835
    // where y' = re.diff(y, "")
836
    //
837
    // Conceptually, y' is the regex y but excluding the empty string (because
838
    // we do not want to match empty strings), numOcc is the number of shortest
839
    // matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i)
840
    // is the length of the i^th match, and Us(i) is the result of processing
841
    // the remainder after processing the i^th occurrence of y in x.
842
    //
843
    // Visualization of Uf(i) and Ul(i):
844
    //
845
    // x = |------------| match 1 |-----------| match 2   |---|
846
    //     |                      |                       |
847
    //     Uf(0)                  Uf(1)                   Uf(2)
848
    //
849
    //                  |---------|           |-----------|
850
    //                    Ul(1)                 Ul(2)
851
25
    asserts.push_back(
852
50
        nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas)));
853
25
    retNode = k;
854
  }
855
1898
  else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER)
856
  {
857
12
    Node x = t[0];
858
12
    Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
859
860
12
    Node lenx = nm->mkNode(STRING_LENGTH, x);
861
12
    Node lenr = nm->mkNode(STRING_LENGTH, r);
862
12
    Node eqLenA = lenx.eqNode(lenr);
863
864
12
    Node i = SkolemCache::mkIndexVar(t);
865
12
    Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
866
867
12
    Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one));
868
12
    Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one));
869
870
12
    Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65));
871
12
    Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90));
872
    Node offset =
873
12
        nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? -32 : 32));
874
875
    Node res = nm->mkNode(
876
        ITE,
877
12
        nm->mkNode(AND, nm->mkNode(LEQ, lb, ci), nm->mkNode(LEQ, ci, ub)),
878
12
        nm->mkNode(PLUS, ci, offset),
879
36
        ci);
880
881
    Node bound =
882
12
        nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr));
883
12
    Node body = nm->mkNode(OR, bound.negate(), ri.eqNode(res));
884
12
    Node rangeA = utils::mkForallInternal(bvi, body);
885
886
    // upper 65 ... 90
887
    // lower 97 ... 122
888
    // assert:
889
    //   len(r) = len(x) ^
890
    //   forall i. 0 <= i < len(r) =>
891
    //     str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci)
892
    // where ci = str.code( str.substr(x,i,1) )
893
12
    Node assert = nm->mkNode(AND, eqLenA, rangeA);
894
6
    asserts.push_back(assert);
895
896
    // Thus, toLower( x ) = r
897
6
    retNode = r;
898
  }
899
1892
  else if (t.getKind() == STRING_REV)
900
  {
901
36
    Node x = t[0];
902
36
    Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r");
903
904
36
    Node lenx = nm->mkNode(STRING_LENGTH, x);
905
36
    Node lenr = nm->mkNode(STRING_LENGTH, r);
906
36
    Node eqLenA = lenx.eqNode(lenr);
907
908
36
    Node i = SkolemCache::mkIndexVar(t);
909
36
    Node bvi = nm->mkNode(BOUND_VAR_LIST, i);
910
911
    Node revi = nm->mkNode(
912
36
        MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, one));
913
36
    Node ssr = nm->mkNode(STRING_SUBSTR, r, i, one);
914
36
    Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, one);
915
916
    Node bound =
917
36
        nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr));
918
36
    Node body = nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx));
919
36
    Node rangeA = utils::mkForallInternal(bvi, body);
920
    // assert:
921
    //   len(r) = len(x) ^
922
    //   forall i. 0 <= i < len(r) =>
923
    //     substr(r,i,1) = substr(x,len(x)-(i+1),1)
924
36
    Node assert = nm->mkNode(AND, eqLenA, rangeA);
925
18
    asserts.push_back(assert);
926
927
    // Thus, (str.rev x) = r
928
18
    retNode = r;
929
  }
930
1874
  else if (t.getKind() == kind::STRING_CONTAINS)
931
  {
932
256
    Node x = t[0];
933
256
    Node s = t[1];
934
    //negative contains reduces to existential
935
256
    Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x);
936
256
    Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s);
937
256
    Node b1 = SkolemCache::mkIndexVar(t);
938
256
    Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1);
939
    Node body = NodeManager::currentNM()->mkNode(
940
        kind::AND,
941
256
        NodeManager::currentNM()->mkNode(kind::LEQ, zero, b1),
942
384
        NodeManager::currentNM()->mkNode(
943
            kind::LEQ,
944
            b1,
945
256
            NodeManager::currentNM()->mkNode(kind::MINUS, lenx, lens)),
946
512
        NodeManager::currentNM()->mkNode(
947
            kind::EQUAL,
948
256
            NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens),
949
896
            s));
950
128
    retNode = utils::mkForallInternal(b1v, body.negate()).negate();
951
  }
952
1746
  else if (t.getKind() == kind::STRING_LEQ)
953
  {
954
    Node ltp = sc->mkTypedSkolemCached(
955
72
        nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp");
956
72
    Node k = SkolemCache::mkIndexVar(t);
957
958
72
    std::vector<Node> conj;
959
36
    conj.push_back(nm->mkNode(GEQ, k, zero));
960
72
    Node substr[2];
961
72
    Node code[2];
962
108
    for (unsigned r = 0; r < 2; r++)
963
    {
964
144
      Node ta = t[r];
965
144
      Node tb = t[1 - r];
966
72
      substr[r] = nm->mkNode(STRING_SUBSTR, ta, zero, k);
967
72
      code[r] =
968
144
          nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, one));
969
72
      conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta)));
970
    }
971
36
    conj.push_back(substr[0].eqNode(substr[1]));
972
72
    std::vector<Node> ite_ch;
973
36
    ite_ch.push_back(ltp);
974
108
    for (unsigned r = 0; r < 2; r++)
975
    {
976
72
      ite_ch.push_back(nm->mkNode(LT, code[r], code[1 - r]));
977
    }
978
36
    conj.push_back(nm->mkNode(ITE, ite_ch));
979
980
72
    Node conjn = utils::mkForallInternal(nm->mkNode(BOUND_VAR_LIST, k),
981
72
                                         nm->mkNode(AND, conj).negate())
982
72
                     .negate();
983
    // Intuitively, the reduction says either x and y are equal, or they have
984
    // some (maximal) common prefix after which their characters at position k
985
    // are distinct, and the comparison of their code matches the return value
986
    // of the overall term.
987
    // Notice the below reduction relies on the semantics of str.code being -1
988
    // for the empty string. In particular, say x = "a" and y = "ab", then the
989
    // reduction below is satisfied for k = 1, since
990
    //   str.code(substr( "a", 1, 1 )) = str.code( "" ) = -1 <
991
    //   str.code(substr( "ab", 1, 1 )) = str.code( "b" ) = 66
992
993
    // assert:
994
    //  IF x=y
995
    //  THEN: ltp
996
    //  ELSE: exists k.
997
    //        k >= 0 AND k <= len( x ) AND k <= len( y ) AND
998
    //        substr( x, 0, k ) = substr( y, 0, k ) AND
999
    //        IF    ltp
1000
    //        THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 ))
1001
    //        ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 ))
1002
72
    Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, conjn);
1003
36
    asserts.push_back(assert);
1004
1005
    // Thus, str.<=( x, y ) = ltp
1006
36
    retNode = ltp;
1007
  }
1008
8556
  return retNode;
1009
}
1010
1011
3738
Node StringsPreprocess::simplify(Node t, std::vector<Node>& asserts)
1012
{
1013
3738
  size_t prev_asserts = asserts.size();
1014
  // call the static reduce routine
1015
3738
  Node retNode = reduce(t, asserts, d_sc);
1016
3738
  if( t!=retNode ){
1017
2028
    Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl;
1018
2028
    if (!asserts.empty())
1019
    {
1020
3854
      Trace("strings-preprocess")
1021
3854
          << " ... new nodes (" << (asserts.size() - prev_asserts)
1022
1927
          << "):" << std::endl;
1023
3835
      for (size_t i = prev_asserts; i < asserts.size(); ++i)
1024
      {
1025
1908
        Trace("strings-preprocess") << "   " << asserts[i] << std::endl;
1026
      }
1027
    }
1028
2028
    if (d_statReductions != nullptr)
1029
    {
1030
1932
      (*d_statReductions) << t.getKind();
1031
    }
1032
  }
1033
  else
1034
  {
1035
3420
    Trace("strings-preprocess-debug")
1036
1710
        << "Return " << retNode << " unchanged" << std::endl;
1037
  }
1038
3738
  return retNode;
1039
}
1040
1041
3016
Node StringsPreprocess::simplifyRec(Node t, std::vector<Node>& asserts)
1042
{
1043
3016
  std::map<Node, Node>::iterator it = d_visited.find(t);
1044
3016
  if (it != d_visited.end())
1045
  {
1046
1202
    return it->second;
1047
  }else{
1048
3628
    Node retNode = t;
1049
1814
    if( t.getNumChildren()==0 ){
1050
448
      retNode = simplify(t, asserts);
1051
    }
1052
1366
    else if (!t.isClosure())
1053
    {
1054
1358
      bool changed = false;
1055
2716
      std::vector< Node > cc;
1056
1358
      if( t.getMetaKind() == kind::metakind::PARAMETERIZED ){
1057
16
        cc.push_back( t.getOperator() );
1058
      }
1059
4207
      for(unsigned i=0; i<t.getNumChildren(); i++) {
1060
5698
        Node s = simplifyRec(t[i], asserts);
1061
2849
        cc.push_back( s );
1062
2849
        if( s!=t[i] ) {
1063
237
          changed = true;
1064
        }
1065
      }
1066
2716
      Node tmp = t;
1067
1358
      if( changed ){
1068
206
        tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc );
1069
      }
1070
1358
      retNode = simplify(tmp, asserts);
1071
    }
1072
1814
    d_visited[t] = retNode;
1073
1814
    return retNode;
1074
  }
1075
}
1076
1077
101
Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts)
1078
{
1079
202
  std::vector<Node> asserts_curr;
1080
101
  Node ret = simplifyRec(n, asserts_curr);
1081
233
  while (!asserts_curr.empty())
1082
  {
1083
132
    Node curr = asserts_curr.back();
1084
66
    asserts_curr.pop_back();
1085
132
    std::vector<Node> asserts_tmp;
1086
66
    curr = simplifyRec(curr, asserts_tmp);
1087
66
    asserts_curr.insert(
1088
132
        asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end());
1089
66
    asserts.push_back(curr);
1090
  }
1091
202
  return ret;
1092
}
1093
1094
}  // namespace strings
1095
}  // namespace theory
1096
29340
}  // namespace cvc5