1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Strings Preprocess. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/theory_strings_preprocess.h" |
17 |
|
|
18 |
|
#include "expr/kind.h" |
19 |
|
#include "expr/skolem_manager.h" |
20 |
|
#include "options/smt_options.h" |
21 |
|
#include "options/strings_options.h" |
22 |
|
#include "smt/logic_exception.h" |
23 |
|
#include "theory/strings/arith_entail.h" |
24 |
|
#include "theory/strings/sequences_rewriter.h" |
25 |
|
#include "theory/strings/theory_strings_utils.h" |
26 |
|
#include "theory/strings/word.h" |
27 |
|
#include "util/rational.h" |
28 |
|
#include "util/statistics_registry.h" |
29 |
|
#include "util/string.h" |
30 |
|
|
31 |
|
using namespace cvc5; |
32 |
|
using namespace cvc5::kind; |
33 |
|
|
34 |
|
namespace cvc5 { |
35 |
|
namespace theory { |
36 |
|
namespace strings { |
37 |
|
|
38 |
15313 |
StringsPreprocess::StringsPreprocess(SkolemCache* sc, |
39 |
15313 |
HistogramStat<Kind>* statReductions) |
40 |
15313 |
: d_sc(sc), d_statReductions(statReductions) |
41 |
|
{ |
42 |
15313 |
} |
43 |
|
|
44 |
15308 |
StringsPreprocess::~StringsPreprocess(){ |
45 |
|
|
46 |
15308 |
} |
47 |
|
|
48 |
4560 |
Node StringsPreprocess::reduce(Node t, |
49 |
|
std::vector<Node>& asserts, |
50 |
|
SkolemCache* sc) |
51 |
|
{ |
52 |
9120 |
Trace("strings-preprocess-debug") |
53 |
4560 |
<< "StringsPreprocess::reduce: " << t << std::endl; |
54 |
4560 |
Node retNode = t; |
55 |
4560 |
NodeManager* nm = NodeManager::currentNM(); |
56 |
9120 |
Node zero = nm->mkConst(Rational(0)); |
57 |
9120 |
Node one = nm->mkConst(Rational(1)); |
58 |
9120 |
Node negOne = nm->mkConst(Rational(-1)); |
59 |
|
|
60 |
4560 |
if( t.getKind() == kind::STRING_SUBSTR ) { |
61 |
|
// processing term: substr( s, n, m ) |
62 |
3646 |
Node s = t[0]; |
63 |
3646 |
Node n = t[1]; |
64 |
3646 |
Node m = t[2]; |
65 |
3646 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
66 |
3646 |
Node t12 = nm->mkNode(PLUS, n, m); |
67 |
3646 |
Node lt0 = nm->mkNode(STRING_LENGTH, s); |
68 |
|
//start point is greater than or equal zero |
69 |
3646 |
Node c1 = nm->mkNode(GEQ, n, zero); |
70 |
|
//start point is less than end of string |
71 |
3646 |
Node c2 = nm->mkNode(GT, lt0, n); |
72 |
|
//length is positive |
73 |
3646 |
Node c3 = nm->mkNode(GT, m, zero); |
74 |
3646 |
Node cond = nm->mkNode(AND, c1, c2, c3); |
75 |
|
|
76 |
3646 |
Node emp = Word::mkEmptyWord(t.getType()); |
77 |
|
|
78 |
3646 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
79 |
3646 |
Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
80 |
3646 |
Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); |
81 |
|
//length of first skolem is second argument |
82 |
3646 |
Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
83 |
3646 |
Node lsk2 = nm->mkNode(STRING_LENGTH, sk2); |
84 |
|
// Length of the suffix is either 0 (in the case where m + n > len(s)) or |
85 |
|
// len(s) - n -m |
86 |
|
Node b13 = nm->mkNode( |
87 |
|
OR, |
88 |
3646 |
nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, nm->mkNode(PLUS, n, m))), |
89 |
7292 |
nm->mkNode(EQUAL, lsk2, zero)); |
90 |
|
// Length of the result is at most m |
91 |
3646 |
Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m); |
92 |
|
|
93 |
3646 |
Node b1 = nm->mkNode(AND, {b11, b12, b13, b14}); |
94 |
3646 |
Node b2 = skt.eqNode(emp); |
95 |
3646 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
96 |
|
|
97 |
|
// assert: |
98 |
|
// IF n >=0 AND len( s ) > n AND m > 0 |
99 |
|
// THEN: s = sk1 ++ skt ++ sk2 AND |
100 |
|
// len( sk1 ) = n AND |
101 |
|
// ( len( sk2 ) = len( s )-(n+m) OR len( sk2 ) = 0 ) AND |
102 |
|
// len( skt ) <= m |
103 |
|
// ELSE: skt = "" |
104 |
|
// |
105 |
|
// Note: The length of sk2 is either 0 (if the n + m is greater or equal to |
106 |
|
// the length of s) or len(s) - (n + m) otherwise. If n + m is greater or |
107 |
|
// equal to the length of s, then len(s) - (n + m) is negative and in |
108 |
|
// conflict with lengths always being positive, so only len(sk2) = 0 may be |
109 |
|
// satisfied. If n + m is less than the length of s, then len(sk2) = 0 |
110 |
|
// cannot be satisfied because we have the constraint that len(skt) <= m, |
111 |
|
// so sk2 must be greater than 0. |
112 |
1823 |
asserts.push_back(lemma); |
113 |
|
|
114 |
|
// Thus, substr( s, n, m ) = skt |
115 |
1823 |
retNode = skt; |
116 |
|
} |
117 |
2737 |
else if (t.getKind() == kind::STRING_UPDATE) |
118 |
|
{ |
119 |
|
// processing term: update( s, n, r ) |
120 |
48 |
Node s = t[0]; |
121 |
48 |
Node n = t[1]; |
122 |
48 |
Node r = t[2]; |
123 |
48 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
124 |
48 |
Node ls = nm->mkNode(STRING_LENGTH, s); |
125 |
|
// start point is greater than or equal zero |
126 |
48 |
Node c1 = nm->mkNode(GEQ, n, zero); |
127 |
|
// start point is less than end of string |
128 |
48 |
Node c2 = nm->mkNode(GT, ls, n); |
129 |
48 |
Node cond = nm->mkNode(AND, c1, c2); |
130 |
|
|
131 |
|
// substr(r,0,|s|-n) |
132 |
48 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
133 |
48 |
Node rs; |
134 |
24 |
if (r.isConst() && Word::getLength(r) == 1) |
135 |
|
{ |
136 |
|
// optimization: don't need to take substring for single characters, due |
137 |
|
// to guard on where it is used in the reduction below. |
138 |
9 |
rs = r; |
139 |
|
} |
140 |
|
else |
141 |
|
{ |
142 |
15 |
rs = nm->mkNode(STRING_SUBSTR, r, zero, nm->mkNode(MINUS, lens, n)); |
143 |
|
} |
144 |
48 |
Node rslen = nm->mkNode(STRING_LENGTH, rs); |
145 |
48 |
Node nsuf = nm->mkNode(PLUS, n, rslen); |
146 |
|
// substr(s, n, len(substr(r,0,|s|-n))), which is used for formalizing the |
147 |
|
// purification variable sk3 below. |
148 |
48 |
Node ssubstr = nm->mkNode(STRING_SUBSTR, s, n, rslen); |
149 |
|
|
150 |
48 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
151 |
|
Node sk2 = |
152 |
48 |
sc->mkSkolemCached(s, nsuf, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
153 |
48 |
Node sk3 = sc->mkSkolemCached(ssubstr, SkolemCache::SK_PURIFY, "ssubstr"); |
154 |
48 |
Node a1 = skt.eqNode(nm->mkNode(STRING_CONCAT, sk1, rs, sk2)); |
155 |
48 |
Node a2 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk3, sk2)); |
156 |
|
// length of first skolem is second argument |
157 |
48 |
Node a3 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
158 |
48 |
Node a4 = nm->mkNode(STRING_LENGTH, rs).eqNode(nm->mkNode(STRING_LENGTH, sk3)); |
159 |
|
|
160 |
48 |
Node b1 = nm->mkNode(AND, {a1, a2, a3, a4}); |
161 |
48 |
Node b2 = skt.eqNode(s); |
162 |
48 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
163 |
|
|
164 |
|
// assert: |
165 |
|
// IF n >=0 AND n < len( s ) |
166 |
|
// THEN: skt = sk1 ++ substr(r,0,len(s)-n) ++ sk2 AND |
167 |
|
// s = sk1 ++ sk3 ++ sk2 AND |
168 |
|
// len( sk1 ) = n AND |
169 |
|
// len( substr(r,0,len(s)-n) ) = len( sk3 ) |
170 |
|
// ELSE: skt = s |
171 |
|
// We use an optimization where r is used instead of substr(r,0,len(s)-n) |
172 |
|
// if r is a constant of length one. |
173 |
24 |
asserts.push_back(lemma); |
174 |
|
|
175 |
|
// Thus, str.update( s, n, r ) = skt |
176 |
24 |
retNode = skt; |
177 |
|
} |
178 |
2713 |
else if (t.getKind() == kind::STRING_INDEXOF) |
179 |
|
{ |
180 |
|
// processing term: indexof( x, y, n ) |
181 |
288 |
Node x = t[0]; |
182 |
288 |
Node y = t[1]; |
183 |
288 |
Node n = t[2]; |
184 |
|
Node skk = sc->mkTypedSkolemCached( |
185 |
288 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "iok"); |
186 |
|
|
187 |
288 |
Node negone = nm->mkConst(Rational(-1)); |
188 |
|
|
189 |
|
// substr( x, n, len( x ) - n ) |
190 |
|
Node st = nm->mkNode(STRING_SUBSTR, |
191 |
|
x, |
192 |
|
n, |
193 |
288 |
nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n)); |
194 |
|
Node io2 = |
195 |
288 |
sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre"); |
196 |
|
Node io4 = |
197 |
288 |
sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost"); |
198 |
|
|
199 |
|
// ~contains( substr( x, n, len( x ) - n ), y ) |
200 |
288 |
Node c11 = nm->mkNode(STRING_CONTAINS, st, y).negate(); |
201 |
|
// n > len( x ) |
202 |
288 |
Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x)); |
203 |
|
// 0 > n |
204 |
288 |
Node c13 = nm->mkNode(GT, zero, n); |
205 |
288 |
Node cond1 = nm->mkNode(OR, c11, c12, c13); |
206 |
|
// skk = -1 |
207 |
288 |
Node cc1 = skk.eqNode(negone); |
208 |
|
|
209 |
|
// y = "" |
210 |
288 |
Node emp = Word::mkEmptyWord(x.getType()); |
211 |
288 |
Node cond2 = y.eqNode(emp); |
212 |
|
// skk = n |
213 |
288 |
Node cc2 = skk.eqNode(t[2]); |
214 |
|
|
215 |
|
// substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) |
216 |
288 |
Node c31 = st.eqNode(nm->mkNode(STRING_CONCAT, io2, y, io4)); |
217 |
|
// ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) |
218 |
|
Node c32 = |
219 |
576 |
nm->mkNode( |
220 |
|
STRING_CONTAINS, |
221 |
288 |
nm->mkNode( |
222 |
|
STRING_CONCAT, |
223 |
|
io2, |
224 |
288 |
nm->mkNode( |
225 |
|
STRING_SUBSTR, |
226 |
|
y, |
227 |
|
zero, |
228 |
288 |
nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), one))), |
229 |
|
y) |
230 |
288 |
.negate(); |
231 |
|
// skk = n + len( io2 ) |
232 |
288 |
Node c33 = skk.eqNode(nm->mkNode(PLUS, n, nm->mkNode(STRING_LENGTH, io2))); |
233 |
288 |
Node cc3 = nm->mkNode(AND, c31, c32, c33); |
234 |
|
|
235 |
|
// assert: |
236 |
|
// IF: ~contains( substr( x, n, len( x ) - n ), y ) OR n > len(x) OR 0 > n |
237 |
|
// THEN: skk = -1 |
238 |
|
// ELIF: y = "" |
239 |
|
// THEN: skk = n |
240 |
|
// ELSE: substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) ^ |
241 |
|
// ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) ^ |
242 |
|
// skk = n + len( io2 ) |
243 |
|
// for fresh io2, io4. |
244 |
288 |
Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3)); |
245 |
144 |
asserts.push_back(rr); |
246 |
|
|
247 |
|
// Thus, indexof( x, y, n ) = skk. |
248 |
144 |
retNode = skk; |
249 |
|
} |
250 |
2569 |
else if (t.getKind() == kind::STRING_INDEXOF_RE) |
251 |
|
{ |
252 |
|
// processing term: indexof_re(s, r, n) |
253 |
200 |
Node s = t[0]; |
254 |
200 |
Node r = t[1]; |
255 |
200 |
Node n = t[2]; |
256 |
|
Node skk = sc->mkTypedSkolemCached( |
257 |
200 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "iork"); |
258 |
200 |
Node sLen = nm->mkNode(STRING_LENGTH, s); |
259 |
|
|
260 |
|
// n > len(x) |
261 |
200 |
Node ub = nm->mkNode(GT, n, sLen); |
262 |
|
// 0 > n |
263 |
200 |
Node lb = nm->mkNode(GT, zero, n); |
264 |
|
// n > len(x) OR 0 > n |
265 |
200 |
Node condNegOne = nm->mkNode(OR, ub, lb); |
266 |
|
// skk = -1 |
267 |
200 |
Node retNegOne = skk.eqNode(negOne); |
268 |
|
|
269 |
200 |
Node emp = Word::mkEmptyWord(s.getType()); |
270 |
|
// in_re("", r) |
271 |
200 |
Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r); |
272 |
|
// skk = n |
273 |
200 |
Node retN = skk.eqNode(n); |
274 |
|
|
275 |
200 |
Node i = SkolemCache::mkIndexVar(t); |
276 |
200 |
Node l = SkolemCache::mkLengthVar(t); |
277 |
200 |
Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l); |
278 |
|
Node bound = |
279 |
|
nm->mkNode(AND, |
280 |
|
{ |
281 |
200 |
nm->mkNode(GEQ, i, n), |
282 |
200 |
nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)), |
283 |
200 |
nm->mkNode(GT, l, zero), |
284 |
200 |
nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)), |
285 |
500 |
}); |
286 |
|
Node body = nm->mkNode( |
287 |
|
OR, |
288 |
200 |
bound.negate(), |
289 |
200 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r) |
290 |
400 |
.negate()); |
291 |
|
// forall il. |
292 |
|
// n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => |
293 |
|
// ~in_re(substr(s, i, l), r) |
294 |
200 |
Node firstMatch = utils::mkForallInternal(bvl, body); |
295 |
200 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
296 |
|
Node validLen = |
297 |
|
nm->mkNode(AND, |
298 |
200 |
nm->mkNode(GEQ, l, zero), |
299 |
400 |
nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); |
300 |
|
Node matchBody = nm->mkNode( |
301 |
|
AND, |
302 |
|
validLen, |
303 |
200 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); |
304 |
|
// skk != -1 => |
305 |
|
// exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r)) |
306 |
|
Node match = |
307 |
|
nm->mkNode(OR, |
308 |
|
retNegOne, |
309 |
200 |
utils::mkForallInternal(bvll, matchBody.negate()).negate()); |
310 |
|
|
311 |
|
// assert: |
312 |
|
// IF: n > len(s) OR 0 > n |
313 |
|
// THEN: skk = -1 |
314 |
|
// ELIF: in_re("", r) |
315 |
|
// THEN: skk = n |
316 |
|
// ELSE: (forall il. |
317 |
|
// n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => |
318 |
|
// ~in_re(substr(s, i, l), r)) ^ |
319 |
|
// (skk != -1 => |
320 |
|
// exists l. 0 <= l <= len(s) - skk ^ in_re(substr(s, skk, l), r)) |
321 |
|
// |
322 |
|
// Note that this reduction relies on eager reduction lemmas being sent to |
323 |
|
// properly limit the range of skk. |
324 |
|
Node rr = nm->mkNode( |
325 |
|
ITE, |
326 |
|
condNegOne, |
327 |
|
retNegOne, |
328 |
200 |
nm->mkNode( |
329 |
400 |
ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match))); |
330 |
100 |
asserts.push_back(rr); |
331 |
|
|
332 |
|
// Thus, indexof_re(s, r, n) = skk. |
333 |
100 |
retNode = skk; |
334 |
|
} |
335 |
2469 |
else if (t.getKind() == STRING_ITOS) |
336 |
|
{ |
337 |
|
// processing term: int.to.str( n ) |
338 |
76 |
Node n = t[0]; |
339 |
76 |
Node itost = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost"); |
340 |
76 |
Node leni = nm->mkNode(STRING_LENGTH, itost); |
341 |
|
|
342 |
76 |
std::vector<Node> conc; |
343 |
76 |
std::vector< TypeNode > argTypes; |
344 |
38 |
argTypes.push_back(nm->integerType()); |
345 |
76 |
TypeNode itosResType = nm->mkFunctionType(argTypes, nm->integerType()); |
346 |
76 |
Node u = sc->mkSkolemFun(SkolemFunId::STRINGS_ITOS_RESULT, itosResType, t); |
347 |
|
|
348 |
76 |
Node lem = nm->mkNode(GEQ, leni, one); |
349 |
38 |
conc.push_back(lem); |
350 |
|
|
351 |
38 |
lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni)); |
352 |
38 |
conc.push_back(lem); |
353 |
|
|
354 |
38 |
lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); |
355 |
38 |
conc.push_back(lem); |
356 |
|
|
357 |
76 |
Node x = SkolemCache::mkIndexVar(t); |
358 |
76 |
Node xPlusOne = nm->mkNode(PLUS, x, one); |
359 |
76 |
Node xbv = nm->mkNode(BOUND_VAR_LIST, x); |
360 |
76 |
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni)); |
361 |
76 |
Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one); |
362 |
76 |
Node ux = nm->mkNode(APPLY_UF, u, x); |
363 |
76 |
Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne); |
364 |
76 |
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); |
365 |
76 |
Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0); |
366 |
|
|
367 |
76 |
Node ten = nm->mkConst(Rational(10)); |
368 |
76 |
Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); |
369 |
|
Node leadingZeroPos = |
370 |
76 |
nm->mkNode(AND, x.eqNode(zero), nm->mkNode(GT, leni, one)); |
371 |
|
Node cb = nm->mkNode( |
372 |
|
AND, |
373 |
76 |
nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, one, zero)), |
374 |
152 |
nm->mkNode(LT, c, ten)); |
375 |
|
|
376 |
76 |
Node ux1lem = nm->mkNode(GEQ, n, ux1); |
377 |
|
|
378 |
38 |
lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); |
379 |
38 |
lem = utils::mkForallInternal(xbv, lem); |
380 |
38 |
conc.push_back(lem); |
381 |
|
|
382 |
76 |
Node nonneg = nm->mkNode(GEQ, n, zero); |
383 |
|
|
384 |
76 |
Node emp = Word::mkEmptyWord(t.getType()); |
385 |
38 |
lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp)); |
386 |
38 |
asserts.push_back(lem); |
387 |
|
// assert: |
388 |
|
// IF n>=0 |
389 |
|
// THEN: |
390 |
|
// len( itost ) >= 1 ^ |
391 |
|
// n = U( len( itost ) ) ^ U( 0 ) = 0 ^ |
392 |
|
// forall x. (x>=0 ^ x < str.len(itost)) => |
393 |
|
// U( x+1 ) = (str.code( str.at(itost, x) )-48) + 10*U( x ) ^ |
394 |
|
// ite( x=0 AND str.len(itost)>1, 49, 48 ) <= |
395 |
|
// str.code( str.at(itost, x) ) < 58 ^ |
396 |
|
// n >= U(x + 1) |
397 |
|
// ELSE |
398 |
|
// itost = "" |
399 |
|
// thus: |
400 |
|
// int.to.str( n ) = itost |
401 |
|
// |
402 |
|
// Note: The conjunct `n >= U(x + 1)` is not needed for correctness but is |
403 |
|
// just an optimization. |
404 |
|
|
405 |
|
// The function U is an accumulator, where U( x ) for x>0 is the value of |
406 |
|
// str.to.int( str.substr( int.to.str( n ), 0, x ) ). For example, for |
407 |
|
// n=345, we have that U(1), U(2), U(3) = 3, 34, 345. |
408 |
|
// Above, we use str.code to map characters to their integer value, where |
409 |
|
// note that str.code( "0" ) = 48. Further notice that |
410 |
|
// ite( x=0 AND str.len(itost)>1, 49, 48 ) |
411 |
|
// enforces that int.to.str( n ) has no leading zeroes. |
412 |
38 |
retNode = itost; |
413 |
2431 |
} else if( t.getKind() == kind::STRING_STOI ) { |
414 |
106 |
Node s = t[0]; |
415 |
|
Node stoit = sc->mkTypedSkolemCached( |
416 |
106 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "stoit"); |
417 |
106 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
418 |
|
|
419 |
106 |
std::vector<Node> conc1; |
420 |
106 |
Node lem = stoit.eqNode(negOne); |
421 |
53 |
conc1.push_back(lem); |
422 |
|
|
423 |
106 |
Node emp = Word::mkEmptyWord(s.getType()); |
424 |
106 |
Node sEmpty = s.eqNode(emp); |
425 |
|
Node k = sc->mkSkolemFun( |
426 |
106 |
SkolemFunId::STRINGS_STOI_NON_DIGIT, nm->integerType(), t); |
427 |
106 |
Node kc1 = nm->mkNode(GEQ, k, zero); |
428 |
106 |
Node kc2 = nm->mkNode(LT, k, lens); |
429 |
106 |
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); |
430 |
|
Node codeSk = nm->mkNode( |
431 |
|
MINUS, |
432 |
106 |
nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)), |
433 |
212 |
c0); |
434 |
106 |
Node ten = nm->mkConst(Rational(10)); |
435 |
|
Node kc3 = nm->mkNode( |
436 |
106 |
OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten)); |
437 |
53 |
conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3))); |
438 |
|
|
439 |
106 |
std::vector<Node> conc2; |
440 |
106 |
std::vector< TypeNode > argTypes; |
441 |
53 |
argTypes.push_back(nm->integerType()); |
442 |
106 |
TypeNode stoiResultType = nm->mkFunctionType(argTypes, nm->integerType()); |
443 |
|
Node u = |
444 |
106 |
sc->mkSkolemFun(SkolemFunId::STRINGS_STOI_RESULT, stoiResultType, t); |
445 |
|
|
446 |
53 |
lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens)); |
447 |
53 |
conc2.push_back(lem); |
448 |
|
|
449 |
53 |
lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); |
450 |
53 |
conc2.push_back(lem); |
451 |
|
|
452 |
53 |
lem = nm->mkNode(GT, lens, zero); |
453 |
53 |
conc2.push_back(lem); |
454 |
|
|
455 |
106 |
Node x = SkolemCache::mkIndexVar(t); |
456 |
106 |
Node xbv = nm->mkNode(BOUND_VAR_LIST, x); |
457 |
106 |
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens)); |
458 |
106 |
Node sx = nm->mkNode(STRING_SUBSTR, s, x, one); |
459 |
106 |
Node ux = nm->mkNode(APPLY_UF, u, x); |
460 |
106 |
Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, one)); |
461 |
106 |
Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0); |
462 |
|
|
463 |
106 |
Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); |
464 |
106 |
Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten)); |
465 |
|
|
466 |
106 |
Node ux1lem = nm->mkNode(GEQ, stoit, ux1); |
467 |
|
|
468 |
53 |
lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); |
469 |
53 |
lem = utils::mkForallInternal(xbv, lem); |
470 |
53 |
conc2.push_back(lem); |
471 |
|
|
472 |
106 |
Node sneg = nm->mkNode(LT, stoit, zero); |
473 |
53 |
lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2)); |
474 |
53 |
asserts.push_back(lem); |
475 |
|
|
476 |
|
// assert: |
477 |
|
// IF stoit < 0 |
478 |
|
// THEN: |
479 |
|
// stoit = -1 ^ |
480 |
|
// ( s = "" OR |
481 |
|
// ( k>=0 ^ k<len( s ) ^ ( str.code( str.at(s, k) ) < 48 OR |
482 |
|
// str.code( str.at(s, k) ) >= 58 ))) |
483 |
|
// ELSE: |
484 |
|
// stoit = U( len( s ) ) ^ U( 0 ) = 0 ^ |
485 |
|
// str.len( s ) > 0 ^ |
486 |
|
// forall x. (x>=0 ^ x < str.len(s)) => |
487 |
|
// U( x+1 ) = ( str.code( str.at(s, x) ) - 48 ) + 10*U( x ) ^ |
488 |
|
// 48 <= str.code( str.at(s, x) ) < 58 ^ |
489 |
|
// stoit >= U( x+1 ) |
490 |
|
// Thus, str.to.int( s ) = stoit |
491 |
|
// |
492 |
|
// Note: The conjunct `stoit >= U( x+1 )` is not needed for correctness but |
493 |
|
// is just an optimization. |
494 |
|
|
495 |
53 |
retNode = stoit; |
496 |
|
} |
497 |
2378 |
else if (t.getKind() == kind::SEQ_NTH) |
498 |
|
{ |
499 |
|
// processing term: str.nth( s, n) |
500 |
|
// similar to substr. |
501 |
108 |
Node s = t[0]; |
502 |
108 |
Node n = t[1]; |
503 |
108 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
504 |
108 |
Node t12 = nm->mkNode(PLUS, n, one); |
505 |
108 |
Node lt0 = nm->mkNode(STRING_LENGTH, s); |
506 |
|
// start point is greater than or equal zero |
507 |
108 |
Node c1 = nm->mkNode(GEQ, n, zero); |
508 |
|
// start point is less than end of string |
509 |
108 |
Node c2 = nm->mkNode(GT, lt0, n); |
510 |
|
// check whether this application of seq.nth is defined. |
511 |
108 |
Node cond = nm->mkNode(AND, c1, c2); |
512 |
|
|
513 |
|
// nodes for the case where `seq.nth` is defined. |
514 |
108 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
515 |
108 |
Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
516 |
108 |
Node unit = nm->mkNode(SEQ_UNIT, skt); |
517 |
108 |
Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2)); |
518 |
|
// length of first skolem is second argument |
519 |
108 |
Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
520 |
108 |
Node lsk2 = nm->mkNode(STRING_LENGTH, sk2); |
521 |
108 |
Node b13 = nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, t12)); |
522 |
108 |
Node b1 = nm->mkNode(AND, b11, b12, b13); |
523 |
|
|
524 |
|
// nodes for the case where `seq.nth` is undefined. |
525 |
108 |
Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf"); |
526 |
108 |
Node b2 = nm->mkNode(EQUAL, skt, nm->mkNode(APPLY_UF, uf, s, n)); |
527 |
|
|
528 |
|
// the full ite, split on definedness of `seq.nth` |
529 |
108 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
530 |
|
|
531 |
|
// assert: |
532 |
|
// IF n >=0 AND n < len( s ) |
533 |
|
// THEN: s = sk1 ++ unit(skt) ++ sk2 AND |
534 |
|
// len( sk1 ) = n AND |
535 |
|
// ( len( sk2 ) = len( s )- (n+1) |
536 |
|
// ELSE: skt = Uf(s, n), where Uf is a cached skolem function. |
537 |
54 |
asserts.push_back(lemma); |
538 |
54 |
retNode = skt; |
539 |
|
} |
540 |
2324 |
else if (t.getKind() == kind::STRING_REPLACE) |
541 |
|
{ |
542 |
|
// processing term: replace( x, y, z ) |
543 |
452 |
Node x = t[0]; |
544 |
452 |
Node y = t[1]; |
545 |
452 |
Node z = t[2]; |
546 |
452 |
TypeNode tn = t[0].getType(); |
547 |
|
Node rp1 = |
548 |
452 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); |
549 |
|
Node rp2 = |
550 |
452 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); |
551 |
452 |
Node rpw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); |
552 |
|
|
553 |
|
// y = "" |
554 |
452 |
Node emp = Word::mkEmptyWord(tn); |
555 |
452 |
Node cond1 = y.eqNode(emp); |
556 |
|
// rpw = str.++( z, x ) |
557 |
452 |
Node c1 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, z, x)); |
558 |
|
|
559 |
|
// contains( x, y ) |
560 |
452 |
Node cond2 = nm->mkNode(kind::STRING_CONTAINS, x, y); |
561 |
|
// x = str.++( rp1, y, rp2 ) |
562 |
452 |
Node c21 = x.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, y, rp2)); |
563 |
|
// rpw = str.++( rp1, z, rp2 ) |
564 |
452 |
Node c22 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, z, rp2)); |
565 |
|
// ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ) |
566 |
|
Node c23 = |
567 |
904 |
nm->mkNode(kind::STRING_CONTAINS, |
568 |
452 |
nm->mkNode( |
569 |
|
kind::STRING_CONCAT, |
570 |
|
rp1, |
571 |
452 |
nm->mkNode(kind::STRING_SUBSTR, |
572 |
|
y, |
573 |
|
zero, |
574 |
904 |
nm->mkNode(kind::MINUS, |
575 |
452 |
nm->mkNode(kind::STRING_LENGTH, y), |
576 |
|
one))), |
577 |
|
y) |
578 |
452 |
.negate(); |
579 |
|
|
580 |
|
// assert: |
581 |
|
// IF y="" |
582 |
|
// THEN: rpw = str.++( z, x ) |
583 |
|
// ELIF: contains( x, y ) |
584 |
|
// THEN: x = str.++( rp1, y, rp2 ) ^ |
585 |
|
// rpw = str.++( rp1, z, rp2 ) ^ |
586 |
|
// ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ), |
587 |
|
// ELSE: rpw = x |
588 |
|
// for fresh rp1, rp2, rpw |
589 |
|
Node rr = nm->mkNode(kind::ITE, |
590 |
|
cond1, |
591 |
|
c1, |
592 |
904 |
nm->mkNode(kind::ITE, |
593 |
|
cond2, |
594 |
452 |
nm->mkNode(kind::AND, c21, c22, c23), |
595 |
904 |
rpw.eqNode(x))); |
596 |
226 |
asserts.push_back(rr); |
597 |
|
|
598 |
|
// Thus, replace( x, y, z ) = rpw. |
599 |
226 |
retNode = rpw; |
600 |
|
} |
601 |
2098 |
else if (t.getKind() == kind::STRING_REPLACE_ALL) |
602 |
|
{ |
603 |
|
// processing term: replaceall( x, y, z ) |
604 |
32 |
Node x = t[0]; |
605 |
32 |
Node y = t[1]; |
606 |
32 |
Node z = t[2]; |
607 |
32 |
Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); |
608 |
|
|
609 |
|
Node numOcc = sc->mkSkolemFun( |
610 |
32 |
SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y); |
611 |
32 |
std::vector<TypeNode> argTypes; |
612 |
16 |
argTypes.push_back(nm->integerType()); |
613 |
32 |
TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType()); |
614 |
|
Node us = sc->mkSkolemFun( |
615 |
32 |
SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t); |
616 |
32 |
TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); |
617 |
32 |
Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y); |
618 |
|
|
619 |
32 |
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); |
620 |
32 |
Node usno = nm->mkNode(APPLY_UF, us, numOcc); |
621 |
32 |
Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); |
622 |
|
|
623 |
32 |
std::vector<Node> lem; |
624 |
16 |
lem.push_back(nm->mkNode(GEQ, numOcc, zero)); |
625 |
16 |
lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, zero))); |
626 |
16 |
lem.push_back(usno.eqNode(rem)); |
627 |
16 |
lem.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); |
628 |
16 |
lem.push_back(nm->mkNode(STRING_INDEXOF, x, y, ufno).eqNode(negOne)); |
629 |
|
|
630 |
32 |
Node i = SkolemCache::mkIndexVar(t); |
631 |
32 |
Node bvli = nm->mkNode(BOUND_VAR_LIST, i); |
632 |
|
Node bound = |
633 |
32 |
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); |
634 |
32 |
Node ufi = nm->mkNode(APPLY_UF, uf, i); |
635 |
32 |
Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, one)); |
636 |
32 |
Node ii = nm->mkNode(STRING_INDEXOF, x, y, ufi); |
637 |
|
Node cc = nm->mkNode( |
638 |
|
STRING_CONCAT, |
639 |
32 |
nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)), |
640 |
|
z, |
641 |
64 |
nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, one))); |
642 |
|
|
643 |
32 |
std::vector<Node> flem; |
644 |
16 |
flem.push_back(ii.eqNode(negOne).negate()); |
645 |
16 |
flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc)); |
646 |
16 |
flem.push_back( |
647 |
32 |
ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y)))); |
648 |
|
|
649 |
32 |
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); |
650 |
32 |
Node q = utils::mkForallInternal(bvli, body); |
651 |
16 |
lem.push_back(q); |
652 |
|
|
653 |
|
// assert: |
654 |
|
// IF y="" |
655 |
|
// THEN: rpaw = x |
656 |
|
// ELSE: |
657 |
|
// numOcc >= 0 ^ |
658 |
|
// rpaw = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc), len(x)) ^ |
659 |
|
// Uf(0) = 0 ^ indexof( x, y, Uf(numOcc) ) = -1 ^ |
660 |
|
// forall i. 0 <= i < numOcc => |
661 |
|
// ii != -1 ^ |
662 |
|
// Us( i ) = str.substr( x, Uf(i), ii - Uf(i) ) ++ z ++ Us(i+1) ^ |
663 |
|
// Uf( i+1 ) = ii + len(y) |
664 |
|
// where ii == indexof( x, y, Uf(i) ) |
665 |
|
|
666 |
|
// Conceptually, numOcc is the number of occurrences of y in x, Uf( i ) is |
667 |
|
// the index to begin searching in x for y after the i^th occurrence of y in |
668 |
|
// x, and Us( i ) is the result of processing the remainder after processing |
669 |
|
// the i^th occurrence of y in x. |
670 |
32 |
Node emp = Word::mkEmptyWord(t.getType()); |
671 |
|
Node assert = |
672 |
32 |
nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem)); |
673 |
16 |
asserts.push_back(assert); |
674 |
|
|
675 |
|
// Thus, replaceall( x, y, z ) = rpaw |
676 |
16 |
retNode = rpaw; |
677 |
|
} |
678 |
2082 |
else if (t.getKind() == STRING_REPLACE_RE) |
679 |
|
{ |
680 |
|
// processing term: replace_re( x, y, z ) |
681 |
100 |
Node x = t[0]; |
682 |
100 |
Node y = t[1]; |
683 |
100 |
Node z = t[2]; |
684 |
100 |
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); |
685 |
|
|
686 |
|
// indexof_re(x, y, 0) |
687 |
100 |
Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero); |
688 |
|
|
689 |
|
// in_re("", y) |
690 |
|
Node matchesEmpty = |
691 |
100 |
nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y); |
692 |
|
// k = z ++ x |
693 |
100 |
Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x)); |
694 |
|
|
695 |
100 |
TypeNode ktype = t.getType(); |
696 |
100 |
Node k1 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_PRE, ktype, x, y); |
697 |
100 |
Node k2 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH, ktype, x, y); |
698 |
100 |
Node k3 = sc->mkSkolemFun(SkolemFunId::SK_FIRST_MATCH_POST, ktype, x, y); |
699 |
100 |
Node k2Len = nm->mkNode(STRING_LENGTH, k2); |
700 |
|
// x = k1 ++ k2 ++ k3 |
701 |
100 |
Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); |
702 |
|
// len(k1) = indexof_re(x, y, 0) |
703 |
100 |
Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx); |
704 |
100 |
Node l = SkolemCache::mkLengthVar(t); |
705 |
100 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
706 |
|
Node bound = |
707 |
100 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len)); |
708 |
|
Node body = nm->mkNode( |
709 |
|
OR, |
710 |
100 |
bound.negate(), |
711 |
100 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y) |
712 |
200 |
.negate()); |
713 |
|
// forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r) |
714 |
100 |
Node shortestMatch = utils::mkForallInternal(bvll, body); |
715 |
|
// in_re(k2, y) |
716 |
100 |
Node match = nm->mkNode(STRING_IN_REGEXP, k2, y); |
717 |
|
// k = k1 ++ z ++ k3 |
718 |
100 |
Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); |
719 |
|
|
720 |
|
// IF: indexof_re(x, y, 0) = -1 |
721 |
|
// THEN: k = x |
722 |
|
// ELSE: |
723 |
|
// x = k1 ++ k2 ++ k3 ^ |
724 |
|
// len(k1) = indexof_re(x, y, 0) ^ |
725 |
|
// (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^ |
726 |
|
// in_re(k2, y) ^ |
727 |
|
// k = k1 ++ z ++ k3 |
728 |
50 |
asserts.push_back( |
729 |
300 |
nm->mkNode(ITE, |
730 |
100 |
idx.eqNode(negOne), |
731 |
100 |
k.eqNode(x), |
732 |
100 |
nm->mkNode(AND, {split, k1Len, shortestMatch, match, res}))); |
733 |
50 |
retNode = k; |
734 |
|
} |
735 |
2032 |
else if (t.getKind() == STRING_REPLACE_RE_ALL) |
736 |
|
{ |
737 |
52 |
Node x = t[0]; |
738 |
52 |
Node y = t[1]; |
739 |
52 |
Node z = t[2]; |
740 |
52 |
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); |
741 |
|
|
742 |
|
Node numOcc = sc->mkSkolemFun( |
743 |
52 |
SkolemFunId::STRINGS_NUM_OCCUR, nm->integerType(), x, y); |
744 |
52 |
std::vector<TypeNode> argTypes; |
745 |
26 |
argTypes.push_back(nm->integerType()); |
746 |
52 |
TypeNode raResultType = nm->mkFunctionType(argTypes, t.getType()); |
747 |
|
Node us = sc->mkSkolemFun( |
748 |
52 |
SkolemFunId::STRINGS_REPLACE_ALL_RESULT, raResultType, t); |
749 |
52 |
TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); |
750 |
52 |
Node uf = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_INDEX, ufType, x, y); |
751 |
52 |
Node ul = sc->mkSkolemFun(SkolemFunId::STRINGS_OCCUR_LEN, ufType, x, y); |
752 |
|
|
753 |
52 |
Node emp = Word::mkEmptyWord(t.getType()); |
754 |
|
|
755 |
52 |
Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); |
756 |
|
// indexof_re(x, y', 0) = -1 |
757 |
52 |
Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne); |
758 |
|
|
759 |
52 |
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); |
760 |
52 |
Node usno = nm->mkNode(APPLY_UF, us, numOcc); |
761 |
52 |
Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); |
762 |
|
|
763 |
52 |
std::vector<Node> lemmas; |
764 |
|
// numOcc > 0 |
765 |
26 |
lemmas.push_back(nm->mkNode(GT, numOcc, zero)); |
766 |
|
// k = Us(0) |
767 |
26 |
lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero))); |
768 |
|
// Us(numOcc) = substr(x, Uf(numOcc)) |
769 |
26 |
lemmas.push_back(usno.eqNode(rem)); |
770 |
|
// Uf(0) = 0 |
771 |
26 |
lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); |
772 |
|
// indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 |
773 |
26 |
lemmas.push_back( |
774 |
52 |
nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne)); |
775 |
|
|
776 |
52 |
Node i = SkolemCache::mkIndexVar(t); |
777 |
52 |
Node bvli = nm->mkNode(BOUND_VAR_LIST, i); |
778 |
|
Node bound = |
779 |
52 |
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); |
780 |
52 |
Node ip1 = nm->mkNode(PLUS, i, one); |
781 |
52 |
Node ufi = nm->mkNode(APPLY_UF, uf, i); |
782 |
52 |
Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1); |
783 |
52 |
Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1); |
784 |
|
// ii = Uf(i + 1) - Ul(i + 1) |
785 |
52 |
Node ii = nm->mkNode(MINUS, ufip1, ulip1); |
786 |
|
|
787 |
52 |
std::vector<Node> flem; |
788 |
|
// Ul(i + 1) > 0 |
789 |
26 |
flem.push_back(nm->mkNode(GT, ulip1, zero)); |
790 |
|
// Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) |
791 |
26 |
flem.push_back(ufip1.eqNode( |
792 |
52 |
nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1))); |
793 |
|
// in_re(substr(x, ii, Ul(i + 1)), y') |
794 |
78 |
flem.push_back(nm->mkNode( |
795 |
52 |
STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp)); |
796 |
52 |
Node l = SkolemCache::mkLengthVar(t); |
797 |
52 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
798 |
|
Node lenBound = |
799 |
52 |
nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1)); |
800 |
|
Node shortestMatchBody = nm->mkNode( |
801 |
|
OR, |
802 |
52 |
lenBound.negate(), |
803 |
52 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y) |
804 |
104 |
.negate()); |
805 |
|
// forall l. 0 < l < Ul(i + 1) => |
806 |
|
// ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') |
807 |
26 |
flem.push_back(utils::mkForallInternal(bvll, shortestMatchBody)); |
808 |
|
Node pfxMatch = |
809 |
52 |
nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); |
810 |
|
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) |
811 |
26 |
flem.push_back( |
812 |
52 |
nm->mkNode(APPLY_UF, us, i) |
813 |
52 |
.eqNode(nm->mkNode( |
814 |
52 |
STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); |
815 |
52 |
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); |
816 |
52 |
Node forall = utils::mkForallInternal(bvli, body); |
817 |
26 |
lemmas.push_back(forall); |
818 |
|
|
819 |
|
// IF indexof(x, y', 0) = -1 |
820 |
|
// THEN: k = x |
821 |
|
// ELSE: |
822 |
|
// numOcc > 0 ^ |
823 |
|
// k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ |
824 |
|
// Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ |
825 |
|
// forall i. 0 <= i < nummOcc => |
826 |
|
// Ul(i + 1) > 0 ^ |
827 |
|
// Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ |
828 |
|
// in_re(substr(x, ii, Ul(i + 1)), y') ^ |
829 |
|
// forall l. 0 < l < Ul(i + 1) => |
830 |
|
// ~in_re(substr(x, ii, l), y') |
831 |
|
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) |
832 |
|
// where ii = Uf(i + 1) - Ul(i + 1) |
833 |
|
// where y' = re.diff(y, "") |
834 |
|
// |
835 |
|
// Conceptually, y' is the regex y but excluding the empty string (because |
836 |
|
// we do not want to match empty strings), numOcc is the number of shortest |
837 |
|
// matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i) |
838 |
|
// is the length of the i^th match, and Us(i) is the result of processing |
839 |
|
// the remainder after processing the i^th occurrence of y in x. |
840 |
|
// |
841 |
|
// Visualization of Uf(i) and Ul(i): |
842 |
|
// |
843 |
|
// x = |------------| match 1 |-----------| match 2 |---| |
844 |
|
// | | | |
845 |
|
// Uf(0) Uf(1) Uf(2) |
846 |
|
// |
847 |
|
// |---------| |-----------| |
848 |
|
// Ul(1) Ul(2) |
849 |
26 |
asserts.push_back( |
850 |
52 |
nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas))); |
851 |
26 |
retNode = k; |
852 |
|
} |
853 |
2006 |
else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) |
854 |
|
{ |
855 |
12 |
Node x = t[0]; |
856 |
12 |
Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); |
857 |
|
|
858 |
12 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
859 |
12 |
Node lenr = nm->mkNode(STRING_LENGTH, r); |
860 |
12 |
Node eqLenA = lenx.eqNode(lenr); |
861 |
|
|
862 |
12 |
Node i = SkolemCache::mkIndexVar(t); |
863 |
12 |
Node bvi = nm->mkNode(BOUND_VAR_LIST, i); |
864 |
|
|
865 |
12 |
Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one)); |
866 |
12 |
Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one)); |
867 |
|
|
868 |
12 |
Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65)); |
869 |
12 |
Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90)); |
870 |
|
Node offset = |
871 |
12 |
nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? -32 : 32)); |
872 |
|
|
873 |
|
Node res = nm->mkNode( |
874 |
|
ITE, |
875 |
12 |
nm->mkNode(AND, nm->mkNode(LEQ, lb, ci), nm->mkNode(LEQ, ci, ub)), |
876 |
12 |
nm->mkNode(PLUS, ci, offset), |
877 |
36 |
ci); |
878 |
|
|
879 |
|
Node bound = |
880 |
12 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); |
881 |
12 |
Node body = nm->mkNode(OR, bound.negate(), ri.eqNode(res)); |
882 |
12 |
Node rangeA = utils::mkForallInternal(bvi, body); |
883 |
|
|
884 |
|
// upper 65 ... 90 |
885 |
|
// lower 97 ... 122 |
886 |
|
// assert: |
887 |
|
// len(r) = len(x) ^ |
888 |
|
// forall i. 0 <= i < len(r) => |
889 |
|
// str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci) |
890 |
|
// where ci = str.code( str.substr(x,i,1) ) |
891 |
12 |
Node assert = nm->mkNode(AND, eqLenA, rangeA); |
892 |
6 |
asserts.push_back(assert); |
893 |
|
|
894 |
|
// Thus, toLower( x ) = r |
895 |
6 |
retNode = r; |
896 |
|
} |
897 |
2000 |
else if (t.getKind() == STRING_REV) |
898 |
|
{ |
899 |
36 |
Node x = t[0]; |
900 |
36 |
Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); |
901 |
|
|
902 |
36 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
903 |
36 |
Node lenr = nm->mkNode(STRING_LENGTH, r); |
904 |
36 |
Node eqLenA = lenx.eqNode(lenr); |
905 |
|
|
906 |
36 |
Node i = SkolemCache::mkIndexVar(t); |
907 |
36 |
Node bvi = nm->mkNode(BOUND_VAR_LIST, i); |
908 |
|
|
909 |
|
Node revi = nm->mkNode( |
910 |
36 |
MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, one)); |
911 |
36 |
Node ssr = nm->mkNode(STRING_SUBSTR, r, i, one); |
912 |
36 |
Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, one); |
913 |
|
|
914 |
|
Node bound = |
915 |
36 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); |
916 |
36 |
Node body = nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx)); |
917 |
36 |
Node rangeA = utils::mkForallInternal(bvi, body); |
918 |
|
// assert: |
919 |
|
// len(r) = len(x) ^ |
920 |
|
// forall i. 0 <= i < len(r) => |
921 |
|
// substr(r,i,1) = substr(x,len(x)-(i+1),1) |
922 |
36 |
Node assert = nm->mkNode(AND, eqLenA, rangeA); |
923 |
18 |
asserts.push_back(assert); |
924 |
|
|
925 |
|
// Thus, (str.rev x) = r |
926 |
18 |
retNode = r; |
927 |
|
} |
928 |
1982 |
else if (t.getKind() == kind::STRING_CONTAINS) |
929 |
|
{ |
930 |
260 |
Node x = t[0]; |
931 |
260 |
Node s = t[1]; |
932 |
|
//negative contains reduces to existential |
933 |
260 |
Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x); |
934 |
260 |
Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); |
935 |
260 |
Node b1 = SkolemCache::mkIndexVar(t); |
936 |
260 |
Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); |
937 |
|
Node body = NodeManager::currentNM()->mkNode( |
938 |
|
kind::AND, |
939 |
260 |
NodeManager::currentNM()->mkNode(kind::LEQ, zero, b1), |
940 |
390 |
NodeManager::currentNM()->mkNode( |
941 |
|
kind::LEQ, |
942 |
|
b1, |
943 |
260 |
NodeManager::currentNM()->mkNode(kind::MINUS, lenx, lens)), |
944 |
520 |
NodeManager::currentNM()->mkNode( |
945 |
|
kind::EQUAL, |
946 |
260 |
NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens), |
947 |
910 |
s)); |
948 |
130 |
retNode = utils::mkForallInternal(b1v, body.negate()).negate(); |
949 |
|
} |
950 |
1852 |
else if (t.getKind() == kind::STRING_LEQ) |
951 |
|
{ |
952 |
|
Node ltp = sc->mkTypedSkolemCached( |
953 |
92 |
nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp"); |
954 |
92 |
Node k = SkolemCache::mkIndexVar(t); |
955 |
|
|
956 |
92 |
std::vector<Node> conj; |
957 |
46 |
conj.push_back(nm->mkNode(GEQ, k, zero)); |
958 |
92 |
Node substr[2]; |
959 |
92 |
Node code[2]; |
960 |
138 |
for (unsigned r = 0; r < 2; r++) |
961 |
|
{ |
962 |
184 |
Node ta = t[r]; |
963 |
184 |
Node tb = t[1 - r]; |
964 |
92 |
substr[r] = nm->mkNode(STRING_SUBSTR, ta, zero, k); |
965 |
92 |
code[r] = |
966 |
184 |
nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, one)); |
967 |
92 |
conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta))); |
968 |
|
} |
969 |
46 |
conj.push_back(substr[0].eqNode(substr[1])); |
970 |
92 |
std::vector<Node> ite_ch; |
971 |
46 |
ite_ch.push_back(ltp); |
972 |
138 |
for (unsigned r = 0; r < 2; r++) |
973 |
|
{ |
974 |
92 |
ite_ch.push_back(nm->mkNode(LT, code[r], code[1 - r])); |
975 |
|
} |
976 |
46 |
conj.push_back(nm->mkNode(ITE, ite_ch)); |
977 |
|
|
978 |
92 |
Node conjn = utils::mkForallInternal(nm->mkNode(BOUND_VAR_LIST, k), |
979 |
92 |
nm->mkNode(AND, conj).negate()) |
980 |
92 |
.negate(); |
981 |
|
// Intuitively, the reduction says either x and y are equal, or they have |
982 |
|
// some (maximal) common prefix after which their characters at position k |
983 |
|
// are distinct, and the comparison of their code matches the return value |
984 |
|
// of the overall term. |
985 |
|
// Notice the below reduction relies on the semantics of str.code being -1 |
986 |
|
// for the empty string. In particular, say x = "a" and y = "ab", then the |
987 |
|
// reduction below is satisfied for k = 1, since |
988 |
|
// str.code(substr( "a", 1, 1 )) = str.code( "" ) = -1 < |
989 |
|
// str.code(substr( "ab", 1, 1 )) = str.code( "b" ) = 66 |
990 |
|
|
991 |
|
// assert: |
992 |
|
// IF x=y |
993 |
|
// THEN: ltp |
994 |
|
// ELSE: exists k. |
995 |
|
// k >= 0 AND k <= len( x ) AND k <= len( y ) AND |
996 |
|
// substr( x, 0, k ) = substr( y, 0, k ) AND |
997 |
|
// IF ltp |
998 |
|
// THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 )) |
999 |
|
// ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 )) |
1000 |
92 |
Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, conjn); |
1001 |
46 |
asserts.push_back(assert); |
1002 |
|
|
1003 |
|
// Thus, str.<=( x, y ) = ltp |
1004 |
46 |
retNode = ltp; |
1005 |
|
} |
1006 |
9120 |
return retNode; |
1007 |
|
} |
1008 |
|
|
1009 |
4073 |
Node StringsPreprocess::simplify(Node t, std::vector<Node>& asserts) |
1010 |
|
{ |
1011 |
4073 |
size_t prev_asserts = asserts.size(); |
1012 |
|
// call the static reduce routine |
1013 |
4073 |
Node retNode = reduce(t, asserts, d_sc); |
1014 |
4073 |
if( t!=retNode ){ |
1015 |
2267 |
Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl; |
1016 |
2267 |
if (!asserts.empty()) |
1017 |
|
{ |
1018 |
4312 |
Trace("strings-preprocess") |
1019 |
4312 |
<< " ... new nodes (" << (asserts.size() - prev_asserts) |
1020 |
2156 |
<< "):" << std::endl; |
1021 |
4293 |
for (size_t i = prev_asserts; i < asserts.size(); ++i) |
1022 |
|
{ |
1023 |
2137 |
Trace("strings-preprocess") << " " << asserts[i] << std::endl; |
1024 |
|
} |
1025 |
|
} |
1026 |
2267 |
if (d_statReductions != nullptr) |
1027 |
|
{ |
1028 |
2167 |
(*d_statReductions) << t.getKind(); |
1029 |
|
} |
1030 |
|
} |
1031 |
|
else |
1032 |
|
{ |
1033 |
3612 |
Trace("strings-preprocess-debug") |
1034 |
1806 |
<< "Return " << retNode << " unchanged" << std::endl; |
1035 |
|
} |
1036 |
4073 |
return retNode; |
1037 |
|
} |
1038 |
|
|
1039 |
3174 |
Node StringsPreprocess::simplifyRec(Node t, std::vector<Node>& asserts) |
1040 |
|
{ |
1041 |
3174 |
std::map<Node, Node>::iterator it = d_visited.find(t); |
1042 |
3174 |
if (it != d_visited.end()) |
1043 |
|
{ |
1044 |
1260 |
return it->second; |
1045 |
|
}else{ |
1046 |
3828 |
Node retNode = t; |
1047 |
1914 |
if( t.getNumChildren()==0 ){ |
1048 |
472 |
retNode = simplify(t, asserts); |
1049 |
|
} |
1050 |
1442 |
else if (!t.isClosure()) |
1051 |
|
{ |
1052 |
1434 |
bool changed = false; |
1053 |
2868 |
std::vector< Node > cc; |
1054 |
1434 |
if( t.getMetaKind() == kind::metakind::PARAMETERIZED ){ |
1055 |
20 |
cc.push_back( t.getOperator() ); |
1056 |
|
} |
1057 |
4431 |
for(unsigned i=0; i<t.getNumChildren(); i++) { |
1058 |
5994 |
Node s = simplifyRec(t[i], asserts); |
1059 |
2997 |
cc.push_back( s ); |
1060 |
2997 |
if( s!=t[i] ) { |
1061 |
243 |
changed = true; |
1062 |
|
} |
1063 |
|
} |
1064 |
2868 |
Node tmp = t; |
1065 |
1434 |
if( changed ){ |
1066 |
210 |
tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc ); |
1067 |
|
} |
1068 |
1434 |
retNode = simplify(tmp, asserts); |
1069 |
|
} |
1070 |
1914 |
d_visited[t] = retNode; |
1071 |
1914 |
return retNode; |
1072 |
|
} |
1073 |
|
} |
1074 |
|
|
1075 |
107 |
Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts) |
1076 |
|
{ |
1077 |
214 |
std::vector<Node> asserts_curr; |
1078 |
107 |
Node ret = simplifyRec(n, asserts_curr); |
1079 |
247 |
while (!asserts_curr.empty()) |
1080 |
|
{ |
1081 |
140 |
Node curr = asserts_curr.back(); |
1082 |
70 |
asserts_curr.pop_back(); |
1083 |
140 |
std::vector<Node> asserts_tmp; |
1084 |
70 |
curr = simplifyRec(curr, asserts_tmp); |
1085 |
70 |
asserts_curr.insert( |
1086 |
140 |
asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end()); |
1087 |
70 |
asserts.push_back(curr); |
1088 |
|
} |
1089 |
214 |
return ret; |
1090 |
|
} |
1091 |
|
|
1092 |
|
} // namespace strings |
1093 |
|
} // namespace theory |
1094 |
31125 |
} // namespace cvc5 |