1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Strings Preprocess. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/theory_strings_preprocess.h" |
17 |
|
|
18 |
|
#include "expr/kind.h" |
19 |
|
#include "expr/skolem_manager.h" |
20 |
|
#include "options/smt_options.h" |
21 |
|
#include "options/strings_options.h" |
22 |
|
#include "smt/logic_exception.h" |
23 |
|
#include "theory/strings/arith_entail.h" |
24 |
|
#include "theory/strings/sequences_rewriter.h" |
25 |
|
#include "theory/strings/theory_strings_utils.h" |
26 |
|
#include "theory/strings/word.h" |
27 |
|
#include "util/rational.h" |
28 |
|
#include "util/statistics_registry.h" |
29 |
|
#include "util/string.h" |
30 |
|
|
31 |
|
using namespace cvc5; |
32 |
|
using namespace cvc5::kind; |
33 |
|
|
34 |
|
namespace cvc5 { |
35 |
|
namespace theory { |
36 |
|
namespace strings { |
37 |
|
|
38 |
9893 |
StringsPreprocess::StringsPreprocess(SkolemCache* sc, |
39 |
9893 |
HistogramStat<Kind>* statReductions) |
40 |
9893 |
: d_sc(sc), d_statReductions(statReductions) |
41 |
|
{ |
42 |
9893 |
} |
43 |
|
|
44 |
9893 |
StringsPreprocess::~StringsPreprocess(){ |
45 |
|
|
46 |
9893 |
} |
47 |
|
|
48 |
4278 |
Node StringsPreprocess::reduce(Node t, |
49 |
|
std::vector<Node>& asserts, |
50 |
|
SkolemCache* sc) |
51 |
|
{ |
52 |
8556 |
Trace("strings-preprocess-debug") |
53 |
4278 |
<< "StringsPreprocess::reduce: " << t << std::endl; |
54 |
4278 |
Node retNode = t; |
55 |
4278 |
NodeManager* nm = NodeManager::currentNM(); |
56 |
4278 |
SkolemManager* sm = nm->getSkolemManager(); |
57 |
8556 |
Node zero = nm->mkConst(Rational(0)); |
58 |
8556 |
Node one = nm->mkConst(Rational(1)); |
59 |
8556 |
Node negOne = nm->mkConst(Rational(-1)); |
60 |
|
|
61 |
4278 |
if( t.getKind() == kind::STRING_SUBSTR ) { |
62 |
|
// processing term: substr( s, n, m ) |
63 |
3328 |
Node s = t[0]; |
64 |
3328 |
Node n = t[1]; |
65 |
3328 |
Node m = t[2]; |
66 |
3328 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
67 |
3328 |
Node t12 = nm->mkNode(PLUS, n, m); |
68 |
3328 |
Node lt0 = nm->mkNode(STRING_LENGTH, s); |
69 |
|
//start point is greater than or equal zero |
70 |
3328 |
Node c1 = nm->mkNode(GEQ, n, zero); |
71 |
|
//start point is less than end of string |
72 |
3328 |
Node c2 = nm->mkNode(GT, lt0, n); |
73 |
|
//length is positive |
74 |
3328 |
Node c3 = nm->mkNode(GT, m, zero); |
75 |
3328 |
Node cond = nm->mkNode(AND, c1, c2, c3); |
76 |
|
|
77 |
3328 |
Node emp = Word::mkEmptyWord(t.getType()); |
78 |
|
|
79 |
3328 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
80 |
3328 |
Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
81 |
3328 |
Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, skt, sk2)); |
82 |
|
//length of first skolem is second argument |
83 |
3328 |
Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
84 |
3328 |
Node lsk2 = nm->mkNode(STRING_LENGTH, sk2); |
85 |
|
// Length of the suffix is either 0 (in the case where m + n > len(s)) or |
86 |
|
// len(s) - n -m |
87 |
|
Node b13 = nm->mkNode( |
88 |
|
OR, |
89 |
3328 |
nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, nm->mkNode(PLUS, n, m))), |
90 |
6656 |
nm->mkNode(EQUAL, lsk2, zero)); |
91 |
|
// Length of the result is at most m |
92 |
3328 |
Node b14 = nm->mkNode(LEQ, nm->mkNode(STRING_LENGTH, skt), m); |
93 |
|
|
94 |
3328 |
Node b1 = nm->mkNode(AND, {b11, b12, b13, b14}); |
95 |
3328 |
Node b2 = skt.eqNode(emp); |
96 |
3328 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
97 |
|
|
98 |
|
// assert: |
99 |
|
// IF n >=0 AND len( s ) > n AND m > 0 |
100 |
|
// THEN: s = sk1 ++ skt ++ sk2 AND |
101 |
|
// len( sk1 ) = n AND |
102 |
|
// ( len( sk2 ) = len( s )-(n+m) OR len( sk2 ) = 0 ) AND |
103 |
|
// len( skt ) <= m |
104 |
|
// ELSE: skt = "" |
105 |
|
// |
106 |
|
// Note: The length of sk2 is either 0 (if the n + m is greater or equal to |
107 |
|
// the length of s) or len(s) - (n + m) otherwise. If n + m is greater or |
108 |
|
// equal to the length of s, then len(s) - (n + m) is negative and in |
109 |
|
// conflict with lengths always being positive, so only len(sk2) = 0 may be |
110 |
|
// satisfied. If n + m is less than the length of s, then len(sk2) = 0 |
111 |
|
// cannot be satisfied because we have the constraint that len(skt) <= m, |
112 |
|
// so sk2 must be greater than 0. |
113 |
1664 |
asserts.push_back(lemma); |
114 |
|
|
115 |
|
// Thus, substr( s, n, m ) = skt |
116 |
1664 |
retNode = skt; |
117 |
|
} |
118 |
2614 |
else if (t.getKind() == kind::STRING_UPDATE) |
119 |
|
{ |
120 |
|
// processing term: update( s, n, r ) |
121 |
34 |
Node s = t[0]; |
122 |
34 |
Node n = t[1]; |
123 |
34 |
Node r = t[2]; |
124 |
34 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
125 |
34 |
Node ls = nm->mkNode(STRING_LENGTH, s); |
126 |
|
// start point is greater than or equal zero |
127 |
34 |
Node c1 = nm->mkNode(GEQ, n, zero); |
128 |
|
// start point is less than end of string |
129 |
34 |
Node c2 = nm->mkNode(GT, ls, n); |
130 |
34 |
Node cond = nm->mkNode(AND, c1, c2); |
131 |
|
|
132 |
|
// substr(r,0,|s|-n) |
133 |
34 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
134 |
34 |
Node rs; |
135 |
17 |
if (r.isConst() && Word::getLength(r) == 1) |
136 |
|
{ |
137 |
|
// optimization: don't need to take substring for single characters, due |
138 |
|
// to guard on where it is used in the reduction below. |
139 |
2 |
rs = r; |
140 |
|
} |
141 |
|
else |
142 |
|
{ |
143 |
15 |
rs = nm->mkNode(STRING_SUBSTR, r, zero, nm->mkNode(MINUS, lens, n)); |
144 |
|
} |
145 |
34 |
Node rslen = nm->mkNode(STRING_LENGTH, rs); |
146 |
34 |
Node nsuf = nm->mkNode(PLUS, n, rslen); |
147 |
|
// substr(s, n, len(substr(r,0,|s|-n))), which is used for formalizing the |
148 |
|
// purification variable sk3 below. |
149 |
34 |
Node ssubstr = nm->mkNode(STRING_SUBSTR, s, n, rslen); |
150 |
|
|
151 |
34 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
152 |
|
Node sk2 = |
153 |
34 |
sc->mkSkolemCached(s, nsuf, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
154 |
34 |
Node sk3 = sc->mkSkolemCached(ssubstr, SkolemCache::SK_PURIFY, "ssubstr"); |
155 |
34 |
Node a1 = skt.eqNode(nm->mkNode(STRING_CONCAT, sk1, rs, sk2)); |
156 |
34 |
Node a2 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, sk3, sk2)); |
157 |
|
// length of first skolem is second argument |
158 |
34 |
Node a3 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
159 |
34 |
Node a4 = nm->mkNode(STRING_LENGTH, rs).eqNode(nm->mkNode(STRING_LENGTH, sk3)); |
160 |
|
|
161 |
34 |
Node b1 = nm->mkNode(AND, {a1, a2, a3, a4}); |
162 |
34 |
Node b2 = skt.eqNode(s); |
163 |
34 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
164 |
|
|
165 |
|
// assert: |
166 |
|
// IF n >=0 AND n < len( s ) |
167 |
|
// THEN: skt = sk1 ++ substr(r,0,len(s)-n) ++ sk2 AND |
168 |
|
// s = sk1 ++ sk3 ++ sk2 AND |
169 |
|
// len( sk1 ) = n AND |
170 |
|
// len( substr(r,0,len(s)-n) ) = len( sk3 ) |
171 |
|
// ELSE: skt = s |
172 |
|
// We use an optimization where r is used instead of substr(r,0,len(s)-n) |
173 |
|
// if r is a constant of length one. |
174 |
17 |
asserts.push_back(lemma); |
175 |
|
|
176 |
|
// Thus, str.update( s, n, r ) = skt |
177 |
17 |
retNode = skt; |
178 |
|
} |
179 |
2597 |
else if (t.getKind() == kind::STRING_INDEXOF) |
180 |
|
{ |
181 |
|
// processing term: indexof( x, y, n ) |
182 |
284 |
Node x = t[0]; |
183 |
284 |
Node y = t[1]; |
184 |
284 |
Node n = t[2]; |
185 |
|
Node skk = sc->mkTypedSkolemCached( |
186 |
284 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "iok"); |
187 |
|
|
188 |
284 |
Node negone = nm->mkConst(Rational(-1)); |
189 |
|
|
190 |
|
// substr( x, n, len( x ) - n ) |
191 |
|
Node st = nm->mkNode(STRING_SUBSTR, |
192 |
|
x, |
193 |
|
n, |
194 |
284 |
nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, x), n)); |
195 |
|
Node io2 = |
196 |
284 |
sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_PRE, "iopre"); |
197 |
|
Node io4 = |
198 |
284 |
sc->mkSkolemCached(st, y, SkolemCache::SK_FIRST_CTN_POST, "iopost"); |
199 |
|
|
200 |
|
// ~contains( substr( x, n, len( x ) - n ), y ) |
201 |
284 |
Node c11 = nm->mkNode(STRING_CONTAINS, st, y).negate(); |
202 |
|
// n > len( x ) |
203 |
284 |
Node c12 = nm->mkNode(GT, n, nm->mkNode(STRING_LENGTH, x)); |
204 |
|
// 0 > n |
205 |
284 |
Node c13 = nm->mkNode(GT, zero, n); |
206 |
284 |
Node cond1 = nm->mkNode(OR, c11, c12, c13); |
207 |
|
// skk = -1 |
208 |
284 |
Node cc1 = skk.eqNode(negone); |
209 |
|
|
210 |
|
// y = "" |
211 |
284 |
Node emp = Word::mkEmptyWord(x.getType()); |
212 |
284 |
Node cond2 = y.eqNode(emp); |
213 |
|
// skk = n |
214 |
284 |
Node cc2 = skk.eqNode(t[2]); |
215 |
|
|
216 |
|
// substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) |
217 |
284 |
Node c31 = st.eqNode(nm->mkNode(STRING_CONCAT, io2, y, io4)); |
218 |
|
// ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) |
219 |
|
Node c32 = |
220 |
568 |
nm->mkNode( |
221 |
|
STRING_CONTAINS, |
222 |
284 |
nm->mkNode( |
223 |
|
STRING_CONCAT, |
224 |
|
io2, |
225 |
284 |
nm->mkNode( |
226 |
|
STRING_SUBSTR, |
227 |
|
y, |
228 |
|
zero, |
229 |
284 |
nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, y), one))), |
230 |
|
y) |
231 |
284 |
.negate(); |
232 |
|
// skk = n + len( io2 ) |
233 |
284 |
Node c33 = skk.eqNode(nm->mkNode(PLUS, n, nm->mkNode(STRING_LENGTH, io2))); |
234 |
284 |
Node cc3 = nm->mkNode(AND, c31, c32, c33); |
235 |
|
|
236 |
|
// assert: |
237 |
|
// IF: ~contains( substr( x, n, len( x ) - n ), y ) OR n > len(x) OR 0 > n |
238 |
|
// THEN: skk = -1 |
239 |
|
// ELIF: y = "" |
240 |
|
// THEN: skk = n |
241 |
|
// ELSE: substr( x, n, len( x ) - n ) = str.++( io2, y, io4 ) ^ |
242 |
|
// ~contains( str.++( io2, substr( y, 0, len( y ) - 1) ), y ) ^ |
243 |
|
// skk = n + len( io2 ) |
244 |
|
// for fresh io2, io4. |
245 |
284 |
Node rr = nm->mkNode(ITE, cond1, cc1, nm->mkNode(ITE, cond2, cc2, cc3)); |
246 |
142 |
asserts.push_back(rr); |
247 |
|
|
248 |
|
// Thus, indexof( x, y, n ) = skk. |
249 |
142 |
retNode = skk; |
250 |
|
} |
251 |
2455 |
else if (t.getKind() == kind::STRING_INDEXOF_RE) |
252 |
|
{ |
253 |
|
// processing term: indexof_re(s, r, n) |
254 |
236 |
Node s = t[0]; |
255 |
236 |
Node r = t[1]; |
256 |
236 |
Node n = t[2]; |
257 |
|
Node skk = sc->mkTypedSkolemCached( |
258 |
236 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "iork"); |
259 |
236 |
Node sLen = nm->mkNode(STRING_LENGTH, s); |
260 |
|
|
261 |
|
// n > len(x) |
262 |
236 |
Node ub = nm->mkNode(GT, n, sLen); |
263 |
|
// 0 > n |
264 |
236 |
Node lb = nm->mkNode(GT, zero, n); |
265 |
|
// n > len(x) OR 0 > n |
266 |
236 |
Node condNegOne = nm->mkNode(OR, ub, lb); |
267 |
|
// skk = -1 |
268 |
236 |
Node retNegOne = skk.eqNode(negOne); |
269 |
|
|
270 |
236 |
Node emp = Word::mkEmptyWord(s.getType()); |
271 |
|
// in_re("", r) |
272 |
236 |
Node matchEmptyStr = nm->mkNode(STRING_IN_REGEXP, emp, r); |
273 |
|
// skk = n |
274 |
236 |
Node retN = skk.eqNode(n); |
275 |
|
|
276 |
236 |
Node i = SkolemCache::mkIndexVar(t); |
277 |
236 |
Node l = SkolemCache::mkLengthVar(t); |
278 |
236 |
Node bvl = nm->mkNode(BOUND_VAR_LIST, i, l); |
279 |
|
Node bound = |
280 |
|
nm->mkNode(AND, |
281 |
|
{ |
282 |
236 |
nm->mkNode(GEQ, i, n), |
283 |
236 |
nm->mkNode(LT, i, nm->mkNode(ITE, retNegOne, sLen, skk)), |
284 |
236 |
nm->mkNode(GT, l, zero), |
285 |
236 |
nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, i)), |
286 |
590 |
}); |
287 |
|
Node body = nm->mkNode( |
288 |
|
OR, |
289 |
236 |
bound.negate(), |
290 |
236 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, i, l), r) |
291 |
472 |
.negate()); |
292 |
|
// forall il. |
293 |
|
// n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => |
294 |
|
// ~in_re(substr(s, i, l), r) |
295 |
236 |
Node firstMatch = utils::mkForallInternal(bvl, body); |
296 |
236 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
297 |
|
Node validLen = |
298 |
|
nm->mkNode(AND, |
299 |
236 |
nm->mkNode(GEQ, l, zero), |
300 |
472 |
nm->mkNode(LEQ, l, nm->mkNode(MINUS, sLen, skk))); |
301 |
|
Node matchBody = nm->mkNode( |
302 |
|
AND, |
303 |
|
validLen, |
304 |
236 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, s, skk, l), r)); |
305 |
|
// skk != -1 => |
306 |
|
// exists l. (0 <= l < len(s) - skk) ^ in_re(substr(s, skk, l), r)) |
307 |
|
Node match = |
308 |
|
nm->mkNode(OR, |
309 |
|
retNegOne, |
310 |
236 |
utils::mkForallInternal(bvll, matchBody.negate()).negate()); |
311 |
|
|
312 |
|
// assert: |
313 |
|
// IF: n > len(s) OR 0 > n |
314 |
|
// THEN: skk = -1 |
315 |
|
// ELIF: in_re("", r) |
316 |
|
// THEN: skk = n |
317 |
|
// ELSE: (forall il. |
318 |
|
// n <= i < ite(skk = -1, len(s), skk) ^ 0 < l <= len(s) - i => |
319 |
|
// ~in_re(substr(s, i, l), r)) ^ |
320 |
|
// (skk != -1 => |
321 |
|
// exists l. 0 <= l <= len(s) - skk ^ in_re(substr(s, skk, l), r)) |
322 |
|
// |
323 |
|
// Note that this reduction relies on eager reduction lemmas being sent to |
324 |
|
// properly limit the range of skk. |
325 |
|
Node rr = nm->mkNode( |
326 |
|
ITE, |
327 |
|
condNegOne, |
328 |
|
retNegOne, |
329 |
236 |
nm->mkNode( |
330 |
472 |
ITE, matchEmptyStr, retN, nm->mkNode(AND, firstMatch, match))); |
331 |
118 |
asserts.push_back(rr); |
332 |
|
|
333 |
|
// Thus, indexof_re(s, r, n) = skk. |
334 |
118 |
retNode = skk; |
335 |
|
} |
336 |
2337 |
else if (t.getKind() == STRING_ITOS) |
337 |
|
{ |
338 |
|
// processing term: int.to.str( n ) |
339 |
54 |
Node n = t[0]; |
340 |
54 |
Node itost = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "itost"); |
341 |
54 |
Node leni = nm->mkNode(STRING_LENGTH, itost); |
342 |
|
|
343 |
54 |
std::vector<Node> conc; |
344 |
54 |
std::vector< TypeNode > argTypes; |
345 |
27 |
argTypes.push_back(nm->integerType()); |
346 |
|
Node u = |
347 |
54 |
sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType())); |
348 |
|
|
349 |
54 |
Node lem = nm->mkNode(GEQ, leni, one); |
350 |
27 |
conc.push_back(lem); |
351 |
|
|
352 |
27 |
lem = n.eqNode(nm->mkNode(APPLY_UF, u, leni)); |
353 |
27 |
conc.push_back(lem); |
354 |
|
|
355 |
27 |
lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); |
356 |
27 |
conc.push_back(lem); |
357 |
|
|
358 |
54 |
Node x = SkolemCache::mkIndexVar(t); |
359 |
54 |
Node xPlusOne = nm->mkNode(PLUS, x, one); |
360 |
54 |
Node xbv = nm->mkNode(BOUND_VAR_LIST, x); |
361 |
54 |
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, leni)); |
362 |
54 |
Node sx = nm->mkNode(STRING_SUBSTR, itost, x, one); |
363 |
54 |
Node ux = nm->mkNode(APPLY_UF, u, x); |
364 |
54 |
Node ux1 = nm->mkNode(APPLY_UF, u, xPlusOne); |
365 |
54 |
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); |
366 |
54 |
Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0); |
367 |
|
|
368 |
54 |
Node ten = nm->mkConst(Rational(10)); |
369 |
54 |
Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); |
370 |
|
Node leadingZeroPos = |
371 |
54 |
nm->mkNode(AND, x.eqNode(zero), nm->mkNode(GT, leni, one)); |
372 |
|
Node cb = nm->mkNode( |
373 |
|
AND, |
374 |
54 |
nm->mkNode(GEQ, c, nm->mkNode(ITE, leadingZeroPos, one, zero)), |
375 |
108 |
nm->mkNode(LT, c, ten)); |
376 |
|
|
377 |
54 |
Node ux1lem = nm->mkNode(GEQ, n, ux1); |
378 |
|
|
379 |
27 |
lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); |
380 |
27 |
lem = utils::mkForallInternal(xbv, lem); |
381 |
27 |
conc.push_back(lem); |
382 |
|
|
383 |
54 |
Node nonneg = nm->mkNode(GEQ, n, zero); |
384 |
|
|
385 |
54 |
Node emp = Word::mkEmptyWord(t.getType()); |
386 |
27 |
lem = nm->mkNode(ITE, nonneg, nm->mkNode(AND, conc), itost.eqNode(emp)); |
387 |
27 |
asserts.push_back(lem); |
388 |
|
// assert: |
389 |
|
// IF n>=0 |
390 |
|
// THEN: |
391 |
|
// len( itost ) >= 1 ^ |
392 |
|
// n = U( len( itost ) ) ^ U( 0 ) = 0 ^ |
393 |
|
// forall x. (x>=0 ^ x < str.len(itost)) => |
394 |
|
// U( x+1 ) = (str.code( str.at(itost, x) )-48) + 10*U( x ) ^ |
395 |
|
// ite( x=0 AND str.len(itost)>1, 49, 48 ) <= |
396 |
|
// str.code( str.at(itost, x) ) < 58 ^ |
397 |
|
// n >= U(x + 1) |
398 |
|
// ELSE |
399 |
|
// itost = "" |
400 |
|
// thus: |
401 |
|
// int.to.str( n ) = itost |
402 |
|
// |
403 |
|
// Note: The conjunct `n >= U(x + 1)` is not needed for correctness but is |
404 |
|
// just an optimization. |
405 |
|
|
406 |
|
// The function U is an accumulator, where U( x ) for x>0 is the value of |
407 |
|
// str.to.int( str.substr( int.to.str( n ), 0, x ) ). For example, for |
408 |
|
// n=345, we have that U(1), U(2), U(3) = 3, 34, 345. |
409 |
|
// Above, we use str.code to map characters to their integer value, where |
410 |
|
// note that str.code( "0" ) = 48. Further notice that |
411 |
|
// ite( x=0 AND str.len(itost)>1, 49, 48 ) |
412 |
|
// enforces that int.to.str( n ) has no leading zeroes. |
413 |
27 |
retNode = itost; |
414 |
2310 |
} else if( t.getKind() == kind::STRING_STOI ) { |
415 |
54 |
Node s = t[0]; |
416 |
|
Node stoit = sc->mkTypedSkolemCached( |
417 |
54 |
nm->integerType(), t, SkolemCache::SK_PURIFY, "stoit"); |
418 |
54 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
419 |
|
|
420 |
54 |
std::vector<Node> conc1; |
421 |
54 |
Node lem = stoit.eqNode(negOne); |
422 |
27 |
conc1.push_back(lem); |
423 |
|
|
424 |
54 |
Node emp = Word::mkEmptyWord(s.getType()); |
425 |
54 |
Node sEmpty = s.eqNode(emp); |
426 |
54 |
Node k = sm->mkDummySkolem("k", nm->integerType()); |
427 |
54 |
Node kc1 = nm->mkNode(GEQ, k, zero); |
428 |
54 |
Node kc2 = nm->mkNode(LT, k, lens); |
429 |
54 |
Node c0 = nm->mkNode(STRING_TO_CODE, nm->mkConst(String("0"))); |
430 |
|
Node codeSk = nm->mkNode( |
431 |
|
MINUS, |
432 |
54 |
nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, s, k, one)), |
433 |
108 |
c0); |
434 |
54 |
Node ten = nm->mkConst(Rational(10)); |
435 |
|
Node kc3 = nm->mkNode( |
436 |
54 |
OR, nm->mkNode(LT, codeSk, zero), nm->mkNode(GEQ, codeSk, ten)); |
437 |
27 |
conc1.push_back(nm->mkNode(OR, sEmpty, nm->mkNode(AND, kc1, kc2, kc3))); |
438 |
|
|
439 |
54 |
std::vector<Node> conc2; |
440 |
54 |
std::vector< TypeNode > argTypes; |
441 |
27 |
argTypes.push_back(nm->integerType()); |
442 |
|
Node u = |
443 |
54 |
sm->mkDummySkolem("U", nm->mkFunctionType(argTypes, nm->integerType())); |
444 |
|
|
445 |
27 |
lem = stoit.eqNode(nm->mkNode(APPLY_UF, u, lens)); |
446 |
27 |
conc2.push_back(lem); |
447 |
|
|
448 |
27 |
lem = zero.eqNode(nm->mkNode(APPLY_UF, u, zero)); |
449 |
27 |
conc2.push_back(lem); |
450 |
|
|
451 |
27 |
lem = nm->mkNode(GT, lens, zero); |
452 |
27 |
conc2.push_back(lem); |
453 |
|
|
454 |
54 |
Node x = SkolemCache::mkIndexVar(t); |
455 |
54 |
Node xbv = nm->mkNode(BOUND_VAR_LIST, x); |
456 |
54 |
Node g = nm->mkNode(AND, nm->mkNode(GEQ, x, zero), nm->mkNode(LT, x, lens)); |
457 |
54 |
Node sx = nm->mkNode(STRING_SUBSTR, s, x, one); |
458 |
54 |
Node ux = nm->mkNode(APPLY_UF, u, x); |
459 |
54 |
Node ux1 = nm->mkNode(APPLY_UF, u, nm->mkNode(PLUS, x, one)); |
460 |
54 |
Node c = nm->mkNode(MINUS, nm->mkNode(STRING_TO_CODE, sx), c0); |
461 |
|
|
462 |
54 |
Node eq = ux1.eqNode(nm->mkNode(PLUS, c, nm->mkNode(MULT, ten, ux))); |
463 |
54 |
Node cb = nm->mkNode(AND, nm->mkNode(GEQ, c, zero), nm->mkNode(LT, c, ten)); |
464 |
|
|
465 |
54 |
Node ux1lem = nm->mkNode(GEQ, stoit, ux1); |
466 |
|
|
467 |
27 |
lem = nm->mkNode(OR, g.negate(), nm->mkNode(AND, eq, cb, ux1lem)); |
468 |
27 |
lem = utils::mkForallInternal(xbv, lem); |
469 |
27 |
conc2.push_back(lem); |
470 |
|
|
471 |
54 |
Node sneg = nm->mkNode(LT, stoit, zero); |
472 |
27 |
lem = nm->mkNode(ITE, sneg, nm->mkNode(AND, conc1), nm->mkNode(AND, conc2)); |
473 |
27 |
asserts.push_back(lem); |
474 |
|
|
475 |
|
// assert: |
476 |
|
// IF stoit < 0 |
477 |
|
// THEN: |
478 |
|
// stoit = -1 ^ |
479 |
|
// ( s = "" OR |
480 |
|
// ( k>=0 ^ k<len( s ) ^ ( str.code( str.at(s, k) ) < 48 OR |
481 |
|
// str.code( str.at(s, k) ) >= 58 ))) |
482 |
|
// ELSE: |
483 |
|
// stoit = U( len( s ) ) ^ U( 0 ) = 0 ^ |
484 |
|
// str.len( s ) > 0 ^ |
485 |
|
// forall x. (x>=0 ^ x < str.len(s)) => |
486 |
|
// U( x+1 ) = ( str.code( str.at(s, x) ) - 48 ) + 10*U( x ) ^ |
487 |
|
// 48 <= str.code( str.at(s, x) ) < 58 ^ |
488 |
|
// stoit >= U( x+1 ) |
489 |
|
// Thus, str.to.int( s ) = stoit |
490 |
|
// |
491 |
|
// Note: The conjunct `stoit >= U( x+1 )` is not needed for correctness but |
492 |
|
// is just an optimization. |
493 |
|
|
494 |
27 |
retNode = stoit; |
495 |
|
} |
496 |
2283 |
else if (t.getKind() == kind::SEQ_NTH) |
497 |
|
{ |
498 |
|
// processing term: str.nth( s, n) |
499 |
|
// similar to substr. |
500 |
112 |
Node s = t[0]; |
501 |
112 |
Node n = t[1]; |
502 |
112 |
Node skt = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "sst"); |
503 |
112 |
Node t12 = nm->mkNode(PLUS, n, one); |
504 |
112 |
Node lt0 = nm->mkNode(STRING_LENGTH, s); |
505 |
|
// start point is greater than or equal zero |
506 |
112 |
Node c1 = nm->mkNode(GEQ, n, zero); |
507 |
|
// start point is less than end of string |
508 |
112 |
Node c2 = nm->mkNode(GT, lt0, n); |
509 |
|
// check whether this application of seq.nth is defined. |
510 |
112 |
Node cond = nm->mkNode(AND, c1, c2); |
511 |
|
|
512 |
|
// nodes for the case where `seq.nth` is defined. |
513 |
112 |
Node sk1 = sc->mkSkolemCached(s, n, SkolemCache::SK_PREFIX, "sspre"); |
514 |
112 |
Node sk2 = sc->mkSkolemCached(s, t12, SkolemCache::SK_SUFFIX_REM, "sssufr"); |
515 |
112 |
Node unit = nm->mkNode(SEQ_UNIT, skt); |
516 |
112 |
Node b11 = s.eqNode(nm->mkNode(STRING_CONCAT, sk1, unit, sk2)); |
517 |
|
// length of first skolem is second argument |
518 |
112 |
Node b12 = nm->mkNode(STRING_LENGTH, sk1).eqNode(n); |
519 |
112 |
Node lsk2 = nm->mkNode(STRING_LENGTH, sk2); |
520 |
112 |
Node b13 = nm->mkNode(EQUAL, lsk2, nm->mkNode(MINUS, lt0, t12)); |
521 |
112 |
Node b1 = nm->mkNode(AND, b11, b12, b13); |
522 |
|
|
523 |
|
// nodes for the case where `seq.nth` is undefined. |
524 |
112 |
Node uf = SkolemCache::mkSkolemSeqNth(s.getType(), "Uf"); |
525 |
112 |
Node b2 = nm->mkNode(EQUAL, skt, nm->mkNode(APPLY_UF, uf, s, n)); |
526 |
|
|
527 |
|
// the full ite, split on definedness of `seq.nth` |
528 |
112 |
Node lemma = nm->mkNode(ITE, cond, b1, b2); |
529 |
|
|
530 |
|
// assert: |
531 |
|
// IF n >=0 AND n < len( s ) |
532 |
|
// THEN: s = sk1 ++ unit(skt) ++ sk2 AND |
533 |
|
// len( sk1 ) = n AND |
534 |
|
// ( len( sk2 ) = len( s )- (n+1) |
535 |
|
// ELSE: skt = Uf(s, n), where Uf is a cached skolem function. |
536 |
56 |
asserts.push_back(lemma); |
537 |
56 |
retNode = skt; |
538 |
|
} |
539 |
2227 |
else if (t.getKind() == kind::STRING_REPLACE) |
540 |
|
{ |
541 |
|
// processing term: replace( x, y, z ) |
542 |
502 |
Node x = t[0]; |
543 |
502 |
Node y = t[1]; |
544 |
502 |
Node z = t[2]; |
545 |
502 |
TypeNode tn = t[0].getType(); |
546 |
|
Node rp1 = |
547 |
502 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_PRE, "rfcpre"); |
548 |
|
Node rp2 = |
549 |
502 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_CTN_POST, "rfcpost"); |
550 |
502 |
Node rpw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpw"); |
551 |
|
|
552 |
|
// y = "" |
553 |
502 |
Node emp = Word::mkEmptyWord(tn); |
554 |
502 |
Node cond1 = y.eqNode(emp); |
555 |
|
// rpw = str.++( z, x ) |
556 |
502 |
Node c1 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, z, x)); |
557 |
|
|
558 |
|
// contains( x, y ) |
559 |
502 |
Node cond2 = nm->mkNode(kind::STRING_CONTAINS, x, y); |
560 |
|
// x = str.++( rp1, y, rp2 ) |
561 |
502 |
Node c21 = x.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, y, rp2)); |
562 |
|
// rpw = str.++( rp1, z, rp2 ) |
563 |
502 |
Node c22 = rpw.eqNode(nm->mkNode(kind::STRING_CONCAT, rp1, z, rp2)); |
564 |
|
// ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ) |
565 |
|
Node c23 = |
566 |
1004 |
nm->mkNode(kind::STRING_CONTAINS, |
567 |
502 |
nm->mkNode( |
568 |
|
kind::STRING_CONCAT, |
569 |
|
rp1, |
570 |
502 |
nm->mkNode(kind::STRING_SUBSTR, |
571 |
|
y, |
572 |
|
zero, |
573 |
1004 |
nm->mkNode(kind::MINUS, |
574 |
502 |
nm->mkNode(kind::STRING_LENGTH, y), |
575 |
|
one))), |
576 |
|
y) |
577 |
502 |
.negate(); |
578 |
|
|
579 |
|
// assert: |
580 |
|
// IF y="" |
581 |
|
// THEN: rpw = str.++( z, x ) |
582 |
|
// ELIF: contains( x, y ) |
583 |
|
// THEN: x = str.++( rp1, y, rp2 ) ^ |
584 |
|
// rpw = str.++( rp1, z, rp2 ) ^ |
585 |
|
// ~contains( str.++( rp1, substr( y, 0, len(y)-1 ) ), y ), |
586 |
|
// ELSE: rpw = x |
587 |
|
// for fresh rp1, rp2, rpw |
588 |
|
Node rr = nm->mkNode(kind::ITE, |
589 |
|
cond1, |
590 |
|
c1, |
591 |
1004 |
nm->mkNode(kind::ITE, |
592 |
|
cond2, |
593 |
502 |
nm->mkNode(kind::AND, c21, c22, c23), |
594 |
1004 |
rpw.eqNode(x))); |
595 |
251 |
asserts.push_back(rr); |
596 |
|
|
597 |
|
// Thus, replace( x, y, z ) = rpw. |
598 |
251 |
retNode = rpw; |
599 |
|
} |
600 |
1976 |
else if (t.getKind() == kind::STRING_REPLACE_ALL) |
601 |
|
{ |
602 |
|
// processing term: replaceall( x, y, z ) |
603 |
28 |
Node x = t[0]; |
604 |
28 |
Node y = t[1]; |
605 |
28 |
Node z = t[2]; |
606 |
28 |
Node rpaw = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "rpaw"); |
607 |
|
|
608 |
|
Node numOcc = sc->mkTypedSkolemCached( |
609 |
28 |
nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); |
610 |
28 |
std::vector<TypeNode> argTypes; |
611 |
14 |
argTypes.push_back(nm->integerType()); |
612 |
|
Node us = |
613 |
28 |
sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, nm->stringType())); |
614 |
28 |
TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); |
615 |
|
Node uf = sc->mkTypedSkolemCached( |
616 |
28 |
ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); |
617 |
|
|
618 |
28 |
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); |
619 |
28 |
Node usno = nm->mkNode(APPLY_UF, us, numOcc); |
620 |
28 |
Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); |
621 |
|
|
622 |
28 |
std::vector<Node> lem; |
623 |
14 |
lem.push_back(nm->mkNode(GEQ, numOcc, zero)); |
624 |
14 |
lem.push_back(rpaw.eqNode(nm->mkNode(APPLY_UF, us, zero))); |
625 |
14 |
lem.push_back(usno.eqNode(rem)); |
626 |
14 |
lem.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); |
627 |
14 |
lem.push_back(nm->mkNode(STRING_INDEXOF, x, y, ufno).eqNode(negOne)); |
628 |
|
|
629 |
28 |
Node i = SkolemCache::mkIndexVar(t); |
630 |
28 |
Node bvli = nm->mkNode(BOUND_VAR_LIST, i); |
631 |
|
Node bound = |
632 |
28 |
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); |
633 |
28 |
Node ufi = nm->mkNode(APPLY_UF, uf, i); |
634 |
28 |
Node ufip1 = nm->mkNode(APPLY_UF, uf, nm->mkNode(PLUS, i, one)); |
635 |
28 |
Node ii = nm->mkNode(STRING_INDEXOF, x, y, ufi); |
636 |
|
Node cc = nm->mkNode( |
637 |
|
STRING_CONCAT, |
638 |
28 |
nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)), |
639 |
|
z, |
640 |
56 |
nm->mkNode(APPLY_UF, us, nm->mkNode(PLUS, i, one))); |
641 |
|
|
642 |
28 |
std::vector<Node> flem; |
643 |
14 |
flem.push_back(ii.eqNode(negOne).negate()); |
644 |
14 |
flem.push_back(nm->mkNode(APPLY_UF, us, i).eqNode(cc)); |
645 |
14 |
flem.push_back( |
646 |
28 |
ufip1.eqNode(nm->mkNode(PLUS, ii, nm->mkNode(STRING_LENGTH, y)))); |
647 |
|
|
648 |
28 |
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); |
649 |
28 |
Node q = utils::mkForallInternal(bvli, body); |
650 |
14 |
lem.push_back(q); |
651 |
|
|
652 |
|
// assert: |
653 |
|
// IF y="" |
654 |
|
// THEN: rpaw = x |
655 |
|
// ELSE: |
656 |
|
// numOcc >= 0 ^ |
657 |
|
// rpaw = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc), len(x)) ^ |
658 |
|
// Uf(0) = 0 ^ indexof( x, y, Uf(numOcc) ) = -1 ^ |
659 |
|
// forall i. 0 <= i < numOcc => |
660 |
|
// ii != -1 ^ |
661 |
|
// Us( i ) = str.substr( x, Uf(i), ii - Uf(i) ) ++ z ++ Us(i+1) ^ |
662 |
|
// Uf( i+1 ) = ii + len(y) |
663 |
|
// where ii == indexof( x, y, Uf(i) ) |
664 |
|
|
665 |
|
// Conceptually, numOcc is the number of occurrences of y in x, Uf( i ) is |
666 |
|
// the index to begin searching in x for y after the i^th occurrence of y in |
667 |
|
// x, and Us( i ) is the result of processing the remainder after processing |
668 |
|
// the i^th occurrence of y in x. |
669 |
28 |
Node emp = Word::mkEmptyWord(t.getType()); |
670 |
|
Node assert = |
671 |
28 |
nm->mkNode(ITE, y.eqNode(emp), rpaw.eqNode(x), nm->mkNode(AND, lem)); |
672 |
14 |
asserts.push_back(assert); |
673 |
|
|
674 |
|
// Thus, replaceall( x, y, z ) = rpaw |
675 |
14 |
retNode = rpaw; |
676 |
|
} |
677 |
1962 |
else if (t.getKind() == STRING_REPLACE_RE) |
678 |
|
{ |
679 |
|
// processing term: replace_re( x, y, z ) |
680 |
78 |
Node x = t[0]; |
681 |
78 |
Node y = t[1]; |
682 |
78 |
Node z = t[2]; |
683 |
78 |
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); |
684 |
|
|
685 |
|
// indexof_re(x, y, 0) |
686 |
78 |
Node idx = nm->mkNode(STRING_INDEXOF_RE, x, y, zero); |
687 |
|
|
688 |
|
// in_re("", y) |
689 |
|
Node matchesEmpty = |
690 |
78 |
nm->mkNode(STRING_IN_REGEXP, nm->mkConst(String("")), y); |
691 |
|
// k = z ++ x |
692 |
78 |
Node res1 = k.eqNode(nm->mkNode(STRING_CONCAT, z, x)); |
693 |
|
|
694 |
|
Node k1 = |
695 |
78 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_PRE, "rre_pre"); |
696 |
|
Node k2 = |
697 |
78 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH, "rre_match"); |
698 |
|
Node k3 = |
699 |
78 |
sc->mkSkolemCached(x, y, SkolemCache::SK_FIRST_MATCH_POST, "rre_post"); |
700 |
78 |
Node k2Len = nm->mkNode(STRING_LENGTH, k2); |
701 |
|
// x = k1 ++ k2 ++ k3 |
702 |
78 |
Node split = x.eqNode(nm->mkNode(STRING_CONCAT, k1, k2, k3)); |
703 |
|
// len(k1) = indexof_re(x, y, 0) |
704 |
78 |
Node k1Len = nm->mkNode(STRING_LENGTH, k1).eqNode(idx); |
705 |
78 |
Node l = SkolemCache::mkLengthVar(t); |
706 |
78 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
707 |
|
Node bound = |
708 |
78 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, l), nm->mkNode(LT, l, k2Len)); |
709 |
|
Node body = nm->mkNode( |
710 |
|
OR, |
711 |
78 |
bound.negate(), |
712 |
78 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, k2, zero, l), y) |
713 |
156 |
.negate()); |
714 |
|
// forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r) |
715 |
78 |
Node shortestMatch = utils::mkForallInternal(bvll, body); |
716 |
|
// in_re(k2, y) |
717 |
78 |
Node match = nm->mkNode(STRING_IN_REGEXP, k2, y); |
718 |
|
// k = k1 ++ z ++ k3 |
719 |
78 |
Node res = k.eqNode(nm->mkNode(STRING_CONCAT, k1, z, k3)); |
720 |
|
|
721 |
|
// IF: indexof_re(x, y, 0) = -1 |
722 |
|
// THEN: k = x |
723 |
|
// ELSE: |
724 |
|
// x = k1 ++ k2 ++ k3 ^ |
725 |
|
// len(k1) = indexof_re(x, y, 0) ^ |
726 |
|
// (forall l. 0 <= l < len(k2) => ~in_re(substr(k2, 0, l), r)) ^ |
727 |
|
// in_re(k2, y) ^ |
728 |
|
// k = k1 ++ z ++ k3 |
729 |
39 |
asserts.push_back( |
730 |
234 |
nm->mkNode(ITE, |
731 |
78 |
idx.eqNode(negOne), |
732 |
78 |
k.eqNode(x), |
733 |
78 |
nm->mkNode(AND, {split, k1Len, shortestMatch, match, res}))); |
734 |
39 |
retNode = k; |
735 |
|
} |
736 |
1923 |
else if (t.getKind() == STRING_REPLACE_RE_ALL) |
737 |
|
{ |
738 |
50 |
Node x = t[0]; |
739 |
50 |
Node y = t[1]; |
740 |
50 |
Node z = t[2]; |
741 |
50 |
Node k = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "k"); |
742 |
|
|
743 |
|
Node numOcc = sc->mkTypedSkolemCached( |
744 |
50 |
nm->integerType(), x, y, SkolemCache::SK_NUM_OCCUR, "numOcc"); |
745 |
50 |
std::vector<TypeNode> argTypes; |
746 |
25 |
argTypes.push_back(nm->integerType()); |
747 |
|
Node us = |
748 |
50 |
sm->mkDummySkolem("Us", nm->mkFunctionType(argTypes, t.getType())); |
749 |
50 |
TypeNode ufType = nm->mkFunctionType(argTypes, nm->integerType()); |
750 |
|
Node uf = sc->mkTypedSkolemCached( |
751 |
50 |
ufType, x, y, SkolemCache::SK_OCCUR_INDEX, "Uf"); |
752 |
|
Node ul = |
753 |
50 |
sc->mkTypedSkolemCached(ufType, x, y, SkolemCache::SK_OCCUR_LEN, "Ul"); |
754 |
|
|
755 |
50 |
Node emp = Word::mkEmptyWord(t.getType()); |
756 |
|
|
757 |
50 |
Node yp = nm->mkNode(REGEXP_DIFF, y, nm->mkNode(STRING_TO_REGEXP, emp)); |
758 |
|
// indexof_re(x, y', 0) = -1 |
759 |
50 |
Node noMatch = nm->mkNode(STRING_INDEXOF_RE, x, yp, zero).eqNode(negOne); |
760 |
|
|
761 |
50 |
Node ufno = nm->mkNode(APPLY_UF, uf, numOcc); |
762 |
50 |
Node usno = nm->mkNode(APPLY_UF, us, numOcc); |
763 |
50 |
Node rem = nm->mkNode(STRING_SUBSTR, x, ufno, nm->mkNode(STRING_LENGTH, x)); |
764 |
|
|
765 |
50 |
std::vector<Node> lemmas; |
766 |
|
// numOcc > 0 |
767 |
25 |
lemmas.push_back(nm->mkNode(GT, numOcc, zero)); |
768 |
|
// k = Us(0) |
769 |
25 |
lemmas.push_back(k.eqNode(nm->mkNode(APPLY_UF, us, zero))); |
770 |
|
// Us(numOcc) = substr(x, Uf(numOcc)) |
771 |
25 |
lemmas.push_back(usno.eqNode(rem)); |
772 |
|
// Uf(0) = 0 |
773 |
25 |
lemmas.push_back(nm->mkNode(APPLY_UF, uf, zero).eqNode(zero)); |
774 |
|
// indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 |
775 |
25 |
lemmas.push_back( |
776 |
50 |
nm->mkNode(STRING_INDEXOF_RE, rem, yp, zero).eqNode(negOne)); |
777 |
|
|
778 |
50 |
Node i = SkolemCache::mkIndexVar(t); |
779 |
50 |
Node bvli = nm->mkNode(BOUND_VAR_LIST, i); |
780 |
|
Node bound = |
781 |
50 |
nm->mkNode(AND, nm->mkNode(GEQ, i, zero), nm->mkNode(LT, i, numOcc)); |
782 |
50 |
Node ip1 = nm->mkNode(PLUS, i, one); |
783 |
50 |
Node ufi = nm->mkNode(APPLY_UF, uf, i); |
784 |
50 |
Node ufip1 = nm->mkNode(APPLY_UF, uf, ip1); |
785 |
50 |
Node ulip1 = nm->mkNode(APPLY_UF, ul, ip1); |
786 |
|
// ii = Uf(i + 1) - Ul(i + 1) |
787 |
50 |
Node ii = nm->mkNode(MINUS, ufip1, ulip1); |
788 |
|
|
789 |
50 |
std::vector<Node> flem; |
790 |
|
// Ul(i + 1) > 0 |
791 |
25 |
flem.push_back(nm->mkNode(GT, ulip1, zero)); |
792 |
|
// Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) |
793 |
25 |
flem.push_back(ufip1.eqNode( |
794 |
50 |
nm->mkNode(PLUS, nm->mkNode(STRING_INDEXOF_RE, x, yp, ufi), ulip1))); |
795 |
|
// in_re(substr(x, ii, Ul(i + 1)), y') |
796 |
75 |
flem.push_back(nm->mkNode( |
797 |
50 |
STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, ulip1), yp)); |
798 |
50 |
Node l = SkolemCache::mkLengthVar(t); |
799 |
50 |
Node bvll = nm->mkNode(BOUND_VAR_LIST, l); |
800 |
|
Node lenBound = |
801 |
50 |
nm->mkNode(AND, nm->mkNode(LT, zero, l), nm->mkNode(LT, l, ulip1)); |
802 |
|
Node shortestMatchBody = nm->mkNode( |
803 |
|
OR, |
804 |
50 |
lenBound.negate(), |
805 |
50 |
nm->mkNode(STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, ii, l), y) |
806 |
100 |
.negate()); |
807 |
|
// forall l. 0 < l < Ul(i + 1) => |
808 |
|
// ~in_re(substr(x, Uf(i + 1) - Ul(i + 1), l), y') |
809 |
25 |
flem.push_back(utils::mkForallInternal(bvll, shortestMatchBody)); |
810 |
|
Node pfxMatch = |
811 |
50 |
nm->mkNode(STRING_SUBSTR, x, ufi, nm->mkNode(MINUS, ii, ufi)); |
812 |
|
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) |
813 |
25 |
flem.push_back( |
814 |
50 |
nm->mkNode(APPLY_UF, us, i) |
815 |
50 |
.eqNode(nm->mkNode( |
816 |
50 |
STRING_CONCAT, pfxMatch, z, nm->mkNode(APPLY_UF, us, ip1)))); |
817 |
50 |
Node body = nm->mkNode(OR, bound.negate(), nm->mkNode(AND, flem)); |
818 |
50 |
Node forall = utils::mkForallInternal(bvli, body); |
819 |
25 |
lemmas.push_back(forall); |
820 |
|
|
821 |
|
// IF indexof(x, y', 0) = -1 |
822 |
|
// THEN: k = x |
823 |
|
// ELSE: |
824 |
|
// numOcc > 0 ^ |
825 |
|
// k = Us(0) ^ Us(numOcc) = substr(x, Uf(numOcc)) ^ |
826 |
|
// Uf(0) = 0 ^ indexof_re(substr(x, Uf(numOcc)), y', 0) = -1 ^ |
827 |
|
// forall i. 0 <= i < nummOcc => |
828 |
|
// Ul(i + 1) > 0 ^ |
829 |
|
// Uf(i + 1) = indexof_re(x, yp, Uf(i)) + Ul(i + 1) ^ |
830 |
|
// in_re(substr(x, ii, Ul(i + 1)), y') ^ |
831 |
|
// forall l. 0 < l < Ul(i + 1) => |
832 |
|
// ~in_re(substr(x, ii, l), y') |
833 |
|
// Us(i) = substr(x, Uf(i), ii - Uf(i)) ++ z ++ Us(i + 1) |
834 |
|
// where ii = Uf(i + 1) - Ul(i + 1) |
835 |
|
// where y' = re.diff(y, "") |
836 |
|
// |
837 |
|
// Conceptually, y' is the regex y but excluding the empty string (because |
838 |
|
// we do not want to match empty strings), numOcc is the number of shortest |
839 |
|
// matches of y' in x, Uf(i) is the end position of the i-th match, Ul(i) |
840 |
|
// is the length of the i^th match, and Us(i) is the result of processing |
841 |
|
// the remainder after processing the i^th occurrence of y in x. |
842 |
|
// |
843 |
|
// Visualization of Uf(i) and Ul(i): |
844 |
|
// |
845 |
|
// x = |------------| match 1 |-----------| match 2 |---| |
846 |
|
// | | | |
847 |
|
// Uf(0) Uf(1) Uf(2) |
848 |
|
// |
849 |
|
// |---------| |-----------| |
850 |
|
// Ul(1) Ul(2) |
851 |
25 |
asserts.push_back( |
852 |
50 |
nm->mkNode(ITE, noMatch, k.eqNode(x), nm->mkNode(AND, lemmas))); |
853 |
25 |
retNode = k; |
854 |
|
} |
855 |
1898 |
else if (t.getKind() == STRING_TOLOWER || t.getKind() == STRING_TOUPPER) |
856 |
|
{ |
857 |
12 |
Node x = t[0]; |
858 |
12 |
Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); |
859 |
|
|
860 |
12 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
861 |
12 |
Node lenr = nm->mkNode(STRING_LENGTH, r); |
862 |
12 |
Node eqLenA = lenx.eqNode(lenr); |
863 |
|
|
864 |
12 |
Node i = SkolemCache::mkIndexVar(t); |
865 |
12 |
Node bvi = nm->mkNode(BOUND_VAR_LIST, i); |
866 |
|
|
867 |
12 |
Node ci = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, x, i, one)); |
868 |
12 |
Node ri = nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, r, i, one)); |
869 |
|
|
870 |
12 |
Node lb = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 97 : 65)); |
871 |
12 |
Node ub = nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? 122 : 90)); |
872 |
|
Node offset = |
873 |
12 |
nm->mkConst(Rational(t.getKind() == STRING_TOUPPER ? -32 : 32)); |
874 |
|
|
875 |
|
Node res = nm->mkNode( |
876 |
|
ITE, |
877 |
12 |
nm->mkNode(AND, nm->mkNode(LEQ, lb, ci), nm->mkNode(LEQ, ci, ub)), |
878 |
12 |
nm->mkNode(PLUS, ci, offset), |
879 |
36 |
ci); |
880 |
|
|
881 |
|
Node bound = |
882 |
12 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); |
883 |
12 |
Node body = nm->mkNode(OR, bound.negate(), ri.eqNode(res)); |
884 |
12 |
Node rangeA = utils::mkForallInternal(bvi, body); |
885 |
|
|
886 |
|
// upper 65 ... 90 |
887 |
|
// lower 97 ... 122 |
888 |
|
// assert: |
889 |
|
// len(r) = len(x) ^ |
890 |
|
// forall i. 0 <= i < len(r) => |
891 |
|
// str.code( str.substr(r,i,1) ) = ite( 97 <= ci <= 122, ci-32, ci) |
892 |
|
// where ci = str.code( str.substr(x,i,1) ) |
893 |
12 |
Node assert = nm->mkNode(AND, eqLenA, rangeA); |
894 |
6 |
asserts.push_back(assert); |
895 |
|
|
896 |
|
// Thus, toLower( x ) = r |
897 |
6 |
retNode = r; |
898 |
|
} |
899 |
1892 |
else if (t.getKind() == STRING_REV) |
900 |
|
{ |
901 |
36 |
Node x = t[0]; |
902 |
36 |
Node r = sc->mkSkolemCached(t, SkolemCache::SK_PURIFY, "r"); |
903 |
|
|
904 |
36 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
905 |
36 |
Node lenr = nm->mkNode(STRING_LENGTH, r); |
906 |
36 |
Node eqLenA = lenx.eqNode(lenr); |
907 |
|
|
908 |
36 |
Node i = SkolemCache::mkIndexVar(t); |
909 |
36 |
Node bvi = nm->mkNode(BOUND_VAR_LIST, i); |
910 |
|
|
911 |
|
Node revi = nm->mkNode( |
912 |
36 |
MINUS, nm->mkNode(STRING_LENGTH, x), nm->mkNode(PLUS, i, one)); |
913 |
36 |
Node ssr = nm->mkNode(STRING_SUBSTR, r, i, one); |
914 |
36 |
Node ssx = nm->mkNode(STRING_SUBSTR, x, revi, one); |
915 |
|
|
916 |
|
Node bound = |
917 |
36 |
nm->mkNode(AND, nm->mkNode(LEQ, zero, i), nm->mkNode(LT, i, lenr)); |
918 |
36 |
Node body = nm->mkNode(OR, bound.negate(), ssr.eqNode(ssx)); |
919 |
36 |
Node rangeA = utils::mkForallInternal(bvi, body); |
920 |
|
// assert: |
921 |
|
// len(r) = len(x) ^ |
922 |
|
// forall i. 0 <= i < len(r) => |
923 |
|
// substr(r,i,1) = substr(x,len(x)-(i+1),1) |
924 |
36 |
Node assert = nm->mkNode(AND, eqLenA, rangeA); |
925 |
18 |
asserts.push_back(assert); |
926 |
|
|
927 |
|
// Thus, (str.rev x) = r |
928 |
18 |
retNode = r; |
929 |
|
} |
930 |
1874 |
else if (t.getKind() == kind::STRING_CONTAINS) |
931 |
|
{ |
932 |
256 |
Node x = t[0]; |
933 |
256 |
Node s = t[1]; |
934 |
|
//negative contains reduces to existential |
935 |
256 |
Node lenx = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, x); |
936 |
256 |
Node lens = NodeManager::currentNM()->mkNode(kind::STRING_LENGTH, s); |
937 |
256 |
Node b1 = SkolemCache::mkIndexVar(t); |
938 |
256 |
Node b1v = NodeManager::currentNM()->mkNode(kind::BOUND_VAR_LIST, b1); |
939 |
|
Node body = NodeManager::currentNM()->mkNode( |
940 |
|
kind::AND, |
941 |
256 |
NodeManager::currentNM()->mkNode(kind::LEQ, zero, b1), |
942 |
384 |
NodeManager::currentNM()->mkNode( |
943 |
|
kind::LEQ, |
944 |
|
b1, |
945 |
256 |
NodeManager::currentNM()->mkNode(kind::MINUS, lenx, lens)), |
946 |
512 |
NodeManager::currentNM()->mkNode( |
947 |
|
kind::EQUAL, |
948 |
256 |
NodeManager::currentNM()->mkNode(kind::STRING_SUBSTR, x, b1, lens), |
949 |
896 |
s)); |
950 |
128 |
retNode = utils::mkForallInternal(b1v, body.negate()).negate(); |
951 |
|
} |
952 |
1746 |
else if (t.getKind() == kind::STRING_LEQ) |
953 |
|
{ |
954 |
|
Node ltp = sc->mkTypedSkolemCached( |
955 |
72 |
nm->booleanType(), t, SkolemCache::SK_PURIFY, "ltp"); |
956 |
72 |
Node k = SkolemCache::mkIndexVar(t); |
957 |
|
|
958 |
72 |
std::vector<Node> conj; |
959 |
36 |
conj.push_back(nm->mkNode(GEQ, k, zero)); |
960 |
72 |
Node substr[2]; |
961 |
72 |
Node code[2]; |
962 |
108 |
for (unsigned r = 0; r < 2; r++) |
963 |
|
{ |
964 |
144 |
Node ta = t[r]; |
965 |
144 |
Node tb = t[1 - r]; |
966 |
72 |
substr[r] = nm->mkNode(STRING_SUBSTR, ta, zero, k); |
967 |
72 |
code[r] = |
968 |
144 |
nm->mkNode(STRING_TO_CODE, nm->mkNode(STRING_SUBSTR, ta, k, one)); |
969 |
72 |
conj.push_back(nm->mkNode(LEQ, k, nm->mkNode(STRING_LENGTH, ta))); |
970 |
|
} |
971 |
36 |
conj.push_back(substr[0].eqNode(substr[1])); |
972 |
72 |
std::vector<Node> ite_ch; |
973 |
36 |
ite_ch.push_back(ltp); |
974 |
108 |
for (unsigned r = 0; r < 2; r++) |
975 |
|
{ |
976 |
72 |
ite_ch.push_back(nm->mkNode(LT, code[r], code[1 - r])); |
977 |
|
} |
978 |
36 |
conj.push_back(nm->mkNode(ITE, ite_ch)); |
979 |
|
|
980 |
72 |
Node conjn = utils::mkForallInternal(nm->mkNode(BOUND_VAR_LIST, k), |
981 |
72 |
nm->mkNode(AND, conj).negate()) |
982 |
72 |
.negate(); |
983 |
|
// Intuitively, the reduction says either x and y are equal, or they have |
984 |
|
// some (maximal) common prefix after which their characters at position k |
985 |
|
// are distinct, and the comparison of their code matches the return value |
986 |
|
// of the overall term. |
987 |
|
// Notice the below reduction relies on the semantics of str.code being -1 |
988 |
|
// for the empty string. In particular, say x = "a" and y = "ab", then the |
989 |
|
// reduction below is satisfied for k = 1, since |
990 |
|
// str.code(substr( "a", 1, 1 )) = str.code( "" ) = -1 < |
991 |
|
// str.code(substr( "ab", 1, 1 )) = str.code( "b" ) = 66 |
992 |
|
|
993 |
|
// assert: |
994 |
|
// IF x=y |
995 |
|
// THEN: ltp |
996 |
|
// ELSE: exists k. |
997 |
|
// k >= 0 AND k <= len( x ) AND k <= len( y ) AND |
998 |
|
// substr( x, 0, k ) = substr( y, 0, k ) AND |
999 |
|
// IF ltp |
1000 |
|
// THEN: str.code(substr( x, k, 1 )) < str.code(substr( y, k, 1 )) |
1001 |
|
// ELSE: str.code(substr( x, k, 1 )) > str.code(substr( y, k, 1 )) |
1002 |
72 |
Node assert = nm->mkNode(ITE, t[0].eqNode(t[1]), ltp, conjn); |
1003 |
36 |
asserts.push_back(assert); |
1004 |
|
|
1005 |
|
// Thus, str.<=( x, y ) = ltp |
1006 |
36 |
retNode = ltp; |
1007 |
|
} |
1008 |
8556 |
return retNode; |
1009 |
|
} |
1010 |
|
|
1011 |
3738 |
Node StringsPreprocess::simplify(Node t, std::vector<Node>& asserts) |
1012 |
|
{ |
1013 |
3738 |
size_t prev_asserts = asserts.size(); |
1014 |
|
// call the static reduce routine |
1015 |
3738 |
Node retNode = reduce(t, asserts, d_sc); |
1016 |
3738 |
if( t!=retNode ){ |
1017 |
2028 |
Trace("strings-preprocess") << "StringsPreprocess::simplify: " << t << " -> " << retNode << std::endl; |
1018 |
2028 |
if (!asserts.empty()) |
1019 |
|
{ |
1020 |
3854 |
Trace("strings-preprocess") |
1021 |
3854 |
<< " ... new nodes (" << (asserts.size() - prev_asserts) |
1022 |
1927 |
<< "):" << std::endl; |
1023 |
3835 |
for (size_t i = prev_asserts; i < asserts.size(); ++i) |
1024 |
|
{ |
1025 |
1908 |
Trace("strings-preprocess") << " " << asserts[i] << std::endl; |
1026 |
|
} |
1027 |
|
} |
1028 |
2028 |
if (d_statReductions != nullptr) |
1029 |
|
{ |
1030 |
1932 |
(*d_statReductions) << t.getKind(); |
1031 |
|
} |
1032 |
|
} |
1033 |
|
else |
1034 |
|
{ |
1035 |
3420 |
Trace("strings-preprocess-debug") |
1036 |
1710 |
<< "Return " << retNode << " unchanged" << std::endl; |
1037 |
|
} |
1038 |
3738 |
return retNode; |
1039 |
|
} |
1040 |
|
|
1041 |
3016 |
Node StringsPreprocess::simplifyRec(Node t, std::vector<Node>& asserts) |
1042 |
|
{ |
1043 |
3016 |
std::map<Node, Node>::iterator it = d_visited.find(t); |
1044 |
3016 |
if (it != d_visited.end()) |
1045 |
|
{ |
1046 |
1202 |
return it->second; |
1047 |
|
}else{ |
1048 |
3628 |
Node retNode = t; |
1049 |
1814 |
if( t.getNumChildren()==0 ){ |
1050 |
448 |
retNode = simplify(t, asserts); |
1051 |
|
} |
1052 |
1366 |
else if (!t.isClosure()) |
1053 |
|
{ |
1054 |
1358 |
bool changed = false; |
1055 |
2716 |
std::vector< Node > cc; |
1056 |
1358 |
if( t.getMetaKind() == kind::metakind::PARAMETERIZED ){ |
1057 |
16 |
cc.push_back( t.getOperator() ); |
1058 |
|
} |
1059 |
4207 |
for(unsigned i=0; i<t.getNumChildren(); i++) { |
1060 |
5698 |
Node s = simplifyRec(t[i], asserts); |
1061 |
2849 |
cc.push_back( s ); |
1062 |
2849 |
if( s!=t[i] ) { |
1063 |
237 |
changed = true; |
1064 |
|
} |
1065 |
|
} |
1066 |
2716 |
Node tmp = t; |
1067 |
1358 |
if( changed ){ |
1068 |
206 |
tmp = NodeManager::currentNM()->mkNode( t.getKind(), cc ); |
1069 |
|
} |
1070 |
1358 |
retNode = simplify(tmp, asserts); |
1071 |
|
} |
1072 |
1814 |
d_visited[t] = retNode; |
1073 |
1814 |
return retNode; |
1074 |
|
} |
1075 |
|
} |
1076 |
|
|
1077 |
101 |
Node StringsPreprocess::processAssertion(Node n, std::vector<Node>& asserts) |
1078 |
|
{ |
1079 |
202 |
std::vector<Node> asserts_curr; |
1080 |
101 |
Node ret = simplifyRec(n, asserts_curr); |
1081 |
233 |
while (!asserts_curr.empty()) |
1082 |
|
{ |
1083 |
132 |
Node curr = asserts_curr.back(); |
1084 |
66 |
asserts_curr.pop_back(); |
1085 |
132 |
std::vector<Node> asserts_tmp; |
1086 |
66 |
curr = simplifyRec(curr, asserts_tmp); |
1087 |
66 |
asserts_curr.insert( |
1088 |
132 |
asserts_curr.end(), asserts_tmp.begin(), asserts_tmp.end()); |
1089 |
66 |
asserts.push_back(curr); |
1090 |
|
} |
1091 |
202 |
return ret; |
1092 |
|
} |
1093 |
|
|
1094 |
|
} // namespace strings |
1095 |
|
} // namespace theory |
1096 |
29340 |
} // namespace cvc5 |