1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/strings_entail.h" |
17 |
|
|
18 |
|
#include "expr/node_builder.h" |
19 |
|
#include "theory/rewriter.h" |
20 |
|
#include "theory/strings/arith_entail.h" |
21 |
|
#include "theory/strings/sequences_rewriter.h" |
22 |
|
#include "theory/strings/theory_strings_utils.h" |
23 |
|
#include "theory/strings/word.h" |
24 |
|
#include "util/rational.h" |
25 |
|
#include "util/string.h" |
26 |
|
|
27 |
|
using namespace cvc5::kind; |
28 |
|
|
29 |
|
namespace cvc5 { |
30 |
|
namespace theory { |
31 |
|
namespace strings { |
32 |
|
|
33 |
10027 |
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter) |
34 |
|
{ |
35 |
10027 |
} |
36 |
|
|
37 |
2823 |
bool StringsEntail::canConstantContainConcat(Node c, |
38 |
|
Node n, |
39 |
|
int& firstc, |
40 |
|
int& lastc) |
41 |
|
{ |
42 |
2823 |
Assert(c.isConst()); |
43 |
2823 |
Assert(n.getKind() == STRING_CONCAT); |
44 |
|
// must find constant components in order |
45 |
2823 |
size_t pos = 0; |
46 |
2823 |
firstc = -1; |
47 |
2823 |
lastc = -1; |
48 |
10537 |
for (unsigned i = 0; i < n.getNumChildren(); i++) |
49 |
|
{ |
50 |
7842 |
if (n[i].isConst()) |
51 |
|
{ |
52 |
1838 |
firstc = firstc == -1 ? i : firstc; |
53 |
1838 |
lastc = i; |
54 |
1838 |
size_t new_pos = Word::find(c, n[i], pos); |
55 |
1838 |
if (new_pos == std::string::npos) |
56 |
|
{ |
57 |
120 |
return false; |
58 |
|
} |
59 |
|
else |
60 |
|
{ |
61 |
1718 |
pos = new_pos + Word::getLength(n[i]); |
62 |
|
} |
63 |
|
} |
64 |
6004 |
else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) |
65 |
|
{ |
66 |
20 |
Assert(c.getType().isString()); // string-only |
67 |
20 |
const std::vector<unsigned>& tvec = c.getConst<String>().getVec(); |
68 |
|
// find the first occurrence of a digit starting at pos |
69 |
116 |
while (pos < tvec.size() && !String::isDigit(tvec[pos])) |
70 |
|
{ |
71 |
48 |
pos++; |
72 |
|
} |
73 |
20 |
if (pos == tvec.size()) |
74 |
|
{ |
75 |
8 |
return false; |
76 |
|
} |
77 |
|
// must consume at least one digit here |
78 |
12 |
pos++; |
79 |
|
} |
80 |
|
} |
81 |
2695 |
return true; |
82 |
|
} |
83 |
|
|
84 |
46088 |
bool StringsEntail::canConstantContainList(Node c, |
85 |
|
std::vector<Node>& l, |
86 |
|
int& firstc, |
87 |
|
int& lastc) |
88 |
|
{ |
89 |
46088 |
Assert(c.isConst()); |
90 |
|
// must find constant components in order |
91 |
46088 |
size_t pos = 0; |
92 |
46088 |
firstc = -1; |
93 |
46088 |
lastc = -1; |
94 |
133878 |
for (unsigned i = 0; i < l.size(); i++) |
95 |
|
{ |
96 |
88284 |
if (l[i].isConst()) |
97 |
|
{ |
98 |
77269 |
firstc = firstc == -1 ? i : firstc; |
99 |
77269 |
lastc = i; |
100 |
77269 |
size_t new_pos = Word::find(c, l[i], pos); |
101 |
77269 |
if (new_pos == std::string::npos) |
102 |
|
{ |
103 |
494 |
return false; |
104 |
|
} |
105 |
|
else |
106 |
|
{ |
107 |
76775 |
pos = new_pos + Word::getLength(l[i]); |
108 |
|
} |
109 |
|
} |
110 |
|
} |
111 |
45594 |
return true; |
112 |
|
} |
113 |
|
|
114 |
78990 |
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, |
115 |
|
std::vector<Node>& nr, |
116 |
|
int dir, |
117 |
|
Node& curr, |
118 |
|
bool strict) |
119 |
|
{ |
120 |
78990 |
Assert(dir == 1 || dir == -1); |
121 |
78990 |
Assert(nr.empty()); |
122 |
78990 |
NodeManager* nm = NodeManager::currentNM(); |
123 |
157980 |
Node zero = nm->mkConst(cvc5::Rational(0)); |
124 |
78990 |
bool ret = false; |
125 |
78990 |
bool success = true; |
126 |
78990 |
unsigned sindex = 0; |
127 |
211816 |
while (success && curr != zero && sindex < n1.size()) |
128 |
|
{ |
129 |
66413 |
Assert(!curr.isNull()); |
130 |
66413 |
success = false; |
131 |
66413 |
unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); |
132 |
66413 |
if (n1[sindex_use].isConst()) |
133 |
|
{ |
134 |
|
// could strip part of a constant |
135 |
25028 |
Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr)); |
136 |
12514 |
if (!lowerBound.isNull()) |
137 |
|
{ |
138 |
3942 |
Assert(lowerBound.isConst()); |
139 |
7884 |
Rational lbr = lowerBound.getConst<Rational>(); |
140 |
3942 |
if (lbr.sgn() > 0) |
141 |
|
{ |
142 |
2319 |
Assert(ArithEntail::check(curr, true)); |
143 |
4638 |
Node s = n1[sindex_use]; |
144 |
2319 |
size_t slen = Word::getLength(s); |
145 |
4638 |
Node ncl = nm->mkConst(cvc5::Rational(slen)); |
146 |
4638 |
Node next_s = nm->mkNode(MINUS, lowerBound, ncl); |
147 |
2319 |
next_s = Rewriter::rewrite(next_s); |
148 |
2319 |
Assert(next_s.isConst()); |
149 |
|
// we can remove the entire constant |
150 |
2319 |
if (next_s.getConst<Rational>().sgn() >= 0) |
151 |
|
{ |
152 |
1037 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); |
153 |
1037 |
success = true; |
154 |
1037 |
sindex++; |
155 |
|
} |
156 |
|
else |
157 |
|
{ |
158 |
|
// we can remove part of the constant |
159 |
|
// lower bound minus the length of a concrete string is negative, |
160 |
|
// hence lowerBound cannot be larger than long max |
161 |
1282 |
Assert(lbr < Rational(String::maxSize())); |
162 |
1282 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); |
163 |
1282 |
uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); |
164 |
1282 |
Assert(lbsize < slen); |
165 |
1282 |
if (dir == 1) |
166 |
|
{ |
167 |
|
// strip partially from the front |
168 |
1128 |
nr.push_back(Word::prefix(s, lbsize)); |
169 |
1128 |
n1[sindex_use] = Word::suffix(s, slen - lbsize); |
170 |
|
} |
171 |
|
else |
172 |
|
{ |
173 |
|
// strip partially from the back |
174 |
154 |
nr.push_back(Word::suffix(s, lbsize)); |
175 |
154 |
n1[sindex_use] = Word::prefix(s, slen - lbsize); |
176 |
|
} |
177 |
1282 |
ret = true; |
178 |
|
} |
179 |
2319 |
Assert(ArithEntail::check(curr)); |
180 |
|
} |
181 |
|
else |
182 |
|
{ |
183 |
|
// we cannot remove the constant |
184 |
|
} |
185 |
|
} |
186 |
|
} |
187 |
|
else |
188 |
|
{ |
189 |
|
Node next_s = NodeManager::currentNM()->mkNode( |
190 |
|
MINUS, |
191 |
|
curr, |
192 |
107798 |
NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); |
193 |
53899 |
next_s = Rewriter::rewrite(next_s); |
194 |
53899 |
if (ArithEntail::check(next_s)) |
195 |
|
{ |
196 |
362 |
success = true; |
197 |
362 |
curr = next_s; |
198 |
362 |
sindex++; |
199 |
|
} |
200 |
|
} |
201 |
|
} |
202 |
78990 |
if (sindex > 0 && (!strict || curr == zero)) |
203 |
|
{ |
204 |
1299 |
if (dir == 1) |
205 |
|
{ |
206 |
1205 |
nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); |
207 |
1205 |
n1.erase(n1.begin(), n1.begin() + sindex); |
208 |
|
} |
209 |
|
else |
210 |
|
{ |
211 |
94 |
nr.insert(nr.end(), n1.end() - sindex, n1.end()); |
212 |
94 |
n1.erase(n1.end() - sindex, n1.end()); |
213 |
|
} |
214 |
1299 |
ret = true; |
215 |
|
} |
216 |
157980 |
return ret; |
217 |
|
} |
218 |
|
|
219 |
210455 |
int StringsEntail::componentContains(std::vector<Node>& n1, |
220 |
|
std::vector<Node>& n2, |
221 |
|
std::vector<Node>& nb, |
222 |
|
std::vector<Node>& ne, |
223 |
|
bool computeRemainder, |
224 |
|
int remainderDir) |
225 |
|
{ |
226 |
210455 |
Assert(nb.empty()); |
227 |
210455 |
Assert(ne.empty()); |
228 |
210455 |
Trace("strings-entail") << "Component contains: " << std::endl; |
229 |
210455 |
Trace("strings-entail") << "n1 = " << n1 << std::endl; |
230 |
210455 |
Trace("strings-entail") << "n2 = " << n2 << std::endl; |
231 |
|
// if n2 is a singleton, we can do optimized version here |
232 |
210455 |
if (n2.size() == 1) |
233 |
|
{ |
234 |
348820 |
for (unsigned i = 0; i < n1.size(); i++) |
235 |
|
{ |
236 |
399284 |
Node n1rb; |
237 |
399284 |
Node n1re; |
238 |
201512 |
if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) |
239 |
|
{ |
240 |
3740 |
if (computeRemainder) |
241 |
|
{ |
242 |
266 |
n1[i] = n2[0]; |
243 |
266 |
if (remainderDir != -1) |
244 |
|
{ |
245 |
266 |
if (!n1re.isNull()) |
246 |
|
{ |
247 |
41 |
ne.push_back(n1re); |
248 |
|
} |
249 |
266 |
ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); |
250 |
266 |
n1.erase(n1.begin() + i + 1, n1.end()); |
251 |
|
} |
252 |
|
else if (!n1re.isNull()) |
253 |
|
{ |
254 |
|
n1[i] = Rewriter::rewrite( |
255 |
|
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); |
256 |
|
} |
257 |
266 |
if (remainderDir != 1) |
258 |
|
{ |
259 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
260 |
|
n1.erase(n1.begin(), n1.begin() + i); |
261 |
|
if (!n1rb.isNull()) |
262 |
|
{ |
263 |
|
nb.push_back(n1rb); |
264 |
|
} |
265 |
|
} |
266 |
266 |
else if (!n1rb.isNull()) |
267 |
|
{ |
268 |
90 |
n1[i] = Rewriter::rewrite( |
269 |
180 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); |
270 |
|
} |
271 |
|
} |
272 |
3740 |
return i; |
273 |
|
} |
274 |
|
} |
275 |
|
} |
276 |
59407 |
else if (n1.size() >= n2.size()) |
277 |
|
{ |
278 |
17004 |
unsigned diff = n1.size() - n2.size(); |
279 |
44033 |
for (unsigned i = 0; i <= diff; i++) |
280 |
|
{ |
281 |
54503 |
Node n1rb_first; |
282 |
54503 |
Node n1re_first; |
283 |
|
// first component of n2 must be a suffix |
284 |
54948 |
if (componentContainsBase(n1[i], |
285 |
27474 |
n2[0], |
286 |
|
n1rb_first, |
287 |
|
n1re_first, |
288 |
|
1, |
289 |
27474 |
computeRemainder && remainderDir != 1)) |
290 |
|
{ |
291 |
6880 |
Assert(n1re_first.isNull()); |
292 |
10893 |
for (unsigned j = 1; j < n2.size(); j++) |
293 |
|
{ |
294 |
|
// are we in the last component? |
295 |
10893 |
if (j + 1 == n2.size()) |
296 |
|
{ |
297 |
1927 |
Node n1rb_last; |
298 |
1927 |
Node n1re_last; |
299 |
|
// last component of n2 must be a prefix |
300 |
3854 |
if (componentContainsBase(n1[i + j], |
301 |
1927 |
n2[j], |
302 |
|
n1rb_last, |
303 |
|
n1re_last, |
304 |
|
-1, |
305 |
1927 |
computeRemainder && remainderDir != -1)) |
306 |
|
{ |
307 |
890 |
Trace("strings-entail-debug") |
308 |
445 |
<< "Last remainder begin is " << n1rb_last << std::endl; |
309 |
890 |
Trace("strings-entail-debug") |
310 |
445 |
<< "Last remainder end is " << n1re_last << std::endl; |
311 |
445 |
Assert(n1rb_last.isNull()); |
312 |
445 |
if (computeRemainder) |
313 |
|
{ |
314 |
12 |
if (remainderDir != -1) |
315 |
|
{ |
316 |
12 |
if (!n1re_last.isNull()) |
317 |
|
{ |
318 |
2 |
ne.push_back(n1re_last); |
319 |
|
} |
320 |
12 |
ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); |
321 |
12 |
n1.erase(n1.begin() + i + j + 1, n1.end()); |
322 |
12 |
n1[i + j] = n2[j]; |
323 |
|
} |
324 |
12 |
if (remainderDir != 1) |
325 |
|
{ |
326 |
|
n1[i] = n2[0]; |
327 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
328 |
|
n1.erase(n1.begin(), n1.begin() + i); |
329 |
|
if (!n1rb_first.isNull()) |
330 |
|
{ |
331 |
|
nb.push_back(n1rb_first); |
332 |
|
} |
333 |
|
} |
334 |
|
} |
335 |
445 |
Trace("strings-entail-debug") << "ne = " << ne << std::endl; |
336 |
445 |
Trace("strings-entail-debug") << "nb = " << nb << std::endl; |
337 |
445 |
Trace("strings-entail-debug") << "...return " << i << std::endl; |
338 |
445 |
return i; |
339 |
|
} |
340 |
|
else |
341 |
|
{ |
342 |
1482 |
break; |
343 |
|
} |
344 |
|
} |
345 |
8966 |
else if (n1[i + j] != n2[j]) |
346 |
|
{ |
347 |
4953 |
break; |
348 |
|
} |
349 |
|
} |
350 |
|
} |
351 |
|
} |
352 |
|
} |
353 |
206270 |
return -1; |
354 |
|
} |
355 |
|
|
356 |
230913 |
bool StringsEntail::componentContainsBase( |
357 |
|
Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) |
358 |
|
{ |
359 |
230913 |
Assert(n1rb.isNull()); |
360 |
230913 |
Assert(n1re.isNull()); |
361 |
|
|
362 |
230913 |
NodeManager* nm = NodeManager::currentNM(); |
363 |
|
|
364 |
230913 |
if (n1 == n2) |
365 |
|
{ |
366 |
10095 |
return true; |
367 |
|
} |
368 |
|
else |
369 |
|
{ |
370 |
220818 |
if (n1.isConst() && n2.isConst()) |
371 |
|
{ |
372 |
3457 |
size_t len1 = Word::getLength(n1); |
373 |
3457 |
size_t len2 = Word::getLength(n2); |
374 |
3457 |
if (len2 < len1) |
375 |
|
{ |
376 |
586 |
if (dir == 1) |
377 |
|
{ |
378 |
358 |
if (Word::suffix(n1, len2) == n2) |
379 |
|
{ |
380 |
64 |
if (computeRemainder) |
381 |
|
{ |
382 |
|
n1rb = Word::prefix(n1, len1 - len2); |
383 |
|
} |
384 |
64 |
return true; |
385 |
|
} |
386 |
|
} |
387 |
228 |
else if (dir == -1) |
388 |
|
{ |
389 |
4 |
if (Word::prefix(n1, len2) == n2) |
390 |
|
{ |
391 |
4 |
if (computeRemainder) |
392 |
|
{ |
393 |
|
n1re = Word::suffix(n1, len1 - len2); |
394 |
|
} |
395 |
4 |
return true; |
396 |
|
} |
397 |
|
} |
398 |
|
else |
399 |
|
{ |
400 |
224 |
size_t f = Word::find(n1, n2); |
401 |
224 |
if (f != std::string::npos) |
402 |
|
{ |
403 |
220 |
if (computeRemainder) |
404 |
|
{ |
405 |
117 |
if (f > 0) |
406 |
|
{ |
407 |
86 |
n1rb = Word::prefix(n1, f); |
408 |
|
} |
409 |
117 |
if (len1 > f + len2) |
410 |
|
{ |
411 |
37 |
n1re = Word::suffix(n1, len1 - (f + len2)); |
412 |
|
} |
413 |
|
} |
414 |
220 |
return true; |
415 |
|
} |
416 |
|
} |
417 |
|
} |
418 |
|
} |
419 |
|
else |
420 |
|
{ |
421 |
|
// cases for: |
422 |
|
// n1 = x containing n2 = substr( x, n2[1], n2[2] ) |
423 |
217361 |
if (n2.getKind() == STRING_SUBSTR) |
424 |
|
{ |
425 |
33584 |
if (n2[0] == n1) |
426 |
|
{ |
427 |
464 |
bool success = true; |
428 |
464 |
Node start_pos = n2[1]; |
429 |
464 |
Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]); |
430 |
464 |
Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]); |
431 |
464 |
if (dir == 1) |
432 |
|
{ |
433 |
|
// To be a suffix, start + length must be greater than |
434 |
|
// or equal to the length of the string. |
435 |
4 |
success = ArithEntail::check(end_pos, len_n2s); |
436 |
|
} |
437 |
460 |
else if (dir == -1) |
438 |
|
{ |
439 |
|
// To be a prefix, must literally start at 0, since |
440 |
|
// if we knew it started at <0, it should be rewritten to "", |
441 |
|
// if we knew it started at 0, then n2[1] should be rewritten to |
442 |
|
// 0. |
443 |
24 |
success = start_pos.isConst() |
444 |
12 |
&& start_pos.getConst<Rational>().sgn() == 0; |
445 |
|
} |
446 |
464 |
if (success) |
447 |
|
{ |
448 |
464 |
if (computeRemainder) |
449 |
|
{ |
450 |
|
// we can only compute the remainder if start_pos and end_pos |
451 |
|
// are known to be non-negative. |
452 |
54 |
if (!ArithEntail::check(start_pos) |
453 |
54 |
|| !ArithEntail::check(end_pos)) |
454 |
|
{ |
455 |
12 |
return false; |
456 |
|
} |
457 |
6 |
if (dir != -1) |
458 |
|
{ |
459 |
12 |
n1rb = nm->mkNode( |
460 |
8 |
STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); |
461 |
|
} |
462 |
6 |
if (dir != 1) |
463 |
|
{ |
464 |
6 |
n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); |
465 |
|
} |
466 |
|
} |
467 |
452 |
return true; |
468 |
|
} |
469 |
|
} |
470 |
|
} |
471 |
|
|
472 |
216897 |
if (!computeRemainder && dir == 0) |
473 |
|
{ |
474 |
195591 |
if (n1.getKind() == STRING_REPLACE) |
475 |
|
{ |
476 |
|
// (str.contains (str.replace x y z) w) ---> true |
477 |
|
// if (str.contains x w) --> true and (str.contains z w) ---> true |
478 |
4118 |
Node xCtnW = checkContains(n1[0], n2); |
479 |
2174 |
if (!xCtnW.isNull() && xCtnW.getConst<bool>()) |
480 |
|
{ |
481 |
832 |
Node zCtnW = checkContains(n1[2], n2); |
482 |
531 |
if (!zCtnW.isNull() && zCtnW.getConst<bool>()) |
483 |
|
{ |
484 |
230 |
return true; |
485 |
|
} |
486 |
|
} |
487 |
|
} |
488 |
|
} |
489 |
|
} |
490 |
|
} |
491 |
219836 |
return false; |
492 |
|
} |
493 |
|
|
494 |
210479 |
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, |
495 |
|
std::vector<Node>& n2, |
496 |
|
std::vector<Node>& nb, |
497 |
|
std::vector<Node>& ne, |
498 |
|
int dir) |
499 |
|
{ |
500 |
210479 |
Assert(nb.empty()); |
501 |
210479 |
Assert(ne.empty()); |
502 |
210479 |
bool changed = false; |
503 |
|
// for ( forwards, backwards ) direction |
504 |
566447 |
for (unsigned r = 0; r < 2; r++) |
505 |
|
{ |
506 |
388475 |
if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) |
507 |
|
{ |
508 |
384481 |
unsigned index0 = r == 0 ? 0 : n1.size() - 1; |
509 |
384481 |
unsigned index1 = r == 0 ? 0 : n2.size() - 1; |
510 |
384481 |
bool removeComponent = false; |
511 |
736455 |
Node n1cmp = n1[index0]; |
512 |
|
|
513 |
384481 |
if (n1cmp.isConst() && Word::isEmpty(n1cmp)) |
514 |
|
{ |
515 |
32458 |
return false; |
516 |
|
} |
517 |
|
|
518 |
703997 |
std::vector<Node> sss; |
519 |
703997 |
std::vector<Node> sls; |
520 |
352023 |
n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls); |
521 |
704046 |
Trace("strings-rewrite-debug2") |
522 |
352023 |
<< "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] |
523 |
352023 |
<< ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl; |
524 |
352023 |
if (n1cmp.isConst()) |
525 |
|
{ |
526 |
180086 |
Node s = n1cmp; |
527 |
90043 |
size_t slen = Word::getLength(s); |
528 |
|
// overlap is an overapproximation of the number of characters |
529 |
|
// n2[index1] can match in s |
530 |
90043 |
unsigned overlap = Word::getLength(s); |
531 |
90043 |
if (n2[index1].isConst()) |
532 |
|
{ |
533 |
26642 |
Node t = n2[index1]; |
534 |
13321 |
std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); |
535 |
13321 |
if (ret == std::string::npos) |
536 |
|
{ |
537 |
3362 |
if (n1.size() == 1) |
538 |
|
{ |
539 |
|
// can remove everything |
540 |
|
// e.g. str.contains( "abc", str.++( "ba", x ) ) --> |
541 |
|
// str.contains( "", str.++( "ba", x ) ) |
542 |
|
// or std.contains( str.substr( "abc", x, y ), "d" ) ---> |
543 |
|
// str.contains( "", "d" ) |
544 |
36 |
removeComponent = true; |
545 |
|
} |
546 |
3326 |
else if (sss.empty()) // only if not substr |
547 |
|
{ |
548 |
|
// check how much overlap there is |
549 |
|
// This is used to partially strip off the endpoint |
550 |
|
// e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> |
551 |
|
// str.contains( str.++( "c", x ), str.++( "cd", y ) ) |
552 |
3281 |
overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); |
553 |
|
} |
554 |
|
// note that we cannot process substring here, since t may |
555 |
|
// match only part of s. Consider: |
556 |
|
// (str.++ "C" (str.substr "AB" x y)), "CB" |
557 |
|
// where "AB" and "CB" have no overlap, but "C" is not part of what |
558 |
|
// is matched with "AB". |
559 |
|
} |
560 |
9959 |
else if (sss.empty()) // only if not substr |
561 |
|
{ |
562 |
6749 |
Assert(ret < slen); |
563 |
|
// can strip off up to the find position, e.g. |
564 |
|
// str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> |
565 |
|
// str.contains( str.++( "bc", x ), str.++( "b", y ) ), |
566 |
|
// and |
567 |
|
// str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> |
568 |
|
// str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) |
569 |
6749 |
overlap = slen - ret; |
570 |
|
} |
571 |
|
} |
572 |
|
else |
573 |
|
{ |
574 |
|
// inconclusive |
575 |
|
} |
576 |
90043 |
Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl; |
577 |
|
// process the overlap |
578 |
90043 |
if (overlap < slen) |
579 |
|
{ |
580 |
298 |
changed = true; |
581 |
298 |
if (overlap == 0) |
582 |
|
{ |
583 |
173 |
removeComponent = true; |
584 |
|
} |
585 |
|
else |
586 |
|
{ |
587 |
|
// can drop the prefix (resp. suffix) from the first (resp. last) |
588 |
|
// component |
589 |
125 |
if (r == 0) |
590 |
|
{ |
591 |
82 |
nb.push_back(Word::prefix(s, slen - overlap)); |
592 |
82 |
n1[index0] = Word::suffix(s, overlap); |
593 |
|
} |
594 |
|
else |
595 |
|
{ |
596 |
43 |
ne.push_back(Word::suffix(s, slen - overlap)); |
597 |
43 |
n1[index0] = Word::prefix(s, overlap); |
598 |
|
} |
599 |
|
} |
600 |
|
} |
601 |
|
} |
602 |
261980 |
else if (n1cmp.getKind() == STRING_ITOS) |
603 |
|
{ |
604 |
437 |
if (n2[index1].isConst()) |
605 |
|
{ |
606 |
41 |
Assert(n2[index1].getType().isString()); // string-only |
607 |
82 |
cvc5::String t = n2[index1].getConst<String>(); |
608 |
41 |
if (n1.size() == 1) |
609 |
|
{ |
610 |
|
// if n1.size()==1, then if n2[index1] is not a number, we can drop |
611 |
|
// the entire component |
612 |
|
// e.g. str.contains( int.to.str(x), "123a45") --> false |
613 |
21 |
if (!t.isNumber()) |
614 |
|
{ |
615 |
13 |
removeComponent = true; |
616 |
|
} |
617 |
|
} |
618 |
|
else |
619 |
|
{ |
620 |
20 |
const std::vector<unsigned>& tvec = t.getVec(); |
621 |
20 |
Assert(tvec.size() > 0); |
622 |
|
|
623 |
|
// if n1.size()>1, then if the first (resp. last) character of |
624 |
|
// n2[index1] |
625 |
|
// is not a digit, we can drop the entire component, e.g.: |
626 |
|
// str.contains( str.++( int.to.str(x), y ), "a12") --> |
627 |
|
// str.contains( y, "a12" ) |
628 |
|
// str.contains( str.++( y, int.to.str(x) ), "a0b") --> |
629 |
|
// str.contains( y, "a0b" ) |
630 |
20 |
unsigned i = r == 0 ? 0 : (tvec.size() - 1); |
631 |
20 |
if (!String::isDigit(tvec[i])) |
632 |
|
{ |
633 |
16 |
removeComponent = true; |
634 |
|
} |
635 |
|
} |
636 |
|
} |
637 |
|
} |
638 |
352023 |
if (removeComponent) |
639 |
|
{ |
640 |
238 |
Trace("strings-rewrite-debug2") << "...remove component" << std::endl; |
641 |
|
// can drop entire first (resp. last) component |
642 |
238 |
if (r == 0) |
643 |
|
{ |
644 |
158 |
nb.push_back(n1[index0]); |
645 |
158 |
n1.erase(n1.begin(), n1.begin() + 1); |
646 |
|
} |
647 |
|
else |
648 |
|
{ |
649 |
80 |
ne.push_back(n1[index0]); |
650 |
80 |
n1.pop_back(); |
651 |
|
} |
652 |
238 |
if (n1.empty()) |
653 |
|
{ |
654 |
|
// if we've removed everything, just return (we will rewrite to false) |
655 |
49 |
return true; |
656 |
|
} |
657 |
|
else |
658 |
|
{ |
659 |
189 |
changed = true; |
660 |
|
} |
661 |
|
} |
662 |
|
} |
663 |
|
} |
664 |
|
// TODO (#1180) : computing the maximal overlap in this function may be |
665 |
|
// important. |
666 |
|
// str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> |
667 |
|
// false |
668 |
|
// ...since str.to.int(x) can contain at most 1 character from "2aaaa", |
669 |
|
// leaving 4 characters |
670 |
|
// which is larger that the upper bound for length of str.substr(y,0,3), |
671 |
|
// which is 3. |
672 |
177972 |
return changed; |
673 |
|
} |
674 |
|
|
675 |
239451 |
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter) |
676 |
|
{ |
677 |
239451 |
NodeManager* nm = NodeManager::currentNM(); |
678 |
478902 |
Node ctn = nm->mkNode(STRING_CONTAINS, a, b); |
679 |
|
|
680 |
239451 |
if (fullRewriter) |
681 |
|
{ |
682 |
13166 |
ctn = Rewriter::rewrite(ctn); |
683 |
|
} |
684 |
|
else |
685 |
|
{ |
686 |
452570 |
Node prev; |
687 |
255 |
do |
688 |
|
{ |
689 |
226540 |
prev = ctn; |
690 |
226540 |
ctn = d_rewriter.rewriteContains(ctn); |
691 |
226540 |
} while (prev != ctn && ctn.getKind() == STRING_CONTAINS); |
692 |
|
} |
693 |
|
|
694 |
239451 |
Assert(ctn.getType().isBoolean()); |
695 |
478902 |
return ctn.isConst() ? ctn : Node::null(); |
696 |
|
} |
697 |
|
|
698 |
26384 |
bool StringsEntail::checkNonEmpty(Node a) |
699 |
|
{ |
700 |
52768 |
Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); |
701 |
26384 |
len = Rewriter::rewrite(len); |
702 |
52768 |
return ArithEntail::check(len, true); |
703 |
|
} |
704 |
|
|
705 |
48877 |
bool StringsEntail::checkLengthOne(Node s, bool strict) |
706 |
|
{ |
707 |
48877 |
NodeManager* nm = NodeManager::currentNM(); |
708 |
97754 |
Node one = nm->mkConst(Rational(1)); |
709 |
97754 |
Node len = nm->mkNode(STRING_LENGTH, s); |
710 |
48877 |
len = Rewriter::rewrite(len); |
711 |
195508 |
return ArithEntail::check(one, len) |
712 |
244385 |
&& (!strict || ArithEntail::check(len, true)); |
713 |
|
} |
714 |
|
|
715 |
205506 |
bool StringsEntail::checkMultisetSubset(Node a, Node b) |
716 |
|
{ |
717 |
411012 |
std::vector<Node> avec; |
718 |
205506 |
utils::getConcat(getMultisetApproximation(a), avec); |
719 |
411012 |
std::vector<Node> bvec; |
720 |
205506 |
utils::getConcat(b, bvec); |
721 |
|
|
722 |
411012 |
std::map<Node, unsigned> num_nconst[2]; |
723 |
411012 |
std::map<Node, unsigned> num_const[2]; |
724 |
616518 |
for (unsigned j = 0; j < 2; j++) |
725 |
|
{ |
726 |
411012 |
std::vector<Node>& jvec = j == 0 ? avec : bvec; |
727 |
1046536 |
for (const Node& cc : jvec) |
728 |
|
{ |
729 |
635524 |
if (cc.isConst()) |
730 |
|
{ |
731 |
171211 |
num_const[j][cc]++; |
732 |
|
} |
733 |
|
else |
734 |
|
{ |
735 |
464313 |
num_nconst[j][cc]++; |
736 |
|
} |
737 |
|
} |
738 |
|
} |
739 |
205506 |
bool ms_success = true; |
740 |
219416 |
for (std::pair<const Node, unsigned>& nncp : num_nconst[0]) |
741 |
|
{ |
742 |
150258 |
if (nncp.second > num_nconst[1][nncp.first]) |
743 |
|
{ |
744 |
136348 |
ms_success = false; |
745 |
136348 |
break; |
746 |
|
} |
747 |
|
} |
748 |
205506 |
if (ms_success) |
749 |
|
{ |
750 |
|
// count the number of constant characters in the first argument |
751 |
207474 |
std::map<Node, unsigned> count_const[2]; |
752 |
138268 |
std::vector<Node> chars; |
753 |
207474 |
for (unsigned j = 0; j < 2; j++) |
754 |
|
{ |
755 |
210191 |
for (std::pair<const Node, unsigned>& ncp : num_const[j]) |
756 |
|
{ |
757 |
143750 |
Node cn = ncp.first; |
758 |
71875 |
Assert(cn.isConst()); |
759 |
143750 |
std::vector<Node> cnChars = Word::getChars(cn); |
760 |
218166 |
for (const Node& ch : cnChars) |
761 |
|
{ |
762 |
146291 |
count_const[j][ch] += ncp.second; |
763 |
146291 |
if (std::find(chars.begin(), chars.end(), ch) == chars.end()) |
764 |
|
{ |
765 |
96625 |
chars.push_back(ch); |
766 |
|
} |
767 |
|
} |
768 |
|
} |
769 |
|
} |
770 |
138316 |
Trace("strings-entail-ms-ss") |
771 |
69158 |
<< "For " << a << " and " << b << " : " << std::endl; |
772 |
165715 |
for (const Node& ch : chars) |
773 |
|
{ |
774 |
96605 |
Trace("strings-entail-ms-ss") << " # occurrences of substring "; |
775 |
96605 |
Trace("strings-entail-ms-ss") << ch << " in arguments is "; |
776 |
193210 |
Trace("strings-entail-ms-ss") |
777 |
96605 |
<< count_const[0][ch] << " / " << count_const[1][ch] << std::endl; |
778 |
96605 |
if (count_const[0][ch] < count_const[1][ch]) |
779 |
|
{ |
780 |
48 |
return true; |
781 |
|
} |
782 |
|
} |
783 |
|
|
784 |
|
// TODO (#1180): count the number of 2,3,4,.. character substrings |
785 |
|
// for example: |
786 |
|
// str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false |
787 |
|
// since the second argument contains more occurrences of "bb". |
788 |
|
// note this is orthogonal reasoning to inductive reasoning |
789 |
|
// via regular membership reduction in Liang et al CAV 2015. |
790 |
|
} |
791 |
205458 |
return false; |
792 |
|
} |
793 |
|
|
794 |
177950 |
Node StringsEntail::checkHomogeneousString(Node a) |
795 |
|
{ |
796 |
355900 |
std::vector<Node> avec; |
797 |
177950 |
utils::getConcat(getMultisetApproximation(a), avec); |
798 |
|
|
799 |
177950 |
bool cValid = false; |
800 |
355900 |
Node c; |
801 |
269130 |
for (const Node& ac : avec) |
802 |
|
{ |
803 |
183923 |
if (ac.isConst()) |
804 |
|
{ |
805 |
199601 |
std::vector<Node> acv = Word::getChars(ac); |
806 |
161652 |
for (const Node& cc : acv) |
807 |
|
{ |
808 |
70472 |
if (!cValid) |
809 |
|
{ |
810 |
48271 |
cValid = true; |
811 |
48271 |
c = cc; |
812 |
|
} |
813 |
22201 |
else if (c != cc) |
814 |
|
{ |
815 |
|
// Found a different character |
816 |
17241 |
return Node::null(); |
817 |
|
} |
818 |
|
} |
819 |
|
} |
820 |
|
else |
821 |
|
{ |
822 |
|
// Could produce a different character |
823 |
75502 |
return Node::null(); |
824 |
|
} |
825 |
|
} |
826 |
|
|
827 |
85207 |
if (!cValid) |
828 |
|
{ |
829 |
58929 |
return Word::mkEmptyWord(a.getType()); |
830 |
|
} |
831 |
|
|
832 |
26278 |
return c; |
833 |
|
} |
834 |
|
|
835 |
580290 |
Node StringsEntail::getMultisetApproximation(Node a) |
836 |
|
{ |
837 |
580290 |
NodeManager* nm = NodeManager::currentNM(); |
838 |
580290 |
if (a.getKind() == STRING_SUBSTR) |
839 |
|
{ |
840 |
77704 |
return a[0]; |
841 |
|
} |
842 |
502586 |
else if (a.getKind() == STRING_REPLACE) |
843 |
|
{ |
844 |
8197 |
return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); |
845 |
|
} |
846 |
494389 |
else if (a.getKind() == STRING_CONCAT) |
847 |
|
{ |
848 |
136202 |
NodeBuilder nb(STRING_CONCAT); |
849 |
256738 |
for (const Node& ac : a) |
850 |
|
{ |
851 |
188637 |
nb << getMultisetApproximation(ac); |
852 |
|
} |
853 |
68101 |
return nb.constructNode(); |
854 |
|
} |
855 |
|
else |
856 |
|
{ |
857 |
426288 |
return a; |
858 |
|
} |
859 |
|
} |
860 |
|
|
861 |
141074 |
Node StringsEntail::getStringOrEmpty(Node n) |
862 |
|
{ |
863 |
141074 |
Node res; |
864 |
423262 |
while (res.isNull()) |
865 |
|
{ |
866 |
141094 |
switch (n.getKind()) |
867 |
|
{ |
868 |
622 |
case STRING_REPLACE: |
869 |
|
{ |
870 |
622 |
if (Word::isEmpty(n[0])) |
871 |
|
{ |
872 |
|
// (str.replace "" x y) --> y |
873 |
20 |
n = n[2]; |
874 |
20 |
break; |
875 |
|
} |
876 |
|
|
877 |
602 |
if (checkLengthOne(n[0]) && Word::isEmpty(n[2])) |
878 |
|
{ |
879 |
|
// (str.replace "A" x "") --> "A" |
880 |
36 |
res = n[0]; |
881 |
36 |
break; |
882 |
|
} |
883 |
|
|
884 |
566 |
res = n; |
885 |
566 |
break; |
886 |
|
} |
887 |
7985 |
case STRING_SUBSTR: |
888 |
|
{ |
889 |
7985 |
if (checkLengthOne(n[0])) |
890 |
|
{ |
891 |
|
// (str.substr "A" x y) --> "A" |
892 |
563 |
res = n[0]; |
893 |
563 |
break; |
894 |
|
} |
895 |
7422 |
res = n; |
896 |
7422 |
break; |
897 |
|
} |
898 |
132487 |
default: |
899 |
|
{ |
900 |
132487 |
res = n; |
901 |
132487 |
break; |
902 |
|
} |
903 |
|
} |
904 |
|
} |
905 |
141074 |
return res; |
906 |
|
} |
907 |
|
|
908 |
2265 |
Node StringsEntail::inferEqsFromContains(Node x, Node y) |
909 |
|
{ |
910 |
2265 |
NodeManager* nm = NodeManager::currentNM(); |
911 |
4530 |
Node emp = Word::mkEmptyWord(x.getType()); |
912 |
2265 |
Assert(x.getType() == y.getType()); |
913 |
4530 |
TypeNode stype = x.getType(); |
914 |
|
|
915 |
4530 |
Node xLen = nm->mkNode(STRING_LENGTH, x); |
916 |
4530 |
std::vector<Node> yLens; |
917 |
2265 |
if (y.getKind() != STRING_CONCAT) |
918 |
|
{ |
919 |
92 |
yLens.push_back(nm->mkNode(STRING_LENGTH, y)); |
920 |
|
} |
921 |
|
else |
922 |
|
{ |
923 |
7946 |
for (const Node& yi : y) |
924 |
|
{ |
925 |
5773 |
yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); |
926 |
|
} |
927 |
|
} |
928 |
|
|
929 |
4530 |
std::vector<Node> zeroLens; |
930 |
2265 |
if (x == emp) |
931 |
|
{ |
932 |
|
// If x is the empty string, then all ys must be empty, too, and we can |
933 |
|
// skip the expensive checks. Note that this is just a performance |
934 |
|
// optimization. |
935 |
426 |
zeroLens.swap(yLens); |
936 |
|
} |
937 |
|
else |
938 |
|
{ |
939 |
|
// Check if we can infer that str.len(x) <= str.len(y). If that is the |
940 |
|
// case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + |
941 |
|
// str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality |
942 |
|
// true. The terms that can have length zero without making the inequality |
943 |
|
// false must be all be empty if (str.contains x y) is true. |
944 |
1839 |
if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens)) |
945 |
|
{ |
946 |
|
// We could not prove that the inequality holds |
947 |
532 |
return Node::null(); |
948 |
|
} |
949 |
1307 |
else if (yLens.size() == y.getNumChildren()) |
950 |
|
{ |
951 |
|
// We could only prove that the inequality holds but not that any of the |
952 |
|
// ys must be empty |
953 |
563 |
return nm->mkNode(EQUAL, x, y); |
954 |
|
} |
955 |
|
} |
956 |
|
|
957 |
1170 |
if (y.getKind() != STRING_CONCAT) |
958 |
|
{ |
959 |
8 |
if (zeroLens.size() == 1) |
960 |
|
{ |
961 |
|
// y is not a concatenation and we found that it must be empty, so just |
962 |
|
// return (= y "") |
963 |
|
Assert(zeroLens[0][0] == y); |
964 |
|
return nm->mkNode(EQUAL, y, emp); |
965 |
|
} |
966 |
|
else |
967 |
|
{ |
968 |
8 |
Assert(yLens.size() == 1 && yLens[0][0] == y); |
969 |
8 |
return nm->mkNode(EQUAL, x, y); |
970 |
|
} |
971 |
|
} |
972 |
|
|
973 |
2324 |
std::vector<Node> cs; |
974 |
2039 |
for (const Node& yiLen : yLens) |
975 |
|
{ |
976 |
877 |
Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); |
977 |
877 |
cs.push_back(yiLen[0]); |
978 |
|
} |
979 |
|
|
980 |
2324 |
NodeBuilder nb(AND); |
981 |
|
// (= x (str.++ y1' ... ym')) |
982 |
1162 |
if (!cs.empty()) |
983 |
|
{ |
984 |
736 |
nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); |
985 |
|
} |
986 |
|
// (= y1'' "") ... (= yk'' "") |
987 |
3527 |
for (const Node& zeroLen : zeroLens) |
988 |
|
{ |
989 |
2365 |
Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); |
990 |
2365 |
nb << nm->mkNode(EQUAL, zeroLen[0], emp); |
991 |
|
} |
992 |
|
|
993 |
|
// (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) |
994 |
1162 |
return nb.constructNode(); |
995 |
|
} |
996 |
|
|
997 |
|
} // namespace strings |
998 |
|
} // namespace theory |
999 |
29280 |
} // namespace cvc5 |