1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/strings_entail.h" |
17 |
|
|
18 |
|
#include "expr/node_builder.h" |
19 |
|
#include "theory/rewriter.h" |
20 |
|
#include "theory/strings/arith_entail.h" |
21 |
|
#include "theory/strings/sequences_rewriter.h" |
22 |
|
#include "theory/strings/theory_strings_utils.h" |
23 |
|
#include "theory/strings/word.h" |
24 |
|
|
25 |
|
using namespace cvc5::kind; |
26 |
|
|
27 |
|
namespace cvc5 { |
28 |
|
namespace theory { |
29 |
|
namespace strings { |
30 |
|
|
31 |
9648 |
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter) |
32 |
|
{ |
33 |
9648 |
} |
34 |
|
|
35 |
2544 |
bool StringsEntail::canConstantContainConcat(Node c, |
36 |
|
Node n, |
37 |
|
int& firstc, |
38 |
|
int& lastc) |
39 |
|
{ |
40 |
2544 |
Assert(c.isConst()); |
41 |
2544 |
Assert(n.getKind() == STRING_CONCAT); |
42 |
|
// must find constant components in order |
43 |
2544 |
size_t pos = 0; |
44 |
2544 |
firstc = -1; |
45 |
2544 |
lastc = -1; |
46 |
9606 |
for (unsigned i = 0; i < n.getNumChildren(); i++) |
47 |
|
{ |
48 |
7181 |
if (n[i].isConst()) |
49 |
|
{ |
50 |
1665 |
firstc = firstc == -1 ? i : firstc; |
51 |
1665 |
lastc = i; |
52 |
1665 |
size_t new_pos = Word::find(c, n[i], pos); |
53 |
1665 |
if (new_pos == std::string::npos) |
54 |
|
{ |
55 |
111 |
return false; |
56 |
|
} |
57 |
|
else |
58 |
|
{ |
59 |
1554 |
pos = new_pos + Word::getLength(n[i]); |
60 |
|
} |
61 |
|
} |
62 |
5516 |
else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) |
63 |
|
{ |
64 |
20 |
Assert(c.getType().isString()); // string-only |
65 |
20 |
const std::vector<unsigned>& tvec = c.getConst<String>().getVec(); |
66 |
|
// find the first occurrence of a digit starting at pos |
67 |
116 |
while (pos < tvec.size() && !String::isDigit(tvec[pos])) |
68 |
|
{ |
69 |
48 |
pos++; |
70 |
|
} |
71 |
20 |
if (pos == tvec.size()) |
72 |
|
{ |
73 |
8 |
return false; |
74 |
|
} |
75 |
|
// must consume at least one digit here |
76 |
12 |
pos++; |
77 |
|
} |
78 |
|
} |
79 |
2425 |
return true; |
80 |
|
} |
81 |
|
|
82 |
27959 |
bool StringsEntail::canConstantContainList(Node c, |
83 |
|
std::vector<Node>& l, |
84 |
|
int& firstc, |
85 |
|
int& lastc) |
86 |
|
{ |
87 |
27959 |
Assert(c.isConst()); |
88 |
|
// must find constant components in order |
89 |
27959 |
size_t pos = 0; |
90 |
27959 |
firstc = -1; |
91 |
27959 |
lastc = -1; |
92 |
81224 |
for (unsigned i = 0; i < l.size(); i++) |
93 |
|
{ |
94 |
53791 |
if (l[i].isConst()) |
95 |
|
{ |
96 |
44456 |
firstc = firstc == -1 ? i : firstc; |
97 |
44456 |
lastc = i; |
98 |
44456 |
size_t new_pos = Word::find(c, l[i], pos); |
99 |
44456 |
if (new_pos == std::string::npos) |
100 |
|
{ |
101 |
526 |
return false; |
102 |
|
} |
103 |
|
else |
104 |
|
{ |
105 |
43930 |
pos = new_pos + Word::getLength(l[i]); |
106 |
|
} |
107 |
|
} |
108 |
|
} |
109 |
27433 |
return true; |
110 |
|
} |
111 |
|
|
112 |
54448 |
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, |
113 |
|
std::vector<Node>& nr, |
114 |
|
int dir, |
115 |
|
Node& curr, |
116 |
|
bool strict) |
117 |
|
{ |
118 |
54448 |
Assert(dir == 1 || dir == -1); |
119 |
54448 |
Assert(nr.empty()); |
120 |
54448 |
NodeManager* nm = NodeManager::currentNM(); |
121 |
108896 |
Node zero = nm->mkConst(cvc5::Rational(0)); |
122 |
54448 |
bool ret = false; |
123 |
54448 |
bool success = true; |
124 |
54448 |
unsigned sindex = 0; |
125 |
144750 |
while (success && curr != zero && sindex < n1.size()) |
126 |
|
{ |
127 |
45151 |
Assert(!curr.isNull()); |
128 |
45151 |
success = false; |
129 |
45151 |
unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); |
130 |
45151 |
if (n1[sindex_use].isConst()) |
131 |
|
{ |
132 |
|
// could strip part of a constant |
133 |
20106 |
Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr)); |
134 |
10053 |
if (!lowerBound.isNull()) |
135 |
|
{ |
136 |
3174 |
Assert(lowerBound.isConst()); |
137 |
6348 |
Rational lbr = lowerBound.getConst<Rational>(); |
138 |
3174 |
if (lbr.sgn() > 0) |
139 |
|
{ |
140 |
1715 |
Assert(ArithEntail::check(curr, true)); |
141 |
3430 |
Node s = n1[sindex_use]; |
142 |
1715 |
size_t slen = Word::getLength(s); |
143 |
3430 |
Node ncl = nm->mkConst(cvc5::Rational(slen)); |
144 |
3430 |
Node next_s = nm->mkNode(MINUS, lowerBound, ncl); |
145 |
1715 |
next_s = Rewriter::rewrite(next_s); |
146 |
1715 |
Assert(next_s.isConst()); |
147 |
|
// we can remove the entire constant |
148 |
1715 |
if (next_s.getConst<Rational>().sgn() >= 0) |
149 |
|
{ |
150 |
632 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); |
151 |
632 |
success = true; |
152 |
632 |
sindex++; |
153 |
|
} |
154 |
|
else |
155 |
|
{ |
156 |
|
// we can remove part of the constant |
157 |
|
// lower bound minus the length of a concrete string is negative, |
158 |
|
// hence lowerBound cannot be larger than long max |
159 |
1083 |
Assert(lbr < Rational(String::maxSize())); |
160 |
1083 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); |
161 |
1083 |
uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); |
162 |
1083 |
Assert(lbsize < slen); |
163 |
1083 |
if (dir == 1) |
164 |
|
{ |
165 |
|
// strip partially from the front |
166 |
1034 |
nr.push_back(Word::prefix(s, lbsize)); |
167 |
1034 |
n1[sindex_use] = Word::suffix(s, slen - lbsize); |
168 |
|
} |
169 |
|
else |
170 |
|
{ |
171 |
|
// strip partially from the back |
172 |
49 |
nr.push_back(Word::suffix(s, lbsize)); |
173 |
49 |
n1[sindex_use] = Word::prefix(s, slen - lbsize); |
174 |
|
} |
175 |
1083 |
ret = true; |
176 |
|
} |
177 |
1715 |
Assert(ArithEntail::check(curr)); |
178 |
|
} |
179 |
|
else |
180 |
|
{ |
181 |
|
// we cannot remove the constant |
182 |
|
} |
183 |
|
} |
184 |
|
} |
185 |
|
else |
186 |
|
{ |
187 |
|
Node next_s = NodeManager::currentNM()->mkNode( |
188 |
|
MINUS, |
189 |
|
curr, |
190 |
70196 |
NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); |
191 |
35098 |
next_s = Rewriter::rewrite(next_s); |
192 |
35098 |
if (ArithEntail::check(next_s)) |
193 |
|
{ |
194 |
468 |
success = true; |
195 |
468 |
curr = next_s; |
196 |
468 |
sindex++; |
197 |
|
} |
198 |
|
} |
199 |
|
} |
200 |
54448 |
if (sindex > 0 && (!strict || curr == zero)) |
201 |
|
{ |
202 |
977 |
if (dir == 1) |
203 |
|
{ |
204 |
948 |
nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); |
205 |
948 |
n1.erase(n1.begin(), n1.begin() + sindex); |
206 |
|
} |
207 |
|
else |
208 |
|
{ |
209 |
29 |
nr.insert(nr.end(), n1.end() - sindex, n1.end()); |
210 |
29 |
n1.erase(n1.end() - sindex, n1.end()); |
211 |
|
} |
212 |
977 |
ret = true; |
213 |
|
} |
214 |
108896 |
return ret; |
215 |
|
} |
216 |
|
|
217 |
158229 |
int StringsEntail::componentContains(std::vector<Node>& n1, |
218 |
|
std::vector<Node>& n2, |
219 |
|
std::vector<Node>& nb, |
220 |
|
std::vector<Node>& ne, |
221 |
|
bool computeRemainder, |
222 |
|
int remainderDir) |
223 |
|
{ |
224 |
158229 |
Assert(nb.empty()); |
225 |
158229 |
Assert(ne.empty()); |
226 |
158229 |
Trace("strings-entail") << "Component contains: " << std::endl; |
227 |
158229 |
Trace("strings-entail") << "n1 = " << n1 << std::endl; |
228 |
158229 |
Trace("strings-entail") << "n2 = " << n2 << std::endl; |
229 |
|
// if n2 is a singleton, we can do optimized version here |
230 |
158229 |
if (n2.size() == 1) |
231 |
|
{ |
232 |
270622 |
for (unsigned i = 0; i < n1.size(); i++) |
233 |
|
{ |
234 |
306548 |
Node n1rb; |
235 |
306548 |
Node n1re; |
236 |
154541 |
if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) |
237 |
|
{ |
238 |
2534 |
if (computeRemainder) |
239 |
|
{ |
240 |
271 |
n1[i] = n2[0]; |
241 |
271 |
if (remainderDir != -1) |
242 |
|
{ |
243 |
271 |
if (!n1re.isNull()) |
244 |
|
{ |
245 |
37 |
ne.push_back(n1re); |
246 |
|
} |
247 |
271 |
ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); |
248 |
271 |
n1.erase(n1.begin() + i + 1, n1.end()); |
249 |
|
} |
250 |
|
else if (!n1re.isNull()) |
251 |
|
{ |
252 |
|
n1[i] = Rewriter::rewrite( |
253 |
|
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); |
254 |
|
} |
255 |
271 |
if (remainderDir != 1) |
256 |
|
{ |
257 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
258 |
|
n1.erase(n1.begin(), n1.begin() + i); |
259 |
|
if (!n1rb.isNull()) |
260 |
|
{ |
261 |
|
nb.push_back(n1rb); |
262 |
|
} |
263 |
|
} |
264 |
271 |
else if (!n1rb.isNull()) |
265 |
|
{ |
266 |
94 |
n1[i] = Rewriter::rewrite( |
267 |
188 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); |
268 |
|
} |
269 |
|
} |
270 |
2534 |
return i; |
271 |
|
} |
272 |
|
} |
273 |
|
} |
274 |
39614 |
else if (n1.size() >= n2.size()) |
275 |
|
{ |
276 |
10395 |
unsigned diff = n1.size() - n2.size(); |
277 |
27439 |
for (unsigned i = 0; i <= diff; i++) |
278 |
|
{ |
279 |
34215 |
Node n1rb_first; |
280 |
34215 |
Node n1re_first; |
281 |
|
// first component of n2 must be a suffix |
282 |
34342 |
if (componentContainsBase(n1[i], |
283 |
17171 |
n2[0], |
284 |
|
n1rb_first, |
285 |
|
n1re_first, |
286 |
|
1, |
287 |
17171 |
computeRemainder && remainderDir != 1)) |
288 |
|
{ |
289 |
4305 |
Assert(n1re_first.isNull()); |
290 |
7495 |
for (unsigned j = 1; j < n2.size(); j++) |
291 |
|
{ |
292 |
|
// are we in the last component? |
293 |
7495 |
if (j + 1 == n2.size()) |
294 |
|
{ |
295 |
921 |
Node n1rb_last; |
296 |
921 |
Node n1re_last; |
297 |
|
// last component of n2 must be a prefix |
298 |
1842 |
if (componentContainsBase(n1[i + j], |
299 |
921 |
n2[j], |
300 |
|
n1rb_last, |
301 |
|
n1re_last, |
302 |
|
-1, |
303 |
921 |
computeRemainder && remainderDir != -1)) |
304 |
|
{ |
305 |
254 |
Trace("strings-entail-debug") |
306 |
127 |
<< "Last remainder begin is " << n1rb_last << std::endl; |
307 |
254 |
Trace("strings-entail-debug") |
308 |
127 |
<< "Last remainder end is " << n1re_last << std::endl; |
309 |
127 |
Assert(n1rb_last.isNull()); |
310 |
127 |
if (computeRemainder) |
311 |
|
{ |
312 |
12 |
if (remainderDir != -1) |
313 |
|
{ |
314 |
12 |
if (!n1re_last.isNull()) |
315 |
|
{ |
316 |
2 |
ne.push_back(n1re_last); |
317 |
|
} |
318 |
12 |
ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); |
319 |
12 |
n1.erase(n1.begin() + i + j + 1, n1.end()); |
320 |
12 |
n1[i + j] = n2[j]; |
321 |
|
} |
322 |
12 |
if (remainderDir != 1) |
323 |
|
{ |
324 |
|
n1[i] = n2[0]; |
325 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
326 |
|
n1.erase(n1.begin(), n1.begin() + i); |
327 |
|
if (!n1rb_first.isNull()) |
328 |
|
{ |
329 |
|
nb.push_back(n1rb_first); |
330 |
|
} |
331 |
|
} |
332 |
|
} |
333 |
127 |
Trace("strings-entail-debug") << "ne = " << ne << std::endl; |
334 |
127 |
Trace("strings-entail-debug") << "nb = " << nb << std::endl; |
335 |
127 |
Trace("strings-entail-debug") << "...return " << i << std::endl; |
336 |
127 |
return i; |
337 |
|
} |
338 |
|
else |
339 |
|
{ |
340 |
794 |
break; |
341 |
|
} |
342 |
|
} |
343 |
6574 |
else if (n1[i + j] != n2[j]) |
344 |
|
{ |
345 |
3384 |
break; |
346 |
|
} |
347 |
|
} |
348 |
|
} |
349 |
|
} |
350 |
|
} |
351 |
155568 |
return -1; |
352 |
|
} |
353 |
|
|
354 |
172633 |
bool StringsEntail::componentContainsBase( |
355 |
|
Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) |
356 |
|
{ |
357 |
172633 |
Assert(n1rb.isNull()); |
358 |
172633 |
Assert(n1re.isNull()); |
359 |
|
|
360 |
172633 |
NodeManager* nm = NodeManager::currentNM(); |
361 |
|
|
362 |
172633 |
if (n1 == n2) |
363 |
|
{ |
364 |
6358 |
return true; |
365 |
|
} |
366 |
|
else |
367 |
|
{ |
368 |
166275 |
if (n1.isConst() && n2.isConst()) |
369 |
|
{ |
370 |
2764 |
size_t len1 = Word::getLength(n1); |
371 |
2764 |
size_t len2 = Word::getLength(n2); |
372 |
2764 |
if (len2 < len1) |
373 |
|
{ |
374 |
492 |
if (dir == 1) |
375 |
|
{ |
376 |
317 |
if (Word::suffix(n1, len2) == n2) |
377 |
|
{ |
378 |
64 |
if (computeRemainder) |
379 |
|
{ |
380 |
|
n1rb = Word::prefix(n1, len1 - len2); |
381 |
|
} |
382 |
64 |
return true; |
383 |
|
} |
384 |
|
} |
385 |
175 |
else if (dir == -1) |
386 |
|
{ |
387 |
4 |
if (Word::prefix(n1, len2) == n2) |
388 |
|
{ |
389 |
4 |
if (computeRemainder) |
390 |
|
{ |
391 |
|
n1re = Word::suffix(n1, len1 - len2); |
392 |
|
} |
393 |
4 |
return true; |
394 |
|
} |
395 |
|
} |
396 |
|
else |
397 |
|
{ |
398 |
171 |
size_t f = Word::find(n1, n2); |
399 |
171 |
if (f != std::string::npos) |
400 |
|
{ |
401 |
163 |
if (computeRemainder) |
402 |
|
{ |
403 |
119 |
if (f > 0) |
404 |
|
{ |
405 |
90 |
n1rb = Word::prefix(n1, f); |
406 |
|
} |
407 |
119 |
if (len1 > f + len2) |
408 |
|
{ |
409 |
33 |
n1re = Word::suffix(n1, len1 - (f + len2)); |
410 |
|
} |
411 |
|
} |
412 |
163 |
return true; |
413 |
|
} |
414 |
|
} |
415 |
|
} |
416 |
|
} |
417 |
|
else |
418 |
|
{ |
419 |
|
// cases for: |
420 |
|
// n1 = x containing n2 = substr( x, n2[1], n2[2] ) |
421 |
163511 |
if (n2.getKind() == STRING_SUBSTR) |
422 |
|
{ |
423 |
23583 |
if (n2[0] == n1) |
424 |
|
{ |
425 |
185 |
bool success = true; |
426 |
185 |
Node start_pos = n2[1]; |
427 |
185 |
Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]); |
428 |
185 |
Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]); |
429 |
185 |
if (dir == 1) |
430 |
|
{ |
431 |
|
// To be a suffix, start + length must be greater than |
432 |
|
// or equal to the length of the string. |
433 |
4 |
success = ArithEntail::check(end_pos, len_n2s); |
434 |
|
} |
435 |
181 |
else if (dir == -1) |
436 |
|
{ |
437 |
|
// To be a prefix, must literally start at 0, since |
438 |
|
// if we knew it started at <0, it should be rewritten to "", |
439 |
|
// if we knew it started at 0, then n2[1] should be rewritten to |
440 |
|
// 0. |
441 |
24 |
success = start_pos.isConst() |
442 |
12 |
&& start_pos.getConst<Rational>().sgn() == 0; |
443 |
|
} |
444 |
185 |
if (success) |
445 |
|
{ |
446 |
185 |
if (computeRemainder) |
447 |
|
{ |
448 |
|
// we can only compute the remainder if start_pos and end_pos |
449 |
|
// are known to be non-negative. |
450 |
54 |
if (!ArithEntail::check(start_pos) |
451 |
54 |
|| !ArithEntail::check(end_pos)) |
452 |
|
{ |
453 |
12 |
return false; |
454 |
|
} |
455 |
6 |
if (dir != -1) |
456 |
|
{ |
457 |
12 |
n1rb = nm->mkNode( |
458 |
8 |
STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); |
459 |
|
} |
460 |
6 |
if (dir != 1) |
461 |
|
{ |
462 |
6 |
n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); |
463 |
|
} |
464 |
|
} |
465 |
173 |
return true; |
466 |
|
} |
467 |
|
} |
468 |
|
} |
469 |
|
|
470 |
163326 |
if (!computeRemainder && dir == 0) |
471 |
|
{ |
472 |
150016 |
if (n1.getKind() == STRING_STRREPL) |
473 |
|
{ |
474 |
|
// (str.contains (str.replace x y z) w) ---> true |
475 |
|
// if (str.contains x w) --> true and (str.contains z w) ---> true |
476 |
3972 |
Node xCtnW = checkContains(n1[0], n2); |
477 |
2088 |
if (!xCtnW.isNull() && xCtnW.getConst<bool>()) |
478 |
|
{ |
479 |
758 |
Node zCtnW = checkContains(n1[2], n2); |
480 |
481 |
if (!zCtnW.isNull() && zCtnW.getConst<bool>()) |
481 |
|
{ |
482 |
204 |
return true; |
483 |
|
} |
484 |
|
} |
485 |
|
} |
486 |
|
} |
487 |
|
} |
488 |
|
} |
489 |
165655 |
return false; |
490 |
|
} |
491 |
|
|
492 |
158882 |
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, |
493 |
|
std::vector<Node>& n2, |
494 |
|
std::vector<Node>& nb, |
495 |
|
std::vector<Node>& ne, |
496 |
|
int dir) |
497 |
|
{ |
498 |
158882 |
Assert(nb.empty()); |
499 |
158882 |
Assert(ne.empty()); |
500 |
158882 |
bool changed = false; |
501 |
|
// for ( forwards, backwards ) direction |
502 |
427548 |
for (unsigned r = 0; r < 2; r++) |
503 |
|
{ |
504 |
293227 |
if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) |
505 |
|
{ |
506 |
290100 |
unsigned index0 = r == 0 ? 0 : n1.size() - 1; |
507 |
290100 |
unsigned index1 = r == 0 ? 0 : n2.size() - 1; |
508 |
290100 |
bool removeComponent = false; |
509 |
555639 |
Node n1cmp = n1[index0]; |
510 |
|
|
511 |
290100 |
if (n1cmp.isConst() && Word::isEmpty(n1cmp)) |
512 |
|
{ |
513 |
24519 |
return false; |
514 |
|
} |
515 |
|
|
516 |
531120 |
std::vector<Node> sss; |
517 |
531120 |
std::vector<Node> sls; |
518 |
265581 |
n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls); |
519 |
531162 |
Trace("strings-rewrite-debug2") |
520 |
265581 |
<< "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] |
521 |
265581 |
<< ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl; |
522 |
265581 |
if (n1cmp.isConst()) |
523 |
|
{ |
524 |
145274 |
Node s = n1cmp; |
525 |
72637 |
size_t slen = Word::getLength(s); |
526 |
|
// overlap is an overapproximation of the number of characters |
527 |
|
// n2[index1] can match in s |
528 |
72637 |
unsigned overlap = Word::getLength(s); |
529 |
72637 |
if (n2[index1].isConst()) |
530 |
|
{ |
531 |
20576 |
Node t = n2[index1]; |
532 |
10288 |
std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); |
533 |
10288 |
if (ret == std::string::npos) |
534 |
|
{ |
535 |
2517 |
if (n1.size() == 1) |
536 |
|
{ |
537 |
|
// can remove everything |
538 |
|
// e.g. str.contains( "abc", str.++( "ba", x ) ) --> |
539 |
|
// str.contains( "", str.++( "ba", x ) ) |
540 |
|
// or std.contains( str.substr( "abc", x, y ), "d" ) ---> |
541 |
|
// str.contains( "", "d" ) |
542 |
37 |
removeComponent = true; |
543 |
|
} |
544 |
2480 |
else if (sss.empty()) // only if not substr |
545 |
|
{ |
546 |
|
// check how much overlap there is |
547 |
|
// This is used to partially strip off the endpoint |
548 |
|
// e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> |
549 |
|
// str.contains( str.++( "c", x ), str.++( "cd", y ) ) |
550 |
2435 |
overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); |
551 |
|
} |
552 |
|
// note that we cannot process substring here, since t may |
553 |
|
// match only part of s. Consider: |
554 |
|
// (str.++ "C" (str.substr "AB" x y)), "CB" |
555 |
|
// where "AB" and "CB" have no overlap, but "C" is not part of what |
556 |
|
// is matched with "AB". |
557 |
|
} |
558 |
7771 |
else if (sss.empty()) // only if not substr |
559 |
|
{ |
560 |
5053 |
Assert(ret < slen); |
561 |
|
// can strip off up to the find position, e.g. |
562 |
|
// str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> |
563 |
|
// str.contains( str.++( "bc", x ), str.++( "b", y ) ), |
564 |
|
// and |
565 |
|
// str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> |
566 |
|
// str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) |
567 |
5053 |
overlap = slen - ret; |
568 |
|
} |
569 |
|
} |
570 |
|
else |
571 |
|
{ |
572 |
|
// inconclusive |
573 |
|
} |
574 |
72637 |
Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl; |
575 |
|
// process the overlap |
576 |
72637 |
if (overlap < slen) |
577 |
|
{ |
578 |
213 |
changed = true; |
579 |
213 |
if (overlap == 0) |
580 |
|
{ |
581 |
165 |
removeComponent = true; |
582 |
|
} |
583 |
|
else |
584 |
|
{ |
585 |
|
// can drop the prefix (resp. suffix) from the first (resp. last) |
586 |
|
// component |
587 |
48 |
if (r == 0) |
588 |
|
{ |
589 |
27 |
nb.push_back(Word::prefix(s, slen - overlap)); |
590 |
27 |
n1[index0] = Word::suffix(s, overlap); |
591 |
|
} |
592 |
|
else |
593 |
|
{ |
594 |
21 |
ne.push_back(Word::suffix(s, slen - overlap)); |
595 |
21 |
n1[index0] = Word::prefix(s, overlap); |
596 |
|
} |
597 |
|
} |
598 |
|
} |
599 |
|
} |
600 |
192944 |
else if (n1cmp.getKind() == STRING_ITOS) |
601 |
|
{ |
602 |
379 |
if (n2[index1].isConst()) |
603 |
|
{ |
604 |
29 |
Assert(n2[index1].getType().isString()); // string-only |
605 |
58 |
cvc5::String t = n2[index1].getConst<String>(); |
606 |
29 |
if (n1.size() == 1) |
607 |
|
{ |
608 |
|
// if n1.size()==1, then if n2[index1] is not a number, we can drop |
609 |
|
// the entire component |
610 |
|
// e.g. str.contains( int.to.str(x), "123a45") --> false |
611 |
13 |
if (!t.isNumber()) |
612 |
|
{ |
613 |
5 |
removeComponent = true; |
614 |
|
} |
615 |
|
} |
616 |
|
else |
617 |
|
{ |
618 |
16 |
const std::vector<unsigned>& tvec = t.getVec(); |
619 |
16 |
Assert(tvec.size() > 0); |
620 |
|
|
621 |
|
// if n1.size()>1, then if the first (resp. last) character of |
622 |
|
// n2[index1] |
623 |
|
// is not a digit, we can drop the entire component, e.g.: |
624 |
|
// str.contains( str.++( int.to.str(x), y ), "a12") --> |
625 |
|
// str.contains( y, "a12" ) |
626 |
|
// str.contains( str.++( y, int.to.str(x) ), "a0b") --> |
627 |
|
// str.contains( y, "a0b" ) |
628 |
16 |
unsigned i = r == 0 ? 0 : (tvec.size() - 1); |
629 |
16 |
if (!String::isDigit(tvec[i])) |
630 |
|
{ |
631 |
12 |
removeComponent = true; |
632 |
|
} |
633 |
|
} |
634 |
|
} |
635 |
|
} |
636 |
265581 |
if (removeComponent) |
637 |
|
{ |
638 |
219 |
Trace("strings-rewrite-debug2") << "...remove component" << std::endl; |
639 |
|
// can drop entire first (resp. last) component |
640 |
219 |
if (r == 0) |
641 |
|
{ |
642 |
150 |
nb.push_back(n1[index0]); |
643 |
150 |
n1.erase(n1.begin(), n1.begin() + 1); |
644 |
|
} |
645 |
|
else |
646 |
|
{ |
647 |
69 |
ne.push_back(n1[index0]); |
648 |
69 |
n1.pop_back(); |
649 |
|
} |
650 |
219 |
if (n1.empty()) |
651 |
|
{ |
652 |
|
// if we've removed everything, just return (we will rewrite to false) |
653 |
42 |
return true; |
654 |
|
} |
655 |
|
else |
656 |
|
{ |
657 |
177 |
changed = true; |
658 |
|
} |
659 |
|
} |
660 |
|
} |
661 |
|
} |
662 |
|
// TODO (#1180) : computing the maximal overlap in this function may be |
663 |
|
// important. |
664 |
|
// str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> |
665 |
|
// false |
666 |
|
// ...since str.to.int(x) can contain at most 1 character from "2aaaa", |
667 |
|
// leaving 4 characters |
668 |
|
// which is larger that the upper bound for length of str.substr(y,0,3), |
669 |
|
// which is 3. |
670 |
134321 |
return changed; |
671 |
|
} |
672 |
|
|
673 |
178741 |
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter) |
674 |
|
{ |
675 |
178741 |
NodeManager* nm = NodeManager::currentNM(); |
676 |
357482 |
Node ctn = nm->mkNode(STRING_STRCTN, a, b); |
677 |
|
|
678 |
178741 |
if (fullRewriter) |
679 |
|
{ |
680 |
11290 |
ctn = Rewriter::rewrite(ctn); |
681 |
|
} |
682 |
|
else |
683 |
|
{ |
684 |
334902 |
Node prev; |
685 |
170 |
do |
686 |
|
{ |
687 |
167621 |
prev = ctn; |
688 |
167621 |
ctn = d_rewriter.rewriteContains(ctn); |
689 |
167621 |
} while (prev != ctn && ctn.getKind() == STRING_STRCTN); |
690 |
|
} |
691 |
|
|
692 |
178741 |
Assert(ctn.getType().isBoolean()); |
693 |
357482 |
return ctn.isConst() ? ctn : Node::null(); |
694 |
|
} |
695 |
|
|
696 |
20335 |
bool StringsEntail::checkNonEmpty(Node a) |
697 |
|
{ |
698 |
40670 |
Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); |
699 |
20335 |
len = Rewriter::rewrite(len); |
700 |
40670 |
return ArithEntail::check(len, true); |
701 |
|
} |
702 |
|
|
703 |
42610 |
bool StringsEntail::checkLengthOne(Node s, bool strict) |
704 |
|
{ |
705 |
42610 |
NodeManager* nm = NodeManager::currentNM(); |
706 |
85220 |
Node one = nm->mkConst(Rational(1)); |
707 |
85220 |
Node len = nm->mkNode(STRING_LENGTH, s); |
708 |
42610 |
len = Rewriter::rewrite(len); |
709 |
170440 |
return ArithEntail::check(one, len) |
710 |
213050 |
&& (!strict || ArithEntail::check(len, true)); |
711 |
|
} |
712 |
|
|
713 |
154995 |
bool StringsEntail::checkMultisetSubset(Node a, Node b) |
714 |
|
{ |
715 |
309990 |
std::vector<Node> avec; |
716 |
154995 |
utils::getConcat(getMultisetApproximation(a), avec); |
717 |
309990 |
std::vector<Node> bvec; |
718 |
154995 |
utils::getConcat(b, bvec); |
719 |
|
|
720 |
309990 |
std::map<Node, unsigned> num_nconst[2]; |
721 |
309990 |
std::map<Node, unsigned> num_const[2]; |
722 |
464985 |
for (unsigned j = 0; j < 2; j++) |
723 |
|
{ |
724 |
309990 |
std::vector<Node>& jvec = j == 0 ? avec : bvec; |
725 |
773127 |
for (const Node& cc : jvec) |
726 |
|
{ |
727 |
463137 |
if (cc.isConst()) |
728 |
|
{ |
729 |
124554 |
num_const[j][cc]++; |
730 |
|
} |
731 |
|
else |
732 |
|
{ |
733 |
338583 |
num_nconst[j][cc]++; |
734 |
|
} |
735 |
|
} |
736 |
|
} |
737 |
154995 |
bool ms_success = true; |
738 |
163692 |
for (std::pair<const Node, unsigned>& nncp : num_nconst[0]) |
739 |
|
{ |
740 |
108340 |
if (nncp.second > num_nconst[1][nncp.first]) |
741 |
|
{ |
742 |
99643 |
ms_success = false; |
743 |
99643 |
break; |
744 |
|
} |
745 |
|
} |
746 |
154995 |
if (ms_success) |
747 |
|
{ |
748 |
|
// count the number of constant characters in the first argument |
749 |
166056 |
std::map<Node, unsigned> count_const[2]; |
750 |
110660 |
std::vector<Node> chars; |
751 |
166056 |
for (unsigned j = 0; j < 2; j++) |
752 |
|
{ |
753 |
168467 |
for (std::pair<const Node, unsigned>& ncp : num_const[j]) |
754 |
|
{ |
755 |
115526 |
Node cn = ncp.first; |
756 |
57763 |
Assert(cn.isConst()); |
757 |
115526 |
std::vector<Node> cnChars = Word::getChars(cn); |
758 |
182116 |
for (const Node& ch : cnChars) |
759 |
|
{ |
760 |
124353 |
count_const[j][ch] += ncp.second; |
761 |
124353 |
if (std::find(chars.begin(), chars.end(), ch) == chars.end()) |
762 |
|
{ |
763 |
83812 |
chars.push_back(ch); |
764 |
|
} |
765 |
|
} |
766 |
|
} |
767 |
|
} |
768 |
110704 |
Trace("strings-entail-ms-ss") |
769 |
55352 |
<< "For " << a << " and " << b << " : " << std::endl; |
770 |
139100 |
for (const Node& ch : chars) |
771 |
|
{ |
772 |
83792 |
Trace("strings-entail-ms-ss") << " # occurrences of substring "; |
773 |
83792 |
Trace("strings-entail-ms-ss") << ch << " in arguments is "; |
774 |
167584 |
Trace("strings-entail-ms-ss") |
775 |
83792 |
<< count_const[0][ch] << " / " << count_const[1][ch] << std::endl; |
776 |
83792 |
if (count_const[0][ch] < count_const[1][ch]) |
777 |
|
{ |
778 |
44 |
return true; |
779 |
|
} |
780 |
|
} |
781 |
|
|
782 |
|
// TODO (#1180): count the number of 2,3,4,.. character substrings |
783 |
|
// for example: |
784 |
|
// str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false |
785 |
|
// since the second argument contains more occurrences of "bb". |
786 |
|
// note this is orthogonal reasoning to inductive reasoning |
787 |
|
// via regular membership reduction in Liang et al CAV 2015. |
788 |
|
} |
789 |
154951 |
return false; |
790 |
|
} |
791 |
|
|
792 |
135370 |
Node StringsEntail::checkHomogeneousString(Node a) |
793 |
|
{ |
794 |
270740 |
std::vector<Node> avec; |
795 |
135370 |
utils::getConcat(getMultisetApproximation(a), avec); |
796 |
|
|
797 |
135370 |
bool cValid = false; |
798 |
270740 |
Node c; |
799 |
205731 |
for (const Node& ac : avec) |
800 |
|
{ |
801 |
139155 |
if (ac.isConst()) |
802 |
|
{ |
803 |
153148 |
std::vector<Node> acv = Word::getChars(ac); |
804 |
123802 |
for (const Node& cc : acv) |
805 |
|
{ |
806 |
53441 |
if (!cValid) |
807 |
|
{ |
808 |
36912 |
cValid = true; |
809 |
36912 |
c = cc; |
810 |
|
} |
811 |
16529 |
else if (c != cc) |
812 |
|
{ |
813 |
|
// Found a different character |
814 |
12426 |
return Node::null(); |
815 |
|
} |
816 |
|
} |
817 |
|
} |
818 |
|
else |
819 |
|
{ |
820 |
|
// Could produce a different character |
821 |
56368 |
return Node::null(); |
822 |
|
} |
823 |
|
} |
824 |
|
|
825 |
66576 |
if (!cValid) |
826 |
|
{ |
827 |
44660 |
return Word::mkEmptyWord(a.getType()); |
828 |
|
} |
829 |
|
|
830 |
21916 |
return c; |
831 |
|
} |
832 |
|
|
833 |
429966 |
Node StringsEntail::getMultisetApproximation(Node a) |
834 |
|
{ |
835 |
429966 |
NodeManager* nm = NodeManager::currentNM(); |
836 |
429966 |
if (a.getKind() == STRING_SUBSTR) |
837 |
|
{ |
838 |
57970 |
return a[0]; |
839 |
|
} |
840 |
371996 |
else if (a.getKind() == STRING_STRREPL) |
841 |
|
{ |
842 |
7323 |
return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); |
843 |
|
} |
844 |
364673 |
else if (a.getKind() == STRING_CONCAT) |
845 |
|
{ |
846 |
95414 |
NodeBuilder nb(STRING_CONCAT); |
847 |
179985 |
for (const Node& ac : a) |
848 |
|
{ |
849 |
132278 |
nb << getMultisetApproximation(ac); |
850 |
|
} |
851 |
47707 |
return nb.constructNode(); |
852 |
|
} |
853 |
|
else |
854 |
|
{ |
855 |
316966 |
return a; |
856 |
|
} |
857 |
|
} |
858 |
|
|
859 |
96307 |
Node StringsEntail::getStringOrEmpty(Node n) |
860 |
|
{ |
861 |
96307 |
Node res; |
862 |
288961 |
while (res.isNull()) |
863 |
|
{ |
864 |
96327 |
switch (n.getKind()) |
865 |
|
{ |
866 |
510 |
case STRING_STRREPL: |
867 |
|
{ |
868 |
510 |
if (Word::isEmpty(n[0])) |
869 |
|
{ |
870 |
|
// (str.replace "" x y) --> y |
871 |
20 |
n = n[2]; |
872 |
20 |
break; |
873 |
|
} |
874 |
|
|
875 |
490 |
if (checkLengthOne(n[0]) && Word::isEmpty(n[2])) |
876 |
|
{ |
877 |
|
// (str.replace "A" x "") --> "A" |
878 |
36 |
res = n[0]; |
879 |
36 |
break; |
880 |
|
} |
881 |
|
|
882 |
454 |
res = n; |
883 |
454 |
break; |
884 |
|
} |
885 |
6215 |
case STRING_SUBSTR: |
886 |
|
{ |
887 |
6215 |
if (checkLengthOne(n[0])) |
888 |
|
{ |
889 |
|
// (str.substr "A" x y) --> "A" |
890 |
549 |
res = n[0]; |
891 |
549 |
break; |
892 |
|
} |
893 |
5666 |
res = n; |
894 |
5666 |
break; |
895 |
|
} |
896 |
89602 |
default: |
897 |
|
{ |
898 |
89602 |
res = n; |
899 |
89602 |
break; |
900 |
|
} |
901 |
|
} |
902 |
|
} |
903 |
96307 |
return res; |
904 |
|
} |
905 |
|
|
906 |
1663 |
Node StringsEntail::inferEqsFromContains(Node x, Node y) |
907 |
|
{ |
908 |
1663 |
NodeManager* nm = NodeManager::currentNM(); |
909 |
3326 |
Node emp = Word::mkEmptyWord(x.getType()); |
910 |
1663 |
Assert(x.getType() == y.getType()); |
911 |
3326 |
TypeNode stype = x.getType(); |
912 |
|
|
913 |
3326 |
Node xLen = nm->mkNode(STRING_LENGTH, x); |
914 |
3326 |
std::vector<Node> yLens; |
915 |
1663 |
if (y.getKind() != STRING_CONCAT) |
916 |
|
{ |
917 |
90 |
yLens.push_back(nm->mkNode(STRING_LENGTH, y)); |
918 |
|
} |
919 |
|
else |
920 |
|
{ |
921 |
5715 |
for (const Node& yi : y) |
922 |
|
{ |
923 |
4142 |
yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); |
924 |
|
} |
925 |
|
} |
926 |
|
|
927 |
3326 |
std::vector<Node> zeroLens; |
928 |
1663 |
if (x == emp) |
929 |
|
{ |
930 |
|
// If x is the empty string, then all ys must be empty, too, and we can |
931 |
|
// skip the expensive checks. Note that this is just a performance |
932 |
|
// optimization. |
933 |
315 |
zeroLens.swap(yLens); |
934 |
|
} |
935 |
|
else |
936 |
|
{ |
937 |
|
// Check if we can infer that str.len(x) <= str.len(y). If that is the |
938 |
|
// case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + |
939 |
|
// str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality |
940 |
|
// true. The terms that can have length zero without making the inequality |
941 |
|
// false must be all be empty if (str.contains x y) is true. |
942 |
1348 |
if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens)) |
943 |
|
{ |
944 |
|
// We could not prove that the inequality holds |
945 |
436 |
return Node::null(); |
946 |
|
} |
947 |
912 |
else if (yLens.size() == y.getNumChildren()) |
948 |
|
{ |
949 |
|
// We could only prove that the inequality holds but not that any of the |
950 |
|
// ys must be empty |
951 |
443 |
return nm->mkNode(EQUAL, x, y); |
952 |
|
} |
953 |
|
} |
954 |
|
|
955 |
784 |
if (y.getKind() != STRING_CONCAT) |
956 |
|
{ |
957 |
8 |
if (zeroLens.size() == 1) |
958 |
|
{ |
959 |
|
// y is not a concatenation and we found that it must be empty, so just |
960 |
|
// return (= y "") |
961 |
|
Assert(zeroLens[0][0] == y); |
962 |
|
return nm->mkNode(EQUAL, y, emp); |
963 |
|
} |
964 |
|
else |
965 |
|
{ |
966 |
8 |
Assert(yLens.size() == 1 && yLens[0][0] == y); |
967 |
8 |
return nm->mkNode(EQUAL, x, y); |
968 |
|
} |
969 |
|
} |
970 |
|
|
971 |
1552 |
std::vector<Node> cs; |
972 |
1309 |
for (const Node& yiLen : yLens) |
973 |
|
{ |
974 |
533 |
Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); |
975 |
533 |
cs.push_back(yiLen[0]); |
976 |
|
} |
977 |
|
|
978 |
1552 |
NodeBuilder nb(AND); |
979 |
|
// (= x (str.++ y1' ... ym')) |
980 |
776 |
if (!cs.empty()) |
981 |
|
{ |
982 |
461 |
nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); |
983 |
|
} |
984 |
|
// (= y1'' "") ... (= yk'' "") |
985 |
2308 |
for (const Node& zeroLen : zeroLens) |
986 |
|
{ |
987 |
1532 |
Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); |
988 |
1532 |
nb << nm->mkNode(EQUAL, zeroLen[0], emp); |
989 |
|
} |
990 |
|
|
991 |
|
// (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) |
992 |
776 |
return nb.constructNode(); |
993 |
|
} |
994 |
|
|
995 |
|
} // namespace strings |
996 |
|
} // namespace theory |
997 |
28191 |
} // namespace cvc5 |