1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/strings_entail.h" |
17 |
|
|
18 |
|
#include "expr/node_builder.h" |
19 |
|
#include "theory/rewriter.h" |
20 |
|
#include "theory/strings/arith_entail.h" |
21 |
|
#include "theory/strings/sequences_rewriter.h" |
22 |
|
#include "theory/strings/theory_strings_utils.h" |
23 |
|
#include "theory/strings/word.h" |
24 |
|
#include "util/rational.h" |
25 |
|
#include "util/string.h" |
26 |
|
|
27 |
|
using namespace cvc5::kind; |
28 |
|
|
29 |
|
namespace cvc5 { |
30 |
|
namespace theory { |
31 |
|
namespace strings { |
32 |
|
|
33 |
10231 |
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter) |
34 |
|
{ |
35 |
10231 |
} |
36 |
|
|
37 |
735 |
bool StringsEntail::canConstantContainConcat(Node c, |
38 |
|
Node n, |
39 |
|
int& firstc, |
40 |
|
int& lastc) |
41 |
|
{ |
42 |
735 |
Assert(c.isConst()); |
43 |
735 |
Assert(n.getKind() == STRING_CONCAT); |
44 |
|
// must find constant components in order |
45 |
735 |
size_t pos = 0; |
46 |
735 |
firstc = -1; |
47 |
735 |
lastc = -1; |
48 |
2704 |
for (unsigned i = 0; i < n.getNumChildren(); i++) |
49 |
|
{ |
50 |
1996 |
if (n[i].isConst()) |
51 |
|
{ |
52 |
405 |
firstc = firstc == -1 ? i : firstc; |
53 |
405 |
lastc = i; |
54 |
405 |
size_t new_pos = Word::find(c, n[i], pos); |
55 |
405 |
if (new_pos == std::string::npos) |
56 |
|
{ |
57 |
19 |
return false; |
58 |
|
} |
59 |
|
else |
60 |
|
{ |
61 |
386 |
pos = new_pos + Word::getLength(n[i]); |
62 |
|
} |
63 |
|
} |
64 |
1591 |
else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0])) |
65 |
|
{ |
66 |
20 |
Assert(c.getType().isString()); // string-only |
67 |
20 |
const std::vector<unsigned>& tvec = c.getConst<String>().getVec(); |
68 |
|
// find the first occurrence of a digit starting at pos |
69 |
116 |
while (pos < tvec.size() && !String::isDigit(tvec[pos])) |
70 |
|
{ |
71 |
48 |
pos++; |
72 |
|
} |
73 |
20 |
if (pos == tvec.size()) |
74 |
|
{ |
75 |
8 |
return false; |
76 |
|
} |
77 |
|
// must consume at least one digit here |
78 |
12 |
pos++; |
79 |
|
} |
80 |
|
} |
81 |
708 |
return true; |
82 |
|
} |
83 |
|
|
84 |
49046 |
bool StringsEntail::canConstantContainList(Node c, |
85 |
|
std::vector<Node>& l, |
86 |
|
int& firstc, |
87 |
|
int& lastc) |
88 |
|
{ |
89 |
49046 |
Assert(c.isConst()); |
90 |
|
// must find constant components in order |
91 |
49046 |
size_t pos = 0; |
92 |
49046 |
firstc = -1; |
93 |
49046 |
lastc = -1; |
94 |
141911 |
for (unsigned i = 0; i < l.size(); i++) |
95 |
|
{ |
96 |
93386 |
if (l[i].isConst()) |
97 |
|
{ |
98 |
82315 |
firstc = firstc == -1 ? i : firstc; |
99 |
82315 |
lastc = i; |
100 |
82315 |
size_t new_pos = Word::find(c, l[i], pos); |
101 |
82315 |
if (new_pos == std::string::npos) |
102 |
|
{ |
103 |
521 |
return false; |
104 |
|
} |
105 |
|
else |
106 |
|
{ |
107 |
81794 |
pos = new_pos + Word::getLength(l[i]); |
108 |
|
} |
109 |
|
} |
110 |
|
} |
111 |
48525 |
return true; |
112 |
|
} |
113 |
|
|
114 |
99481 |
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, |
115 |
|
std::vector<Node>& nr, |
116 |
|
int dir, |
117 |
|
Node& curr, |
118 |
|
bool strict) |
119 |
|
{ |
120 |
99481 |
Assert(dir == 1 || dir == -1); |
121 |
99481 |
Assert(nr.empty()); |
122 |
99481 |
NodeManager* nm = NodeManager::currentNM(); |
123 |
198962 |
Node zero = nm->mkConst(cvc5::Rational(0)); |
124 |
99481 |
bool ret = false; |
125 |
99481 |
bool success = true; |
126 |
99481 |
unsigned sindex = 0; |
127 |
278223 |
while (success && curr != zero && sindex < n1.size()) |
128 |
|
{ |
129 |
89371 |
Assert(!curr.isNull()); |
130 |
89371 |
success = false; |
131 |
89371 |
unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); |
132 |
89371 |
if (n1[sindex_use].isConst()) |
133 |
|
{ |
134 |
|
// could strip part of a constant |
135 |
36502 |
Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr)); |
136 |
18251 |
if (!lowerBound.isNull()) |
137 |
|
{ |
138 |
7530 |
Assert(lowerBound.isConst()); |
139 |
15060 |
Rational lbr = lowerBound.getConst<Rational>(); |
140 |
7530 |
if (lbr.sgn() > 0) |
141 |
|
{ |
142 |
5104 |
Assert(ArithEntail::check(curr, true)); |
143 |
10208 |
Node s = n1[sindex_use]; |
144 |
5104 |
size_t slen = Word::getLength(s); |
145 |
10208 |
Node ncl = nm->mkConst(cvc5::Rational(slen)); |
146 |
10208 |
Node next_s = nm->mkNode(MINUS, lowerBound, ncl); |
147 |
5104 |
next_s = Rewriter::rewrite(next_s); |
148 |
5104 |
Assert(next_s.isConst()); |
149 |
|
// we can remove the entire constant |
150 |
5104 |
if (next_s.getConst<Rational>().sgn() >= 0) |
151 |
|
{ |
152 |
3309 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl)); |
153 |
3309 |
success = true; |
154 |
3309 |
sindex++; |
155 |
|
} |
156 |
|
else |
157 |
|
{ |
158 |
|
// we can remove part of the constant |
159 |
|
// lower bound minus the length of a concrete string is negative, |
160 |
|
// hence lowerBound cannot be larger than long max |
161 |
1795 |
Assert(lbr < Rational(String::maxSize())); |
162 |
1795 |
curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound)); |
163 |
1795 |
uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); |
164 |
1795 |
Assert(lbsize < slen); |
165 |
1795 |
if (dir == 1) |
166 |
|
{ |
167 |
|
// strip partially from the front |
168 |
1603 |
nr.push_back(Word::prefix(s, lbsize)); |
169 |
1603 |
n1[sindex_use] = Word::suffix(s, slen - lbsize); |
170 |
|
} |
171 |
|
else |
172 |
|
{ |
173 |
|
// strip partially from the back |
174 |
192 |
nr.push_back(Word::suffix(s, lbsize)); |
175 |
192 |
n1[sindex_use] = Word::prefix(s, slen - lbsize); |
176 |
|
} |
177 |
1795 |
ret = true; |
178 |
|
} |
179 |
5104 |
Assert(ArithEntail::check(curr)); |
180 |
|
} |
181 |
|
else |
182 |
|
{ |
183 |
|
// we cannot remove the constant |
184 |
|
} |
185 |
|
} |
186 |
|
} |
187 |
|
else |
188 |
|
{ |
189 |
|
Node next_s = NodeManager::currentNM()->mkNode( |
190 |
|
MINUS, |
191 |
|
curr, |
192 |
142240 |
NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); |
193 |
71120 |
next_s = Rewriter::rewrite(next_s); |
194 |
71120 |
if (ArithEntail::check(next_s)) |
195 |
|
{ |
196 |
1522 |
success = true; |
197 |
1522 |
curr = next_s; |
198 |
1522 |
sindex++; |
199 |
|
} |
200 |
|
} |
201 |
|
} |
202 |
99481 |
if (sindex > 0 && (!strict || curr == zero)) |
203 |
|
{ |
204 |
3993 |
if (dir == 1) |
205 |
|
{ |
206 |
3646 |
nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); |
207 |
3646 |
n1.erase(n1.begin(), n1.begin() + sindex); |
208 |
|
} |
209 |
|
else |
210 |
|
{ |
211 |
347 |
nr.insert(nr.end(), n1.end() - sindex, n1.end()); |
212 |
347 |
n1.erase(n1.end() - sindex, n1.end()); |
213 |
|
} |
214 |
3993 |
ret = true; |
215 |
|
} |
216 |
198962 |
return ret; |
217 |
|
} |
218 |
|
|
219 |
102905 |
int StringsEntail::componentContains(std::vector<Node>& n1, |
220 |
|
std::vector<Node>& n2, |
221 |
|
std::vector<Node>& nb, |
222 |
|
std::vector<Node>& ne, |
223 |
|
bool computeRemainder, |
224 |
|
int remainderDir) |
225 |
|
{ |
226 |
102905 |
Assert(nb.empty()); |
227 |
102905 |
Assert(ne.empty()); |
228 |
102905 |
Trace("strings-entail") << "Component contains: " << std::endl; |
229 |
102905 |
Trace("strings-entail") << "n1 = " << n1 << std::endl; |
230 |
102905 |
Trace("strings-entail") << "n2 = " << n2 << std::endl; |
231 |
|
// if n2 is a singleton, we can do optimized version here |
232 |
102905 |
if (n2.size() == 1) |
233 |
|
{ |
234 |
202189 |
for (unsigned i = 0; i < n1.size(); i++) |
235 |
|
{ |
236 |
206469 |
Node n1rb; |
237 |
206469 |
Node n1re; |
238 |
104800 |
if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) |
239 |
|
{ |
240 |
3131 |
if (computeRemainder) |
241 |
|
{ |
242 |
556 |
n1[i] = n2[0]; |
243 |
556 |
if (remainderDir != -1) |
244 |
|
{ |
245 |
556 |
if (!n1re.isNull()) |
246 |
|
{ |
247 |
53 |
ne.push_back(n1re); |
248 |
|
} |
249 |
556 |
ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); |
250 |
556 |
n1.erase(n1.begin() + i + 1, n1.end()); |
251 |
|
} |
252 |
|
else if (!n1re.isNull()) |
253 |
|
{ |
254 |
|
n1[i] = Rewriter::rewrite( |
255 |
|
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); |
256 |
|
} |
257 |
556 |
if (remainderDir != 1) |
258 |
|
{ |
259 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
260 |
|
n1.erase(n1.begin(), n1.begin() + i); |
261 |
|
if (!n1rb.isNull()) |
262 |
|
{ |
263 |
|
nb.push_back(n1rb); |
264 |
|
} |
265 |
|
} |
266 |
556 |
else if (!n1rb.isNull()) |
267 |
|
{ |
268 |
106 |
n1[i] = Rewriter::rewrite( |
269 |
212 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); |
270 |
|
} |
271 |
|
} |
272 |
3131 |
return i; |
273 |
|
} |
274 |
|
} |
275 |
|
} |
276 |
2385 |
else if (n1.size() >= n2.size()) |
277 |
|
{ |
278 |
786 |
unsigned diff = n1.size() - n2.size(); |
279 |
1528 |
for (unsigned i = 0; i <= diff; i++) |
280 |
|
{ |
281 |
1826 |
Node n1rb_first; |
282 |
1826 |
Node n1re_first; |
283 |
|
// first component of n2 must be a suffix |
284 |
2168 |
if (componentContainsBase(n1[i], |
285 |
1084 |
n2[0], |
286 |
|
n1rb_first, |
287 |
|
n1re_first, |
288 |
|
1, |
289 |
1084 |
computeRemainder && remainderDir != 1)) |
290 |
|
{ |
291 |
365 |
Assert(n1re_first.isNull()); |
292 |
610 |
for (unsigned j = 1; j < n2.size(); j++) |
293 |
|
{ |
294 |
|
// are we in the last component? |
295 |
610 |
if (j + 1 == n2.size()) |
296 |
|
{ |
297 |
355 |
Node n1rb_last; |
298 |
355 |
Node n1re_last; |
299 |
|
// last component of n2 must be a prefix |
300 |
710 |
if (componentContainsBase(n1[i + j], |
301 |
355 |
n2[j], |
302 |
|
n1rb_last, |
303 |
|
n1re_last, |
304 |
|
-1, |
305 |
355 |
computeRemainder && remainderDir != -1)) |
306 |
|
{ |
307 |
684 |
Trace("strings-entail-debug") |
308 |
342 |
<< "Last remainder begin is " << n1rb_last << std::endl; |
309 |
684 |
Trace("strings-entail-debug") |
310 |
342 |
<< "Last remainder end is " << n1re_last << std::endl; |
311 |
342 |
Assert(n1rb_last.isNull()); |
312 |
342 |
if (computeRemainder) |
313 |
|
{ |
314 |
172 |
if (remainderDir != -1) |
315 |
|
{ |
316 |
172 |
if (!n1re_last.isNull()) |
317 |
|
{ |
318 |
2 |
ne.push_back(n1re_last); |
319 |
|
} |
320 |
172 |
ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); |
321 |
172 |
n1.erase(n1.begin() + i + j + 1, n1.end()); |
322 |
172 |
n1[i + j] = n2[j]; |
323 |
|
} |
324 |
172 |
if (remainderDir != 1) |
325 |
|
{ |
326 |
|
n1[i] = n2[0]; |
327 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
328 |
|
n1.erase(n1.begin(), n1.begin() + i); |
329 |
|
if (!n1rb_first.isNull()) |
330 |
|
{ |
331 |
|
nb.push_back(n1rb_first); |
332 |
|
} |
333 |
|
} |
334 |
|
} |
335 |
342 |
Trace("strings-entail-debug") << "ne = " << ne << std::endl; |
336 |
342 |
Trace("strings-entail-debug") << "nb = " << nb << std::endl; |
337 |
342 |
Trace("strings-entail-debug") << "...return " << i << std::endl; |
338 |
342 |
return i; |
339 |
|
} |
340 |
|
else |
341 |
|
{ |
342 |
13 |
break; |
343 |
|
} |
344 |
|
} |
345 |
255 |
else if (n1[i + j] != n2[j]) |
346 |
|
{ |
347 |
10 |
break; |
348 |
|
} |
349 |
|
} |
350 |
|
} |
351 |
|
} |
352 |
|
} |
353 |
99432 |
return -1; |
354 |
|
} |
355 |
|
|
356 |
106239 |
bool StringsEntail::componentContainsBase( |
357 |
|
Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) |
358 |
|
{ |
359 |
106239 |
Assert(n1rb.isNull()); |
360 |
106239 |
Assert(n1re.isNull()); |
361 |
|
|
362 |
106239 |
NodeManager* nm = NodeManager::currentNM(); |
363 |
|
|
364 |
106239 |
if (n1 == n2) |
365 |
|
{ |
366 |
1525 |
return true; |
367 |
|
} |
368 |
|
else |
369 |
|
{ |
370 |
104714 |
if (n1.isConst() && n2.isConst()) |
371 |
|
{ |
372 |
634 |
size_t len1 = Word::getLength(n1); |
373 |
634 |
size_t len2 = Word::getLength(n2); |
374 |
634 |
if (len2 < len1) |
375 |
|
{ |
376 |
277 |
if (dir == 1) |
377 |
|
{ |
378 |
20 |
if (Word::suffix(n1, len2) == n2) |
379 |
|
{ |
380 |
20 |
if (computeRemainder) |
381 |
|
{ |
382 |
|
n1rb = Word::prefix(n1, len1 - len2); |
383 |
|
} |
384 |
20 |
return true; |
385 |
|
} |
386 |
|
} |
387 |
257 |
else if (dir == -1) |
388 |
|
{ |
389 |
4 |
if (Word::prefix(n1, len2) == n2) |
390 |
|
{ |
391 |
4 |
if (computeRemainder) |
392 |
|
{ |
393 |
|
n1re = Word::suffix(n1, len1 - len2); |
394 |
|
} |
395 |
4 |
return true; |
396 |
|
} |
397 |
|
} |
398 |
|
else |
399 |
|
{ |
400 |
253 |
size_t f = Word::find(n1, n2); |
401 |
253 |
if (f != std::string::npos) |
402 |
|
{ |
403 |
239 |
if (computeRemainder) |
404 |
|
{ |
405 |
139 |
if (f > 0) |
406 |
|
{ |
407 |
102 |
n1rb = Word::prefix(n1, f); |
408 |
|
} |
409 |
139 |
if (len1 > f + len2) |
410 |
|
{ |
411 |
49 |
n1re = Word::suffix(n1, len1 - (f + len2)); |
412 |
|
} |
413 |
|
} |
414 |
239 |
return true; |
415 |
|
} |
416 |
|
} |
417 |
|
} |
418 |
|
} |
419 |
|
else |
420 |
|
{ |
421 |
|
// cases for: |
422 |
|
// n1 = x containing n2 = substr( x, n2[1], n2[2] ) |
423 |
104080 |
if (n2.getKind() == STRING_SUBSTR) |
424 |
|
{ |
425 |
53024 |
if (n2[0] == n1) |
426 |
|
{ |
427 |
826 |
bool success = true; |
428 |
826 |
Node start_pos = n2[1]; |
429 |
826 |
Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]); |
430 |
826 |
Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]); |
431 |
826 |
if (dir == 1) |
432 |
|
{ |
433 |
|
// To be a suffix, start + length must be greater than |
434 |
|
// or equal to the length of the string. |
435 |
4 |
success = ArithEntail::check(end_pos, len_n2s); |
436 |
|
} |
437 |
822 |
else if (dir == -1) |
438 |
|
{ |
439 |
|
// To be a prefix, must literally start at 0, since |
440 |
|
// if we knew it started at <0, it should be rewritten to "", |
441 |
|
// if we knew it started at 0, then n2[1] should be rewritten to |
442 |
|
// 0. |
443 |
16 |
success = start_pos.isConst() |
444 |
8 |
&& start_pos.getConst<Rational>().sgn() == 0; |
445 |
|
} |
446 |
826 |
if (success) |
447 |
|
{ |
448 |
826 |
if (computeRemainder) |
449 |
|
{ |
450 |
|
// we can only compute the remainder if start_pos and end_pos |
451 |
|
// are known to be non-negative. |
452 |
54 |
if (!ArithEntail::check(start_pos) |
453 |
54 |
|| !ArithEntail::check(end_pos)) |
454 |
|
{ |
455 |
12 |
return false; |
456 |
|
} |
457 |
6 |
if (dir != -1) |
458 |
|
{ |
459 |
12 |
n1rb = nm->mkNode( |
460 |
8 |
STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); |
461 |
|
} |
462 |
6 |
if (dir != 1) |
463 |
|
{ |
464 |
6 |
n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); |
465 |
|
} |
466 |
|
} |
467 |
814 |
return true; |
468 |
|
} |
469 |
|
} |
470 |
|
} |
471 |
|
|
472 |
103254 |
if (!computeRemainder && dir == 0) |
473 |
|
{ |
474 |
101467 |
if (n1.getKind() == STRING_REPLACE) |
475 |
|
{ |
476 |
|
// (str.contains (str.replace x y z) w) ---> true |
477 |
|
// if (str.contains x w) --> true and (str.contains z w) ---> true |
478 |
3376 |
Node xCtnW = checkContains(n1[0], n2); |
479 |
2306 |
if (!xCtnW.isNull() && xCtnW.getConst<bool>()) |
480 |
|
{ |
481 |
1782 |
Node zCtnW = checkContains(n1[2], n2); |
482 |
1509 |
if (!zCtnW.isNull() && zCtnW.getConst<bool>()) |
483 |
|
{ |
484 |
1236 |
return true; |
485 |
|
} |
486 |
|
} |
487 |
|
} |
488 |
|
} |
489 |
|
} |
490 |
|
} |
491 |
102389 |
return false; |
492 |
|
} |
493 |
|
|
494 |
103345 |
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, |
495 |
|
std::vector<Node>& n2, |
496 |
|
std::vector<Node>& nb, |
497 |
|
std::vector<Node>& ne, |
498 |
|
int dir) |
499 |
|
{ |
500 |
103345 |
Assert(nb.empty()); |
501 |
103345 |
Assert(ne.empty()); |
502 |
103345 |
bool changed = false; |
503 |
|
// for ( forwards, backwards ) direction |
504 |
238965 |
for (unsigned r = 0; r < 2; r++) |
505 |
|
{ |
506 |
171168 |
if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) |
507 |
|
{ |
508 |
167642 |
unsigned index0 = r == 0 ? 0 : n1.size() - 1; |
509 |
167642 |
unsigned index1 = r == 0 ? 0 : n2.size() - 1; |
510 |
167642 |
bool removeComponent = false; |
511 |
299736 |
Node n1cmp = n1[index0]; |
512 |
|
|
513 |
167642 |
if (n1cmp.isConst() && Word::isEmpty(n1cmp)) |
514 |
|
{ |
515 |
35480 |
return false; |
516 |
|
} |
517 |
|
|
518 |
264256 |
std::vector<Node> sss; |
519 |
264256 |
std::vector<Node> sls; |
520 |
132162 |
n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls); |
521 |
264324 |
Trace("strings-rewrite-debug2") |
522 |
132162 |
<< "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] |
523 |
132162 |
<< ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl; |
524 |
132162 |
if (n1cmp.isConst()) |
525 |
|
{ |
526 |
122340 |
Node s = n1cmp; |
527 |
61170 |
size_t slen = Word::getLength(s); |
528 |
|
// overlap is an overapproximation of the number of characters |
529 |
|
// n2[index1] can match in s |
530 |
61170 |
unsigned overlap = Word::getLength(s); |
531 |
61170 |
if (n2[index1].isConst()) |
532 |
|
{ |
533 |
20012 |
Node t = n2[index1]; |
534 |
10006 |
std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); |
535 |
10006 |
if (ret == std::string::npos) |
536 |
|
{ |
537 |
395 |
if (n1.size() == 1) |
538 |
|
{ |
539 |
|
// can remove everything |
540 |
|
// e.g. str.contains( "abc", str.++( "ba", x ) ) --> |
541 |
|
// str.contains( "", str.++( "ba", x ) ) |
542 |
|
// or std.contains( str.substr( "abc", x, y ), "d" ) ---> |
543 |
|
// str.contains( "", "d" ) |
544 |
14 |
removeComponent = true; |
545 |
|
} |
546 |
381 |
else if (sss.empty()) // only if not substr |
547 |
|
{ |
548 |
|
// check how much overlap there is |
549 |
|
// This is used to partially strip off the endpoint |
550 |
|
// e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> |
551 |
|
// str.contains( str.++( "c", x ), str.++( "cd", y ) ) |
552 |
369 |
overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); |
553 |
|
} |
554 |
|
// note that we cannot process substring here, since t may |
555 |
|
// match only part of s. Consider: |
556 |
|
// (str.++ "C" (str.substr "AB" x y)), "CB" |
557 |
|
// where "AB" and "CB" have no overlap, but "C" is not part of what |
558 |
|
// is matched with "AB". |
559 |
|
} |
560 |
9611 |
else if (sss.empty()) // only if not substr |
561 |
|
{ |
562 |
659 |
Assert(ret < slen); |
563 |
|
// can strip off up to the find position, e.g. |
564 |
|
// str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> |
565 |
|
// str.contains( str.++( "bc", x ), str.++( "b", y ) ), |
566 |
|
// and |
567 |
|
// str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> |
568 |
|
// str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) |
569 |
659 |
overlap = slen - ret; |
570 |
|
} |
571 |
|
} |
572 |
|
else |
573 |
|
{ |
574 |
|
// inconclusive |
575 |
|
} |
576 |
61170 |
Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl; |
577 |
|
// process the overlap |
578 |
61170 |
if (overlap < slen) |
579 |
|
{ |
580 |
153 |
changed = true; |
581 |
153 |
if (overlap == 0) |
582 |
|
{ |
583 |
122 |
removeComponent = true; |
584 |
|
} |
585 |
|
else |
586 |
|
{ |
587 |
|
// can drop the prefix (resp. suffix) from the first (resp. last) |
588 |
|
// component |
589 |
31 |
if (r == 0) |
590 |
|
{ |
591 |
15 |
nb.push_back(Word::prefix(s, slen - overlap)); |
592 |
15 |
n1[index0] = Word::suffix(s, overlap); |
593 |
|
} |
594 |
|
else |
595 |
|
{ |
596 |
16 |
ne.push_back(Word::suffix(s, slen - overlap)); |
597 |
16 |
n1[index0] = Word::prefix(s, overlap); |
598 |
|
} |
599 |
|
} |
600 |
|
} |
601 |
|
} |
602 |
70992 |
else if (n1cmp.getKind() == STRING_ITOS) |
603 |
|
{ |
604 |
300 |
if (n2[index1].isConst()) |
605 |
|
{ |
606 |
228 |
Assert(n2[index1].getType().isString()); // string-only |
607 |
456 |
cvc5::String t = n2[index1].getConst<String>(); |
608 |
228 |
if (n1.size() == 1) |
609 |
|
{ |
610 |
|
// if n1.size()==1, then if n2[index1] is not a number, we can drop |
611 |
|
// the entire component |
612 |
|
// e.g. str.contains( int.to.str(x), "123a45") --> false |
613 |
218 |
if (!t.isNumber()) |
614 |
|
{ |
615 |
54 |
removeComponent = true; |
616 |
|
} |
617 |
|
} |
618 |
|
else |
619 |
|
{ |
620 |
10 |
const std::vector<unsigned>& tvec = t.getVec(); |
621 |
10 |
Assert(tvec.size() > 0); |
622 |
|
|
623 |
|
// if n1.size()>1, then if the first (resp. last) character of |
624 |
|
// n2[index1] |
625 |
|
// is not a digit, we can drop the entire component, e.g.: |
626 |
|
// str.contains( str.++( int.to.str(x), y ), "a12") --> |
627 |
|
// str.contains( y, "a12" ) |
628 |
|
// str.contains( str.++( y, int.to.str(x) ), "a0b") --> |
629 |
|
// str.contains( y, "a0b" ) |
630 |
10 |
unsigned i = r == 0 ? 0 : (tvec.size() - 1); |
631 |
10 |
if (!String::isDigit(tvec[i])) |
632 |
|
{ |
633 |
8 |
removeComponent = true; |
634 |
|
} |
635 |
|
} |
636 |
|
} |
637 |
|
} |
638 |
132162 |
if (removeComponent) |
639 |
|
{ |
640 |
198 |
Trace("strings-rewrite-debug2") << "...remove component" << std::endl; |
641 |
|
// can drop entire first (resp. last) component |
642 |
198 |
if (r == 0) |
643 |
|
{ |
644 |
137 |
nb.push_back(n1[index0]); |
645 |
137 |
n1.erase(n1.begin(), n1.begin() + 1); |
646 |
|
} |
647 |
|
else |
648 |
|
{ |
649 |
61 |
ne.push_back(n1[index0]); |
650 |
61 |
n1.pop_back(); |
651 |
|
} |
652 |
198 |
if (n1.empty()) |
653 |
|
{ |
654 |
|
// if we've removed everything, just return (we will rewrite to false) |
655 |
68 |
return true; |
656 |
|
} |
657 |
|
else |
658 |
|
{ |
659 |
130 |
changed = true; |
660 |
|
} |
661 |
|
} |
662 |
|
} |
663 |
|
} |
664 |
|
// TODO (#1180) : computing the maximal overlap in this function may be |
665 |
|
// important. |
666 |
|
// str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> |
667 |
|
// false |
668 |
|
// ...since str.to.int(x) can contain at most 1 character from "2aaaa", |
669 |
|
// leaving 4 characters |
670 |
|
// which is larger that the upper bound for length of str.substr(y,0,3), |
671 |
|
// which is 3. |
672 |
67797 |
return changed; |
673 |
|
} |
674 |
|
|
675 |
15218 |
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter) |
676 |
|
{ |
677 |
15218 |
NodeManager* nm = NodeManager::currentNM(); |
678 |
30436 |
Node ctn = nm->mkNode(STRING_CONTAINS, a, b); |
679 |
|
|
680 |
15218 |
if (fullRewriter) |
681 |
|
{ |
682 |
15218 |
ctn = Rewriter::rewrite(ctn); |
683 |
|
} |
684 |
|
else |
685 |
|
{ |
686 |
|
Node prev; |
687 |
|
do |
688 |
|
{ |
689 |
|
prev = ctn; |
690 |
|
ctn = d_rewriter.rewriteContains(ctn); |
691 |
|
if (ctn != prev) |
692 |
|
{ |
693 |
|
ctn = d_rewriter.postProcessRewrite(prev, ctn); |
694 |
|
} |
695 |
|
} while (prev != ctn && ctn.getKind() == STRING_CONTAINS); |
696 |
|
} |
697 |
|
|
698 |
15218 |
Assert(ctn.getType().isBoolean()); |
699 |
30436 |
return ctn.isConst() ? ctn : Node::null(); |
700 |
|
} |
701 |
|
|
702 |
35644 |
bool StringsEntail::checkNonEmpty(Node a) |
703 |
|
{ |
704 |
71288 |
Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); |
705 |
35644 |
len = Rewriter::rewrite(len); |
706 |
71288 |
return ArithEntail::check(len, true); |
707 |
|
} |
708 |
|
|
709 |
59295 |
bool StringsEntail::checkLengthOne(Node s, bool strict) |
710 |
|
{ |
711 |
59295 |
NodeManager* nm = NodeManager::currentNM(); |
712 |
118590 |
Node one = nm->mkConst(Rational(1)); |
713 |
118590 |
Node len = nm->mkNode(STRING_LENGTH, s); |
714 |
59295 |
len = Rewriter::rewrite(len); |
715 |
237180 |
return ArithEntail::check(one, len) |
716 |
296475 |
&& (!strict || ArithEntail::check(len, true)); |
717 |
|
} |
718 |
|
|
719 |
98105 |
bool StringsEntail::checkMultisetSubset(Node a, Node b) |
720 |
|
{ |
721 |
196210 |
std::vector<Node> avec; |
722 |
98105 |
utils::getConcat(getMultisetApproximation(a), avec); |
723 |
196210 |
std::vector<Node> bvec; |
724 |
98105 |
utils::getConcat(b, bvec); |
725 |
|
|
726 |
196210 |
std::map<Node, unsigned> num_nconst[2]; |
727 |
196210 |
std::map<Node, unsigned> num_const[2]; |
728 |
294315 |
for (unsigned j = 0; j < 2; j++) |
729 |
|
{ |
730 |
196210 |
std::vector<Node>& jvec = j == 0 ? avec : bvec; |
731 |
411202 |
for (const Node& cc : jvec) |
732 |
|
{ |
733 |
214992 |
if (cc.isConst()) |
734 |
|
{ |
735 |
107601 |
num_const[j][cc]++; |
736 |
|
} |
737 |
|
else |
738 |
|
{ |
739 |
107391 |
num_nconst[j][cc]++; |
740 |
|
} |
741 |
|
} |
742 |
|
} |
743 |
98105 |
bool ms_success = true; |
744 |
99204 |
for (std::pair<const Node, unsigned>& nncp : num_nconst[0]) |
745 |
|
{ |
746 |
34191 |
if (nncp.second > num_nconst[1][nncp.first]) |
747 |
|
{ |
748 |
33092 |
ms_success = false; |
749 |
33092 |
break; |
750 |
|
} |
751 |
|
} |
752 |
98105 |
if (ms_success) |
753 |
|
{ |
754 |
|
// count the number of constant characters in the first argument |
755 |
195039 |
std::map<Node, unsigned> count_const[2]; |
756 |
130006 |
std::vector<Node> chars; |
757 |
195039 |
for (unsigned j = 0; j < 2; j++) |
758 |
|
{ |
759 |
200031 |
for (std::pair<const Node, unsigned>& ncp : num_const[j]) |
760 |
|
{ |
761 |
140010 |
Node cn = ncp.first; |
762 |
70005 |
Assert(cn.isConst()); |
763 |
140010 |
std::vector<Node> cnChars = Word::getChars(cn); |
764 |
167583 |
for (const Node& ch : cnChars) |
765 |
|
{ |
766 |
97578 |
count_const[j][ch] += ncp.second; |
767 |
97578 |
if (std::find(chars.begin(), chars.end(), ch) == chars.end()) |
768 |
|
{ |
769 |
70539 |
chars.push_back(ch); |
770 |
|
} |
771 |
|
} |
772 |
|
} |
773 |
|
} |
774 |
130026 |
Trace("strings-entail-ms-ss") |
775 |
65013 |
<< "For " << a << " and " << b << " : " << std::endl; |
776 |
135532 |
for (const Node& ch : chars) |
777 |
|
{ |
778 |
70539 |
Trace("strings-entail-ms-ss") << " # occurrences of substring "; |
779 |
70539 |
Trace("strings-entail-ms-ss") << ch << " in arguments is "; |
780 |
141078 |
Trace("strings-entail-ms-ss") |
781 |
70539 |
<< count_const[0][ch] << " / " << count_const[1][ch] << std::endl; |
782 |
70539 |
if (count_const[0][ch] < count_const[1][ch]) |
783 |
|
{ |
784 |
20 |
return true; |
785 |
|
} |
786 |
|
} |
787 |
|
|
788 |
|
// TODO (#1180): count the number of 2,3,4,.. character substrings |
789 |
|
// for example: |
790 |
|
// str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false |
791 |
|
// since the second argument contains more occurrences of "bb". |
792 |
|
// note this is orthogonal reasoning to inductive reasoning |
793 |
|
// via regular membership reduction in Liang et al CAV 2015. |
794 |
|
} |
795 |
98085 |
return false; |
796 |
|
} |
797 |
|
|
798 |
128778 |
Node StringsEntail::checkHomogeneousString(Node a) |
799 |
|
{ |
800 |
257556 |
std::vector<Node> avec; |
801 |
128778 |
utils::getConcat(getMultisetApproximation(a), avec); |
802 |
|
|
803 |
128778 |
bool cValid = false; |
804 |
257556 |
Node c; |
805 |
194913 |
for (const Node& ac : avec) |
806 |
|
{ |
807 |
134068 |
if (ac.isConst()) |
808 |
|
{ |
809 |
146878 |
std::vector<Node> acv = Word::getChars(ac); |
810 |
130124 |
for (const Node& cc : acv) |
811 |
|
{ |
812 |
63989 |
if (!cValid) |
813 |
|
{ |
814 |
44583 |
cValid = true; |
815 |
44583 |
c = cc; |
816 |
|
} |
817 |
19406 |
else if (c != cc) |
818 |
|
{ |
819 |
|
// Found a different character |
820 |
14608 |
return Node::null(); |
821 |
|
} |
822 |
|
} |
823 |
|
} |
824 |
|
else |
825 |
|
{ |
826 |
|
// Could produce a different character |
827 |
53325 |
return Node::null(); |
828 |
|
} |
829 |
|
} |
830 |
|
|
831 |
60845 |
if (!cValid) |
832 |
|
{ |
833 |
35275 |
return Word::mkEmptyWord(a.getType()); |
834 |
|
} |
835 |
|
|
836 |
25570 |
return c; |
837 |
|
} |
838 |
|
|
839 |
251776 |
Node StringsEntail::getMultisetApproximation(Node a) |
840 |
|
{ |
841 |
251776 |
NodeManager* nm = NodeManager::currentNM(); |
842 |
251776 |
if (a.getKind() == STRING_SUBSTR) |
843 |
|
{ |
844 |
83085 |
return a[0]; |
845 |
|
} |
846 |
168691 |
else if (a.getKind() == STRING_REPLACE) |
847 |
|
{ |
848 |
5163 |
return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); |
849 |
|
} |
850 |
163528 |
else if (a.getKind() == STRING_CONCAT) |
851 |
|
{ |
852 |
17710 |
NodeBuilder nb(STRING_CONCAT); |
853 |
28585 |
for (const Node& ac : a) |
854 |
|
{ |
855 |
19730 |
nb << getMultisetApproximation(ac); |
856 |
|
} |
857 |
8855 |
return nb.constructNode(); |
858 |
|
} |
859 |
|
else |
860 |
|
{ |
861 |
154673 |
return a; |
862 |
|
} |
863 |
|
} |
864 |
|
|
865 |
186526 |
Node StringsEntail::getStringOrEmpty(Node n) |
866 |
|
{ |
867 |
186526 |
Node res; |
868 |
559614 |
while (res.isNull()) |
869 |
|
{ |
870 |
186544 |
switch (n.getKind()) |
871 |
|
{ |
872 |
1778 |
case STRING_REPLACE: |
873 |
|
{ |
874 |
1778 |
if (Word::isEmpty(n[0])) |
875 |
|
{ |
876 |
|
// (str.replace "" x y) --> y |
877 |
18 |
n = n[2]; |
878 |
18 |
break; |
879 |
|
} |
880 |
|
|
881 |
1760 |
if (checkLengthOne(n[0]) && Word::isEmpty(n[2])) |
882 |
|
{ |
883 |
|
// (str.replace "A" x "") --> "A" |
884 |
60 |
res = n[0]; |
885 |
60 |
break; |
886 |
|
} |
887 |
|
|
888 |
1700 |
res = n; |
889 |
1700 |
break; |
890 |
|
} |
891 |
13039 |
case STRING_SUBSTR: |
892 |
|
{ |
893 |
13039 |
if (checkLengthOne(n[0])) |
894 |
|
{ |
895 |
|
// (str.substr "A" x y) --> "A" |
896 |
611 |
res = n[0]; |
897 |
611 |
break; |
898 |
|
} |
899 |
12428 |
res = n; |
900 |
12428 |
break; |
901 |
|
} |
902 |
171727 |
default: |
903 |
|
{ |
904 |
171727 |
res = n; |
905 |
171727 |
break; |
906 |
|
} |
907 |
|
} |
908 |
|
} |
909 |
186526 |
return res; |
910 |
|
} |
911 |
|
|
912 |
1019 |
Node StringsEntail::inferEqsFromContains(Node x, Node y) |
913 |
|
{ |
914 |
1019 |
NodeManager* nm = NodeManager::currentNM(); |
915 |
2038 |
Node emp = Word::mkEmptyWord(x.getType()); |
916 |
1019 |
Assert(x.getType() == y.getType()); |
917 |
2038 |
TypeNode stype = x.getType(); |
918 |
|
|
919 |
2038 |
Node xLen = nm->mkNode(STRING_LENGTH, x); |
920 |
2038 |
std::vector<Node> yLens; |
921 |
1019 |
if (y.getKind() != STRING_CONCAT) |
922 |
|
{ |
923 |
124 |
yLens.push_back(nm->mkNode(STRING_LENGTH, y)); |
924 |
|
} |
925 |
|
else |
926 |
|
{ |
927 |
3329 |
for (const Node& yi : y) |
928 |
|
{ |
929 |
2434 |
yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); |
930 |
|
} |
931 |
|
} |
932 |
|
|
933 |
2038 |
std::vector<Node> zeroLens; |
934 |
1019 |
if (x == emp) |
935 |
|
{ |
936 |
|
// If x is the empty string, then all ys must be empty, too, and we can |
937 |
|
// skip the expensive checks. Note that this is just a performance |
938 |
|
// optimization. |
939 |
93 |
zeroLens.swap(yLens); |
940 |
|
} |
941 |
|
else |
942 |
|
{ |
943 |
|
// Check if we can infer that str.len(x) <= str.len(y). If that is the |
944 |
|
// case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + |
945 |
|
// str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality |
946 |
|
// true. The terms that can have length zero without making the inequality |
947 |
|
// false must be all be empty if (str.contains x y) is true. |
948 |
926 |
if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens)) |
949 |
|
{ |
950 |
|
// We could not prove that the inequality holds |
951 |
824 |
return Node::null(); |
952 |
|
} |
953 |
102 |
else if (yLens.size() == y.getNumChildren()) |
954 |
|
{ |
955 |
|
// We could only prove that the inequality holds but not that any of the |
956 |
|
// ys must be empty |
957 |
49 |
return nm->mkNode(EQUAL, x, y); |
958 |
|
} |
959 |
|
} |
960 |
|
|
961 |
146 |
if (y.getKind() != STRING_CONCAT) |
962 |
|
{ |
963 |
20 |
if (zeroLens.size() == 1) |
964 |
|
{ |
965 |
|
// y is not a concatenation and we found that it must be empty, so just |
966 |
|
// return (= y "") |
967 |
|
Assert(zeroLens[0][0] == y); |
968 |
|
return nm->mkNode(EQUAL, y, emp); |
969 |
|
} |
970 |
|
else |
971 |
|
{ |
972 |
20 |
Assert(yLens.size() == 1 && yLens[0][0] == y); |
973 |
20 |
return nm->mkNode(EQUAL, x, y); |
974 |
|
} |
975 |
|
} |
976 |
|
|
977 |
252 |
std::vector<Node> cs; |
978 |
169 |
for (const Node& yiLen : yLens) |
979 |
|
{ |
980 |
43 |
Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); |
981 |
43 |
cs.push_back(yiLen[0]); |
982 |
|
} |
983 |
|
|
984 |
252 |
NodeBuilder nb(AND); |
985 |
|
// (= x (str.++ y1' ... ym')) |
986 |
126 |
if (!cs.empty()) |
987 |
|
{ |
988 |
33 |
nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); |
989 |
|
} |
990 |
|
// (= y1'' "") ... (= yk'' "") |
991 |
384 |
for (const Node& zeroLen : zeroLens) |
992 |
|
{ |
993 |
258 |
Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); |
994 |
258 |
nb << nm->mkNode(EQUAL, zeroLen[0], emp); |
995 |
|
} |
996 |
|
|
997 |
|
// (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) |
998 |
126 |
return nb.constructNode(); |
999 |
|
} |
1000 |
|
|
1001 |
|
} // namespace strings |
1002 |
|
} // namespace theory |
1003 |
29502 |
} // namespace cvc5 |