1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/strings_entail.h" |
17 |
|
|
18 |
|
#include "expr/node_builder.h" |
19 |
|
#include "theory/rewriter.h" |
20 |
|
#include "theory/strings/arith_entail.h" |
21 |
|
#include "theory/strings/sequences_rewriter.h" |
22 |
|
#include "theory/strings/theory_strings_utils.h" |
23 |
|
#include "theory/strings/word.h" |
24 |
|
#include "util/rational.h" |
25 |
|
#include "util/string.h" |
26 |
|
|
27 |
|
using namespace cvc5::kind; |
28 |
|
|
29 |
|
namespace cvc5 { |
30 |
|
namespace theory { |
31 |
|
namespace strings { |
32 |
|
|
33 |
6535 |
StringsEntail::StringsEntail(Rewriter* r, |
34 |
|
ArithEntail& aent, |
35 |
6535 |
SequencesRewriter& rewriter) |
36 |
6535 |
: d_rr(r), d_arithEntail(aent), d_rewriter(rewriter) |
37 |
|
{ |
38 |
6535 |
} |
39 |
|
|
40 |
600 |
bool StringsEntail::canConstantContainConcat(Node c, |
41 |
|
Node n, |
42 |
|
int& firstc, |
43 |
|
int& lastc) |
44 |
|
{ |
45 |
600 |
Assert(c.isConst()); |
46 |
600 |
Assert(n.getKind() == STRING_CONCAT); |
47 |
|
// must find constant components in order |
48 |
600 |
size_t pos = 0; |
49 |
600 |
firstc = -1; |
50 |
600 |
lastc = -1; |
51 |
2219 |
for (unsigned i = 0; i < n.getNumChildren(); i++) |
52 |
|
{ |
53 |
1636 |
if (n[i].isConst()) |
54 |
|
{ |
55 |
318 |
firstc = firstc == -1 ? i : firstc; |
56 |
318 |
lastc = i; |
57 |
318 |
size_t new_pos = Word::find(c, n[i], pos); |
58 |
318 |
if (new_pos == std::string::npos) |
59 |
|
{ |
60 |
15 |
return false; |
61 |
|
} |
62 |
|
else |
63 |
|
{ |
64 |
303 |
pos = new_pos + Word::getLength(n[i]); |
65 |
|
} |
66 |
|
} |
67 |
1318 |
else if (n[i].getKind() == STRING_ITOS && d_arithEntail.check(n[i][0])) |
68 |
|
{ |
69 |
5 |
Assert(c.getType().isString()); // string-only |
70 |
5 |
const std::vector<unsigned>& tvec = c.getConst<String>().getVec(); |
71 |
|
// find the first occurrence of a digit starting at pos |
72 |
29 |
while (pos < tvec.size() && !String::isDigit(tvec[pos])) |
73 |
|
{ |
74 |
12 |
pos++; |
75 |
|
} |
76 |
5 |
if (pos == tvec.size()) |
77 |
|
{ |
78 |
2 |
return false; |
79 |
|
} |
80 |
|
// must consume at least one digit here |
81 |
3 |
pos++; |
82 |
|
} |
83 |
|
} |
84 |
583 |
return true; |
85 |
|
} |
86 |
|
|
87 |
34323 |
bool StringsEntail::canConstantContainList(Node c, |
88 |
|
std::vector<Node>& l, |
89 |
|
int& firstc, |
90 |
|
int& lastc) |
91 |
|
{ |
92 |
34323 |
Assert(c.isConst()); |
93 |
|
// must find constant components in order |
94 |
34323 |
size_t pos = 0; |
95 |
34323 |
firstc = -1; |
96 |
34323 |
lastc = -1; |
97 |
99660 |
for (unsigned i = 0; i < l.size(); i++) |
98 |
|
{ |
99 |
65656 |
if (l[i].isConst()) |
100 |
|
{ |
101 |
57017 |
firstc = firstc == -1 ? i : firstc; |
102 |
57017 |
lastc = i; |
103 |
57017 |
size_t new_pos = Word::find(c, l[i], pos); |
104 |
57017 |
if (new_pos == std::string::npos) |
105 |
|
{ |
106 |
319 |
return false; |
107 |
|
} |
108 |
|
else |
109 |
|
{ |
110 |
56698 |
pos = new_pos + Word::getLength(l[i]); |
111 |
|
} |
112 |
|
} |
113 |
|
} |
114 |
34004 |
return true; |
115 |
|
} |
116 |
|
|
117 |
67666 |
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1, |
118 |
|
std::vector<Node>& nr, |
119 |
|
int dir, |
120 |
|
Node& curr, |
121 |
|
bool strict) |
122 |
|
{ |
123 |
67666 |
Assert(dir == 1 || dir == -1); |
124 |
67666 |
Assert(nr.empty()); |
125 |
67666 |
NodeManager* nm = NodeManager::currentNM(); |
126 |
135332 |
Node zero = nm->mkConst(cvc5::Rational(0)); |
127 |
67666 |
bool ret = false; |
128 |
67666 |
bool success = true; |
129 |
67666 |
unsigned sindex = 0; |
130 |
193120 |
while (success && curr != zero && sindex < n1.size()) |
131 |
|
{ |
132 |
62727 |
Assert(!curr.isNull()); |
133 |
62727 |
success = false; |
134 |
62727 |
unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex); |
135 |
62727 |
if (n1[sindex_use].isConst()) |
136 |
|
{ |
137 |
|
// could strip part of a constant |
138 |
29124 |
Node lowerBound = d_arithEntail.getConstantBound(d_rr->rewrite(curr)); |
139 |
14562 |
if (!lowerBound.isNull()) |
140 |
|
{ |
141 |
6131 |
Assert(lowerBound.isConst()); |
142 |
12262 |
Rational lbr = lowerBound.getConst<Rational>(); |
143 |
6131 |
if (lbr.sgn() > 0) |
144 |
|
{ |
145 |
4453 |
Assert(d_arithEntail.check(curr, true)); |
146 |
8906 |
Node s = n1[sindex_use]; |
147 |
4453 |
size_t slen = Word::getLength(s); |
148 |
8906 |
Node ncl = nm->mkConst(cvc5::Rational(slen)); |
149 |
8906 |
Node next_s = nm->mkNode(MINUS, lowerBound, ncl); |
150 |
4453 |
next_s = d_rr->rewrite(next_s); |
151 |
4453 |
Assert(next_s.isConst()); |
152 |
|
// we can remove the entire constant |
153 |
4453 |
if (next_s.getConst<Rational>().sgn() >= 0) |
154 |
|
{ |
155 |
2811 |
curr = d_rr->rewrite(nm->mkNode(MINUS, curr, ncl)); |
156 |
2811 |
success = true; |
157 |
2811 |
sindex++; |
158 |
|
} |
159 |
|
else |
160 |
|
{ |
161 |
|
// we can remove part of the constant |
162 |
|
// lower bound minus the length of a concrete string is negative, |
163 |
|
// hence lowerBound cannot be larger than long max |
164 |
1642 |
Assert(lbr < Rational(String::maxSize())); |
165 |
1642 |
curr = d_rr->rewrite(nm->mkNode(MINUS, curr, lowerBound)); |
166 |
1642 |
uint32_t lbsize = lbr.getNumerator().toUnsignedInt(); |
167 |
1642 |
Assert(lbsize < slen); |
168 |
1642 |
if (dir == 1) |
169 |
|
{ |
170 |
|
// strip partially from the front |
171 |
1527 |
nr.push_back(Word::prefix(s, lbsize)); |
172 |
1527 |
n1[sindex_use] = Word::suffix(s, slen - lbsize); |
173 |
|
} |
174 |
|
else |
175 |
|
{ |
176 |
|
// strip partially from the back |
177 |
115 |
nr.push_back(Word::suffix(s, lbsize)); |
178 |
115 |
n1[sindex_use] = Word::prefix(s, slen - lbsize); |
179 |
|
} |
180 |
1642 |
ret = true; |
181 |
|
} |
182 |
4453 |
Assert(d_arithEntail.check(curr)); |
183 |
|
} |
184 |
|
else |
185 |
|
{ |
186 |
|
// we cannot remove the constant |
187 |
|
} |
188 |
|
} |
189 |
|
} |
190 |
|
else |
191 |
|
{ |
192 |
|
Node next_s = NodeManager::currentNM()->mkNode( |
193 |
|
MINUS, |
194 |
|
curr, |
195 |
96330 |
NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use])); |
196 |
48165 |
next_s = d_rr->rewrite(next_s); |
197 |
48165 |
if (d_arithEntail.check(next_s)) |
198 |
|
{ |
199 |
1321 |
success = true; |
200 |
1321 |
curr = next_s; |
201 |
1321 |
sindex++; |
202 |
|
} |
203 |
|
} |
204 |
|
} |
205 |
67666 |
if (sindex > 0 && (!strict || curr == zero)) |
206 |
|
{ |
207 |
3366 |
if (dir == 1) |
208 |
|
{ |
209 |
3064 |
nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex); |
210 |
3064 |
n1.erase(n1.begin(), n1.begin() + sindex); |
211 |
|
} |
212 |
|
else |
213 |
|
{ |
214 |
302 |
nr.insert(nr.end(), n1.end() - sindex, n1.end()); |
215 |
302 |
n1.erase(n1.end() - sindex, n1.end()); |
216 |
|
} |
217 |
3366 |
ret = true; |
218 |
|
} |
219 |
135332 |
return ret; |
220 |
|
} |
221 |
|
|
222 |
78310 |
int StringsEntail::componentContains(std::vector<Node>& n1, |
223 |
|
std::vector<Node>& n2, |
224 |
|
std::vector<Node>& nb, |
225 |
|
std::vector<Node>& ne, |
226 |
|
bool computeRemainder, |
227 |
|
int remainderDir) |
228 |
|
{ |
229 |
78310 |
Assert(nb.empty()); |
230 |
78310 |
Assert(ne.empty()); |
231 |
78310 |
Trace("strings-entail") << "Component contains: " << std::endl; |
232 |
78310 |
Trace("strings-entail") << "n1 = " << n1 << std::endl; |
233 |
78310 |
Trace("strings-entail") << "n2 = " << n2 << std::endl; |
234 |
|
// if n2 is a singleton, we can do optimized version here |
235 |
78310 |
if (n2.size() == 1) |
236 |
|
{ |
237 |
154975 |
for (unsigned i = 0; i < n1.size(); i++) |
238 |
|
{ |
239 |
157708 |
Node n1rb; |
240 |
157708 |
Node n1re; |
241 |
79685 |
if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder)) |
242 |
|
{ |
243 |
1662 |
if (computeRemainder) |
244 |
|
{ |
245 |
321 |
n1[i] = n2[0]; |
246 |
321 |
if (remainderDir != -1) |
247 |
|
{ |
248 |
321 |
if (!n1re.isNull()) |
249 |
|
{ |
250 |
36 |
ne.push_back(n1re); |
251 |
|
} |
252 |
321 |
ne.insert(ne.end(), n1.begin() + i + 1, n1.end()); |
253 |
321 |
n1.erase(n1.begin() + i + 1, n1.end()); |
254 |
|
} |
255 |
|
else if (!n1re.isNull()) |
256 |
|
{ |
257 |
|
n1[i] = d_rr->rewrite( |
258 |
|
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re)); |
259 |
|
} |
260 |
321 |
if (remainderDir != 1) |
261 |
|
{ |
262 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
263 |
|
n1.erase(n1.begin(), n1.begin() + i); |
264 |
|
if (!n1rb.isNull()) |
265 |
|
{ |
266 |
|
nb.push_back(n1rb); |
267 |
|
} |
268 |
|
} |
269 |
321 |
else if (!n1rb.isNull()) |
270 |
|
{ |
271 |
103 |
n1[i] = d_rr->rewrite( |
272 |
206 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i])); |
273 |
|
} |
274 |
|
} |
275 |
1662 |
return i; |
276 |
|
} |
277 |
|
} |
278 |
|
} |
279 |
1358 |
else if (n1.size() >= n2.size()) |
280 |
|
{ |
281 |
334 |
unsigned diff = n1.size() - n2.size(); |
282 |
643 |
for (unsigned i = 0; i <= diff; i++) |
283 |
|
{ |
284 |
726 |
Node n1rb_first; |
285 |
726 |
Node n1re_first; |
286 |
|
// first component of n2 must be a suffix |
287 |
834 |
if (componentContainsBase(n1[i], |
288 |
417 |
n2[0], |
289 |
|
n1rb_first, |
290 |
|
n1re_first, |
291 |
|
1, |
292 |
417 |
computeRemainder && remainderDir != 1)) |
293 |
|
{ |
294 |
121 |
Assert(n1re_first.isNull()); |
295 |
188 |
for (unsigned j = 1; j < n2.size(); j++) |
296 |
|
{ |
297 |
|
// are we in the last component? |
298 |
188 |
if (j + 1 == n2.size()) |
299 |
|
{ |
300 |
117 |
Node n1rb_last; |
301 |
117 |
Node n1re_last; |
302 |
|
// last component of n2 must be a prefix |
303 |
234 |
if (componentContainsBase(n1[i + j], |
304 |
117 |
n2[j], |
305 |
|
n1rb_last, |
306 |
|
n1re_last, |
307 |
|
-1, |
308 |
117 |
computeRemainder && remainderDir != -1)) |
309 |
|
{ |
310 |
216 |
Trace("strings-entail-debug") |
311 |
108 |
<< "Last remainder begin is " << n1rb_last << std::endl; |
312 |
216 |
Trace("strings-entail-debug") |
313 |
108 |
<< "Last remainder end is " << n1re_last << std::endl; |
314 |
108 |
Assert(n1rb_last.isNull()); |
315 |
108 |
if (computeRemainder) |
316 |
|
{ |
317 |
56 |
if (remainderDir != -1) |
318 |
|
{ |
319 |
56 |
if (!n1re_last.isNull()) |
320 |
|
{ |
321 |
2 |
ne.push_back(n1re_last); |
322 |
|
} |
323 |
56 |
ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end()); |
324 |
56 |
n1.erase(n1.begin() + i + j + 1, n1.end()); |
325 |
56 |
n1[i + j] = n2[j]; |
326 |
|
} |
327 |
56 |
if (remainderDir != 1) |
328 |
|
{ |
329 |
|
n1[i] = n2[0]; |
330 |
|
nb.insert(nb.end(), n1.begin(), n1.begin() + i); |
331 |
|
n1.erase(n1.begin(), n1.begin() + i); |
332 |
|
if (!n1rb_first.isNull()) |
333 |
|
{ |
334 |
|
nb.push_back(n1rb_first); |
335 |
|
} |
336 |
|
} |
337 |
|
} |
338 |
108 |
Trace("strings-entail-debug") << "ne = " << ne << std::endl; |
339 |
108 |
Trace("strings-entail-debug") << "nb = " << nb << std::endl; |
340 |
108 |
Trace("strings-entail-debug") << "...return " << i << std::endl; |
341 |
108 |
return i; |
342 |
|
} |
343 |
|
else |
344 |
|
{ |
345 |
9 |
break; |
346 |
|
} |
347 |
|
} |
348 |
71 |
else if (n1[i + j] != n2[j]) |
349 |
|
{ |
350 |
4 |
break; |
351 |
|
} |
352 |
|
} |
353 |
|
} |
354 |
|
} |
355 |
|
} |
356 |
76540 |
return -1; |
357 |
|
} |
358 |
|
|
359 |
80219 |
bool StringsEntail::componentContainsBase( |
360 |
|
Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder) |
361 |
|
{ |
362 |
80219 |
Assert(n1rb.isNull()); |
363 |
80219 |
Assert(n1re.isNull()); |
364 |
|
|
365 |
80219 |
NodeManager* nm = NodeManager::currentNM(); |
366 |
|
|
367 |
80219 |
if (n1 == n2) |
368 |
|
{ |
369 |
563 |
return true; |
370 |
|
} |
371 |
|
else |
372 |
|
{ |
373 |
79656 |
if (n1.isConst() && n2.isConst()) |
374 |
|
{ |
375 |
485 |
size_t len1 = Word::getLength(n1); |
376 |
485 |
size_t len2 = Word::getLength(n2); |
377 |
485 |
if (len2 < len1) |
378 |
|
{ |
379 |
212 |
if (dir == 1) |
380 |
|
{ |
381 |
17 |
if (Word::suffix(n1, len2) == n2) |
382 |
|
{ |
383 |
17 |
if (computeRemainder) |
384 |
|
{ |
385 |
|
n1rb = Word::prefix(n1, len1 - len2); |
386 |
|
} |
387 |
17 |
return true; |
388 |
|
} |
389 |
|
} |
390 |
195 |
else if (dir == -1) |
391 |
|
{ |
392 |
1 |
if (Word::prefix(n1, len2) == n2) |
393 |
|
{ |
394 |
1 |
if (computeRemainder) |
395 |
|
{ |
396 |
|
n1re = Word::suffix(n1, len1 - len2); |
397 |
|
} |
398 |
1 |
return true; |
399 |
|
} |
400 |
|
} |
401 |
|
else |
402 |
|
{ |
403 |
194 |
size_t f = Word::find(n1, n2); |
404 |
194 |
if (f != std::string::npos) |
405 |
|
{ |
406 |
183 |
if (computeRemainder) |
407 |
|
{ |
408 |
125 |
if (f > 0) |
409 |
|
{ |
410 |
102 |
n1rb = Word::prefix(n1, f); |
411 |
|
} |
412 |
125 |
if (len1 > f + len2) |
413 |
|
{ |
414 |
35 |
n1re = Word::suffix(n1, len1 - (f + len2)); |
415 |
|
} |
416 |
|
} |
417 |
183 |
return true; |
418 |
|
} |
419 |
|
} |
420 |
|
} |
421 |
|
} |
422 |
|
else |
423 |
|
{ |
424 |
|
// cases for: |
425 |
|
// n1 = x containing n2 = substr( x, n2[1], n2[2] ) |
426 |
79171 |
if (n2.getKind() == STRING_SUBSTR) |
427 |
|
{ |
428 |
43553 |
if (n2[0] == n1) |
429 |
|
{ |
430 |
672 |
bool success = true; |
431 |
672 |
Node start_pos = n2[1]; |
432 |
672 |
Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]); |
433 |
672 |
Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]); |
434 |
672 |
if (dir == 1) |
435 |
|
{ |
436 |
|
// To be a suffix, start + length must be greater than |
437 |
|
// or equal to the length of the string. |
438 |
1 |
success = d_arithEntail.check(end_pos, len_n2s); |
439 |
|
} |
440 |
671 |
else if (dir == -1) |
441 |
|
{ |
442 |
|
// To be a prefix, must literally start at 0, since |
443 |
|
// if we knew it started at <0, it should be rewritten to "", |
444 |
|
// if we knew it started at 0, then n2[1] should be rewritten to |
445 |
|
// 0. |
446 |
10 |
success = start_pos.isConst() |
447 |
5 |
&& start_pos.getConst<Rational>().sgn() == 0; |
448 |
|
} |
449 |
672 |
if (success) |
450 |
|
{ |
451 |
672 |
if (computeRemainder) |
452 |
|
{ |
453 |
|
// we can only compute the remainder if start_pos and end_pos |
454 |
|
// are known to be non-negative. |
455 |
45 |
if (!d_arithEntail.check(start_pos) |
456 |
45 |
|| !d_arithEntail.check(end_pos)) |
457 |
|
{ |
458 |
12 |
return false; |
459 |
|
} |
460 |
3 |
if (dir != -1) |
461 |
|
{ |
462 |
3 |
n1rb = nm->mkNode( |
463 |
2 |
STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos); |
464 |
|
} |
465 |
3 |
if (dir != 1) |
466 |
|
{ |
467 |
3 |
n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s); |
468 |
|
} |
469 |
|
} |
470 |
660 |
return true; |
471 |
|
} |
472 |
|
} |
473 |
|
} |
474 |
|
|
475 |
78499 |
if (!computeRemainder && dir == 0) |
476 |
|
{ |
477 |
77598 |
if (n1.getKind() == STRING_REPLACE) |
478 |
|
{ |
479 |
|
// (str.contains (str.replace x y z) w) ---> true |
480 |
|
// if (str.contains x w) --> true and (str.contains z w) ---> true |
481 |
1553 |
Node xCtnW = checkContains(n1[0], n2); |
482 |
1010 |
if (!xCtnW.isNull() && xCtnW.getConst<bool>()) |
483 |
|
{ |
484 |
825 |
Node zCtnW = checkContains(n1[2], n2); |
485 |
646 |
if (!zCtnW.isNull() && zCtnW.getConst<bool>()) |
486 |
|
{ |
487 |
467 |
return true; |
488 |
|
} |
489 |
|
} |
490 |
|
} |
491 |
|
} |
492 |
|
} |
493 |
|
} |
494 |
78316 |
return false; |
495 |
|
} |
496 |
|
|
497 |
78611 |
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1, |
498 |
|
std::vector<Node>& n2, |
499 |
|
std::vector<Node>& nb, |
500 |
|
std::vector<Node>& ne, |
501 |
|
int dir) |
502 |
|
{ |
503 |
78611 |
Assert(nb.empty()); |
504 |
78611 |
Assert(ne.empty()); |
505 |
78611 |
bool changed = false; |
506 |
|
// for ( forwards, backwards ) direction |
507 |
179800 |
for (unsigned r = 0; r < 2; r++) |
508 |
|
{ |
509 |
129213 |
if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1)) |
510 |
|
{ |
511 |
127377 |
unsigned index0 = r == 0 ? 0 : n1.size() - 1; |
512 |
127377 |
unsigned index1 = r == 0 ? 0 : n2.size() - 1; |
513 |
127377 |
bool removeComponent = false; |
514 |
226730 |
Node n1cmp = n1[index0]; |
515 |
|
|
516 |
127377 |
if (n1cmp.isConst() && Word::isEmpty(n1cmp)) |
517 |
|
{ |
518 |
27978 |
return false; |
519 |
|
} |
520 |
|
|
521 |
198752 |
std::vector<Node> sss; |
522 |
198752 |
std::vector<Node> sls; |
523 |
99399 |
n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls); |
524 |
198798 |
Trace("strings-rewrite-debug2") |
525 |
99399 |
<< "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1] |
526 |
99399 |
<< ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl; |
527 |
99399 |
if (n1cmp.isConst()) |
528 |
|
{ |
529 |
99080 |
Node s = n1cmp; |
530 |
49540 |
size_t slen = Word::getLength(s); |
531 |
|
// overlap is an overapproximation of the number of characters |
532 |
|
// n2[index1] can match in s |
533 |
49540 |
unsigned overlap = Word::getLength(s); |
534 |
49540 |
if (n2[index1].isConst()) |
535 |
|
{ |
536 |
16996 |
Node t = n2[index1]; |
537 |
8498 |
std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t); |
538 |
8498 |
if (ret == std::string::npos) |
539 |
|
{ |
540 |
280 |
if (n1.size() == 1) |
541 |
|
{ |
542 |
|
// can remove everything |
543 |
|
// e.g. str.contains( "abc", str.++( "ba", x ) ) --> |
544 |
|
// str.contains( "", str.++( "ba", x ) ) |
545 |
|
// or std.contains( str.substr( "abc", x, y ), "d" ) ---> |
546 |
|
// str.contains( "", "d" ) |
547 |
11 |
removeComponent = true; |
548 |
|
} |
549 |
269 |
else if (sss.empty()) // only if not substr |
550 |
|
{ |
551 |
|
// check how much overlap there is |
552 |
|
// This is used to partially strip off the endpoint |
553 |
|
// e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) --> |
554 |
|
// str.contains( str.++( "c", x ), str.++( "cd", y ) ) |
555 |
257 |
overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s); |
556 |
|
} |
557 |
|
// note that we cannot process substring here, since t may |
558 |
|
// match only part of s. Consider: |
559 |
|
// (str.++ "C" (str.substr "AB" x y)), "CB" |
560 |
|
// where "AB" and "CB" have no overlap, but "C" is not part of what |
561 |
|
// is matched with "AB". |
562 |
|
} |
563 |
8218 |
else if (sss.empty()) // only if not substr |
564 |
|
{ |
565 |
490 |
Assert(ret < slen); |
566 |
|
// can strip off up to the find position, e.g. |
567 |
|
// str.contains( str.++( "abc", x ), str.++( "b", y ) ) --> |
568 |
|
// str.contains( str.++( "bc", x ), str.++( "b", y ) ), |
569 |
|
// and |
570 |
|
// str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) --> |
571 |
|
// str.contains( str.++( x, "abb" ), str.++( y, "b" ) ) |
572 |
490 |
overlap = slen - ret; |
573 |
|
} |
574 |
|
} |
575 |
|
else |
576 |
|
{ |
577 |
|
// inconclusive |
578 |
|
} |
579 |
49540 |
Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl; |
580 |
|
// process the overlap |
581 |
49540 |
if (overlap < slen) |
582 |
|
{ |
583 |
111 |
changed = true; |
584 |
111 |
if (overlap == 0) |
585 |
|
{ |
586 |
88 |
removeComponent = true; |
587 |
|
} |
588 |
|
else |
589 |
|
{ |
590 |
|
// can drop the prefix (resp. suffix) from the first (resp. last) |
591 |
|
// component |
592 |
23 |
if (r == 0) |
593 |
|
{ |
594 |
7 |
nb.push_back(Word::prefix(s, slen - overlap)); |
595 |
7 |
n1[index0] = Word::suffix(s, overlap); |
596 |
|
} |
597 |
|
else |
598 |
|
{ |
599 |
16 |
ne.push_back(Word::suffix(s, slen - overlap)); |
600 |
16 |
n1[index0] = Word::prefix(s, overlap); |
601 |
|
} |
602 |
|
} |
603 |
|
} |
604 |
|
} |
605 |
49859 |
else if (n1cmp.getKind() == STRING_ITOS) |
606 |
|
{ |
607 |
244 |
if (n2[index1].isConst()) |
608 |
|
{ |
609 |
206 |
Assert(n2[index1].getType().isString()); // string-only |
610 |
412 |
cvc5::String t = n2[index1].getConst<String>(); |
611 |
206 |
if (n1.size() == 1) |
612 |
|
{ |
613 |
|
// if n1.size()==1, then if n2[index1] is not a number, we can drop |
614 |
|
// the entire component |
615 |
|
// e.g. str.contains( int.to.str(x), "123a45") --> false |
616 |
199 |
if (!t.isNumber()) |
617 |
|
{ |
618 |
35 |
removeComponent = true; |
619 |
|
} |
620 |
|
} |
621 |
|
else |
622 |
|
{ |
623 |
7 |
const std::vector<unsigned>& tvec = t.getVec(); |
624 |
7 |
Assert(tvec.size() > 0); |
625 |
|
|
626 |
|
// if n1.size()>1, then if the first (resp. last) character of |
627 |
|
// n2[index1] |
628 |
|
// is not a digit, we can drop the entire component, e.g.: |
629 |
|
// str.contains( str.++( int.to.str(x), y ), "a12") --> |
630 |
|
// str.contains( y, "a12" ) |
631 |
|
// str.contains( str.++( y, int.to.str(x) ), "a0b") --> |
632 |
|
// str.contains( y, "a0b" ) |
633 |
7 |
unsigned i = r == 0 ? 0 : (tvec.size() - 1); |
634 |
7 |
if (!String::isDigit(tvec[i])) |
635 |
|
{ |
636 |
5 |
removeComponent = true; |
637 |
|
} |
638 |
|
} |
639 |
|
} |
640 |
|
} |
641 |
99399 |
if (removeComponent) |
642 |
|
{ |
643 |
139 |
Trace("strings-rewrite-debug2") << "...remove component" << std::endl; |
644 |
|
// can drop entire first (resp. last) component |
645 |
139 |
if (r == 0) |
646 |
|
{ |
647 |
101 |
nb.push_back(n1[index0]); |
648 |
101 |
n1.erase(n1.begin(), n1.begin() + 1); |
649 |
|
} |
650 |
|
else |
651 |
|
{ |
652 |
38 |
ne.push_back(n1[index0]); |
653 |
38 |
n1.pop_back(); |
654 |
|
} |
655 |
139 |
if (n1.empty()) |
656 |
|
{ |
657 |
|
// if we've removed everything, just return (we will rewrite to false) |
658 |
46 |
return true; |
659 |
|
} |
660 |
|
else |
661 |
|
{ |
662 |
93 |
changed = true; |
663 |
|
} |
664 |
|
} |
665 |
|
} |
666 |
|
} |
667 |
|
// TODO (#1180) : computing the maximal overlap in this function may be |
668 |
|
// important. |
669 |
|
// str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) ---> |
670 |
|
// false |
671 |
|
// ...since str.to.int(x) can contain at most 1 character from "2aaaa", |
672 |
|
// leaving 4 characters |
673 |
|
// which is larger that the upper bound for length of str.substr(y,0,3), |
674 |
|
// which is 3. |
675 |
50587 |
return changed; |
676 |
|
} |
677 |
|
|
678 |
6825 |
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter) |
679 |
|
{ |
680 |
6825 |
NodeManager* nm = NodeManager::currentNM(); |
681 |
13650 |
Node ctn = nm->mkNode(STRING_CONTAINS, a, b); |
682 |
|
|
683 |
6825 |
if (fullRewriter) |
684 |
|
{ |
685 |
6825 |
ctn = d_rr->rewrite(ctn); |
686 |
|
} |
687 |
|
else |
688 |
|
{ |
689 |
|
Node prev; |
690 |
|
do |
691 |
|
{ |
692 |
|
prev = ctn; |
693 |
|
ctn = d_rewriter.rewriteContains(ctn); |
694 |
|
if (ctn != prev) |
695 |
|
{ |
696 |
|
ctn = d_rewriter.postProcessRewrite(prev, ctn); |
697 |
|
} |
698 |
|
} while (prev != ctn && ctn.getKind() == STRING_CONTAINS); |
699 |
|
} |
700 |
|
|
701 |
6825 |
Assert(ctn.getType().isBoolean()); |
702 |
13650 |
return ctn.isConst() ? ctn : Node::null(); |
703 |
|
} |
704 |
|
|
705 |
26902 |
bool StringsEntail::checkNonEmpty(Node a) |
706 |
|
{ |
707 |
53804 |
Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a); |
708 |
26902 |
len = d_rr->rewrite(len); |
709 |
53804 |
return d_arithEntail.check(len, true); |
710 |
|
} |
711 |
|
|
712 |
44371 |
bool StringsEntail::checkLengthOne(Node s, bool strict) |
713 |
|
{ |
714 |
44371 |
NodeManager* nm = NodeManager::currentNM(); |
715 |
88742 |
Node one = nm->mkConst(Rational(1)); |
716 |
88742 |
Node len = nm->mkNode(STRING_LENGTH, s); |
717 |
44371 |
len = d_rr->rewrite(len); |
718 |
177484 |
return d_arithEntail.check(one, len) |
719 |
221855 |
&& (!strict || d_arithEntail.check(len, true)); |
720 |
|
} |
721 |
|
|
722 |
75942 |
bool StringsEntail::checkMultisetSubset(Node a, Node b) |
723 |
|
{ |
724 |
151884 |
std::vector<Node> avec; |
725 |
75942 |
utils::getConcat(getMultisetApproximation(a), avec); |
726 |
151884 |
std::vector<Node> bvec; |
727 |
75942 |
utils::getConcat(b, bvec); |
728 |
|
|
729 |
151884 |
std::map<Node, unsigned> num_nconst[2]; |
730 |
151884 |
std::map<Node, unsigned> num_const[2]; |
731 |
227826 |
for (unsigned j = 0; j < 2; j++) |
732 |
|
{ |
733 |
151884 |
std::vector<Node>& jvec = j == 0 ? avec : bvec; |
734 |
315921 |
for (const Node& cc : jvec) |
735 |
|
{ |
736 |
164037 |
if (cc.isConst()) |
737 |
|
{ |
738 |
82884 |
num_const[j][cc]++; |
739 |
|
} |
740 |
|
else |
741 |
|
{ |
742 |
81153 |
num_nconst[j][cc]++; |
743 |
|
} |
744 |
|
} |
745 |
|
} |
746 |
75942 |
bool ms_success = true; |
747 |
76589 |
for (std::pair<const Node, unsigned>& nncp : num_nconst[0]) |
748 |
|
{ |
749 |
24346 |
if (nncp.second > num_nconst[1][nncp.first]) |
750 |
|
{ |
751 |
23699 |
ms_success = false; |
752 |
23699 |
break; |
753 |
|
} |
754 |
|
} |
755 |
75942 |
if (ms_success) |
756 |
|
{ |
757 |
|
// count the number of constant characters in the first argument |
758 |
156729 |
std::map<Node, unsigned> count_const[2]; |
759 |
104471 |
std::vector<Node> chars; |
760 |
156729 |
for (unsigned j = 0; j < 2; j++) |
761 |
|
{ |
762 |
160955 |
for (std::pair<const Node, unsigned>& ncp : num_const[j]) |
763 |
|
{ |
764 |
112938 |
Node cn = ncp.first; |
765 |
56469 |
Assert(cn.isConst()); |
766 |
112938 |
std::vector<Node> cnChars = Word::getChars(cn); |
767 |
140675 |
for (const Node& ch : cnChars) |
768 |
|
{ |
769 |
84206 |
count_const[j][ch] += ncp.second; |
770 |
84206 |
if (std::find(chars.begin(), chars.end(), ch) == chars.end()) |
771 |
|
{ |
772 |
61224 |
chars.push_back(ch); |
773 |
|
} |
774 |
|
} |
775 |
|
} |
776 |
|
} |
777 |
104486 |
Trace("strings-entail-ms-ss") |
778 |
52243 |
<< "For " << a << " and " << b << " : " << std::endl; |
779 |
113452 |
for (const Node& ch : chars) |
780 |
|
{ |
781 |
61224 |
Trace("strings-entail-ms-ss") << " # occurrences of substring "; |
782 |
61224 |
Trace("strings-entail-ms-ss") << ch << " in arguments is "; |
783 |
122448 |
Trace("strings-entail-ms-ss") |
784 |
61224 |
<< count_const[0][ch] << " / " << count_const[1][ch] << std::endl; |
785 |
61224 |
if (count_const[0][ch] < count_const[1][ch]) |
786 |
|
{ |
787 |
15 |
return true; |
788 |
|
} |
789 |
|
} |
790 |
|
|
791 |
|
// TODO (#1180): count the number of 2,3,4,.. character substrings |
792 |
|
// for example: |
793 |
|
// str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false |
794 |
|
// since the second argument contains more occurrences of "bb". |
795 |
|
// note this is orthogonal reasoning to inductive reasoning |
796 |
|
// via regular membership reduction in Liang et al CAV 2015. |
797 |
|
} |
798 |
75927 |
return false; |
799 |
|
} |
800 |
|
|
801 |
99810 |
Node StringsEntail::checkHomogeneousString(Node a) |
802 |
|
{ |
803 |
199620 |
std::vector<Node> avec; |
804 |
99810 |
utils::getConcat(getMultisetApproximation(a), avec); |
805 |
|
|
806 |
99810 |
bool cValid = false; |
807 |
199620 |
Node c; |
808 |
150100 |
for (const Node& ac : avec) |
809 |
|
{ |
810 |
102587 |
if (ac.isConst()) |
811 |
|
{ |
812 |
111816 |
std::vector<Node> acv = Word::getChars(ac); |
813 |
98310 |
for (const Node& cc : acv) |
814 |
|
{ |
815 |
48020 |
if (!cValid) |
816 |
|
{ |
817 |
33385 |
cValid = true; |
818 |
33385 |
c = cc; |
819 |
|
} |
820 |
14635 |
else if (c != cc) |
821 |
|
{ |
822 |
|
// Found a different character |
823 |
11236 |
return Node::null(); |
824 |
|
} |
825 |
|
} |
826 |
|
} |
827 |
|
else |
828 |
|
{ |
829 |
|
// Could produce a different character |
830 |
41061 |
return Node::null(); |
831 |
|
} |
832 |
|
} |
833 |
|
|
834 |
47513 |
if (!cValid) |
835 |
|
{ |
836 |
27846 |
return Word::mkEmptyWord(a.getType()); |
837 |
|
} |
838 |
|
|
839 |
19667 |
return c; |
840 |
|
} |
841 |
|
|
842 |
188807 |
Node StringsEntail::getMultisetApproximation(Node a) |
843 |
|
{ |
844 |
188807 |
NodeManager* nm = NodeManager::currentNM(); |
845 |
188807 |
if (a.getKind() == STRING_SUBSTR) |
846 |
|
{ |
847 |
64920 |
return a[0]; |
848 |
|
} |
849 |
123887 |
else if (a.getKind() == STRING_REPLACE) |
850 |
|
{ |
851 |
2400 |
return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2])); |
852 |
|
} |
853 |
121487 |
else if (a.getKind() == STRING_CONCAT) |
854 |
|
{ |
855 |
9120 |
NodeBuilder nb(STRING_CONCAT); |
856 |
15215 |
for (const Node& ac : a) |
857 |
|
{ |
858 |
10655 |
nb << getMultisetApproximation(ac); |
859 |
|
} |
860 |
4560 |
return nb.constructNode(); |
861 |
|
} |
862 |
|
else |
863 |
|
{ |
864 |
116927 |
return a; |
865 |
|
} |
866 |
|
} |
867 |
|
|
868 |
127566 |
Node StringsEntail::getStringOrEmpty(Node n) |
869 |
|
{ |
870 |
127566 |
Node res; |
871 |
382734 |
while (res.isNull()) |
872 |
|
{ |
873 |
127584 |
switch (n.getKind()) |
874 |
|
{ |
875 |
530 |
case STRING_REPLACE: |
876 |
|
{ |
877 |
530 |
if (Word::isEmpty(n[0])) |
878 |
|
{ |
879 |
|
// (str.replace "" x y) --> y |
880 |
18 |
n = n[2]; |
881 |
18 |
break; |
882 |
|
} |
883 |
512 |
if (checkLengthOne(n[0]) && Word::isEmpty(n[2])) |
884 |
|
{ |
885 |
|
// (str.replace "A" x "") --> "A" |
886 |
30 |
res = n[0]; |
887 |
30 |
break; |
888 |
|
} |
889 |
|
|
890 |
482 |
res = n; |
891 |
482 |
break; |
892 |
|
} |
893 |
8475 |
case STRING_SUBSTR: |
894 |
|
{ |
895 |
8475 |
if (checkLengthOne(n[0])) |
896 |
|
{ |
897 |
|
// (str.substr "A" x y) --> "A" |
898 |
173 |
res = n[0]; |
899 |
173 |
break; |
900 |
|
} |
901 |
8302 |
res = n; |
902 |
8302 |
break; |
903 |
|
} |
904 |
118579 |
default: |
905 |
|
{ |
906 |
118579 |
res = n; |
907 |
118579 |
break; |
908 |
|
} |
909 |
|
} |
910 |
|
} |
911 |
127566 |
return res; |
912 |
|
} |
913 |
|
|
914 |
687 |
Node StringsEntail::inferEqsFromContains(Node x, Node y) |
915 |
|
{ |
916 |
687 |
NodeManager* nm = NodeManager::currentNM(); |
917 |
1374 |
Node emp = Word::mkEmptyWord(x.getType()); |
918 |
687 |
Assert(x.getType() == y.getType()); |
919 |
1374 |
TypeNode stype = x.getType(); |
920 |
|
|
921 |
1374 |
Node xLen = nm->mkNode(STRING_LENGTH, x); |
922 |
1374 |
std::vector<Node> yLens; |
923 |
687 |
if (y.getKind() != STRING_CONCAT) |
924 |
|
{ |
925 |
66 |
yLens.push_back(nm->mkNode(STRING_LENGTH, y)); |
926 |
|
} |
927 |
|
else |
928 |
|
{ |
929 |
2384 |
for (const Node& yi : y) |
930 |
|
{ |
931 |
1763 |
yLens.push_back(nm->mkNode(STRING_LENGTH, yi)); |
932 |
|
} |
933 |
|
} |
934 |
|
|
935 |
1374 |
std::vector<Node> zeroLens; |
936 |
687 |
if (x == emp) |
937 |
|
{ |
938 |
|
// If x is the empty string, then all ys must be empty, too, and we can |
939 |
|
// skip the expensive checks. Note that this is just a performance |
940 |
|
// optimization. |
941 |
43 |
zeroLens.swap(yLens); |
942 |
|
} |
943 |
|
else |
944 |
|
{ |
945 |
|
// Check if we can infer that str.len(x) <= str.len(y). If that is the |
946 |
|
// case, try to minimize the sum in str.len(x) <= str.len(y1) + ... + |
947 |
|
// str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality |
948 |
|
// true. The terms that can have length zero without making the inequality |
949 |
|
// false must be all be empty if (str.contains x y) is true. |
950 |
644 |
if (!d_arithEntail.inferZerosInSumGeq(xLen, yLens, zeroLens)) |
951 |
|
{ |
952 |
|
// We could not prove that the inequality holds |
953 |
566 |
return Node::null(); |
954 |
|
} |
955 |
78 |
else if (yLens.size() == y.getNumChildren()) |
956 |
|
{ |
957 |
|
// We could only prove that the inequality holds but not that any of the |
958 |
|
// ys must be empty |
959 |
40 |
return nm->mkNode(EQUAL, x, y); |
960 |
|
} |
961 |
|
} |
962 |
|
|
963 |
81 |
if (y.getKind() != STRING_CONCAT) |
964 |
|
{ |
965 |
16 |
if (zeroLens.size() == 1) |
966 |
|
{ |
967 |
|
// y is not a concatenation and we found that it must be empty, so just |
968 |
|
// return (= y "") |
969 |
|
Assert(zeroLens[0][0] == y); |
970 |
|
return nm->mkNode(EQUAL, y, emp); |
971 |
|
} |
972 |
|
else |
973 |
|
{ |
974 |
16 |
Assert(yLens.size() == 1 && yLens[0][0] == y); |
975 |
16 |
return nm->mkNode(EQUAL, x, y); |
976 |
|
} |
977 |
|
} |
978 |
|
|
979 |
130 |
std::vector<Node> cs; |
980 |
92 |
for (const Node& yiLen : yLens) |
981 |
|
{ |
982 |
27 |
Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end()); |
983 |
27 |
cs.push_back(yiLen[0]); |
984 |
|
} |
985 |
|
|
986 |
130 |
NodeBuilder nb(AND); |
987 |
|
// (= x (str.++ y1' ... ym')) |
988 |
65 |
if (!cs.empty()) |
989 |
|
{ |
990 |
22 |
nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype)); |
991 |
|
} |
992 |
|
// (= y1'' "") ... (= yk'' "") |
993 |
189 |
for (const Node& zeroLen : zeroLens) |
994 |
|
{ |
995 |
124 |
Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end()); |
996 |
124 |
nb << nm->mkNode(EQUAL, zeroLen[0], emp); |
997 |
|
} |
998 |
|
|
999 |
|
// (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' "")) |
1000 |
65 |
return nb.constructNode(); |
1001 |
|
} |
1002 |
|
|
1003 |
|
} // namespace strings |
1004 |
|
} // namespace theory |
1005 |
22746 |
} // namespace cvc5 |