1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving regular expressions. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_entail.h" |
17 |
|
|
18 |
|
#include "theory/rewriter.h" |
19 |
|
#include "theory/strings/theory_strings_utils.h" |
20 |
|
#include "theory/strings/word.h" |
21 |
|
#include "util/rational.h" |
22 |
|
#include "util/string.h" |
23 |
|
|
24 |
|
using namespace std; |
25 |
|
using namespace cvc5::kind; |
26 |
|
|
27 |
|
namespace cvc5 { |
28 |
|
namespace theory { |
29 |
|
namespace strings { |
30 |
|
|
31 |
15167 |
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, |
32 |
|
std::vector<Node>& children, |
33 |
|
int dir) |
34 |
|
{ |
35 |
30334 |
Trace("regexp-ext-rewrite-debug") |
36 |
15167 |
<< "Simple reg exp consume, dir=" << dir << ":" << std::endl; |
37 |
30334 |
Trace("regexp-ext-rewrite-debug") |
38 |
15167 |
<< " mchildren : " << mchildren << std::endl; |
39 |
15167 |
Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; |
40 |
15167 |
NodeManager* nm = NodeManager::currentNM(); |
41 |
15167 |
unsigned tmin = dir < 0 ? 0 : dir; |
42 |
15167 |
unsigned tmax = dir < 0 ? 1 : dir; |
43 |
|
// try to remove off front and back |
44 |
42804 |
for (unsigned t = 0; t < 2; t++) |
45 |
|
{ |
46 |
29764 |
if (tmin <= t && t <= tmax) |
47 |
|
{ |
48 |
19379 |
bool do_next = true; |
49 |
57511 |
while (!children.empty() && !mchildren.empty() && do_next) |
50 |
|
{ |
51 |
21193 |
do_next = false; |
52 |
40259 |
Node xc = mchildren[mchildren.size() - 1]; |
53 |
40259 |
Node rc = children[children.size() - 1]; |
54 |
42386 |
Trace("regexp-ext-rewrite-debug") |
55 |
21193 |
<< "* " << xc << " in " << rc << std::endl; |
56 |
21193 |
Assert(rc.getKind() != REGEXP_CONCAT); |
57 |
21193 |
Assert(xc.getKind() != STRING_CONCAT); |
58 |
21193 |
if (rc.getKind() == STRING_TO_REGEXP) |
59 |
|
{ |
60 |
8643 |
if (xc == rc[0]) |
61 |
|
{ |
62 |
1256 |
children.pop_back(); |
63 |
1256 |
mchildren.pop_back(); |
64 |
1256 |
do_next = true; |
65 |
1256 |
Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl; |
66 |
|
} |
67 |
7387 |
else if (rc[0].isConst() && Word::isEmpty(rc[0])) |
68 |
|
{ |
69 |
224 |
Trace("regexp-ext-rewrite-debug") |
70 |
112 |
<< "- ignore empty RE" << std::endl; |
71 |
|
// ignore and continue |
72 |
112 |
children.pop_back(); |
73 |
112 |
do_next = true; |
74 |
|
} |
75 |
7275 |
else if (xc.isConst() && rc[0].isConst()) |
76 |
|
{ |
77 |
|
// split the constant |
78 |
|
size_t index; |
79 |
3148 |
Node s = Word::splitConstant(xc, rc[0], index, t == 0); |
80 |
4936 |
Trace("regexp-ext-rewrite-debug") |
81 |
4936 |
<< "- CRE: Regexp const split : " << xc << " " << rc[0] |
82 |
2468 |
<< " -> " << s << " " << index << " " << t << std::endl; |
83 |
2468 |
if (s.isNull()) |
84 |
|
{ |
85 |
3576 |
Trace("regexp-ext-rewrite-debug") |
86 |
1788 |
<< "...return false" << std::endl; |
87 |
1788 |
return nm->mkConst(false); |
88 |
|
} |
89 |
|
else |
90 |
|
{ |
91 |
1360 |
Trace("regexp-ext-rewrite-debug") |
92 |
680 |
<< "- strip equal const" << std::endl; |
93 |
680 |
children.pop_back(); |
94 |
680 |
mchildren.pop_back(); |
95 |
680 |
if (index == 0) |
96 |
|
{ |
97 |
630 |
mchildren.push_back(s); |
98 |
|
} |
99 |
|
else |
100 |
|
{ |
101 |
50 |
children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); |
102 |
|
} |
103 |
|
} |
104 |
680 |
Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl; |
105 |
680 |
do_next = true; |
106 |
|
} |
107 |
|
} |
108 |
12550 |
else if (xc.isConst()) |
109 |
|
{ |
110 |
|
// check for constants |
111 |
8233 |
cvc5::String s = xc.getConst<String>(); |
112 |
4286 |
if (Word::isEmpty(xc)) |
113 |
|
{ |
114 |
80 |
Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl; |
115 |
|
// ignore and continue |
116 |
80 |
mchildren.pop_back(); |
117 |
80 |
do_next = true; |
118 |
|
} |
119 |
4206 |
else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA) |
120 |
|
{ |
121 |
1318 |
if (!isConstRegExp(rc)) |
122 |
|
{ |
123 |
|
// if a non-standard re.range term, abort |
124 |
225 |
return Node::null(); |
125 |
|
} |
126 |
2411 |
std::vector<unsigned> ssVec; |
127 |
1318 |
ssVec.push_back(t == 0 ? s.back() : s.front()); |
128 |
2411 |
cvc5::String ss(ssVec); |
129 |
1318 |
if (testConstStringInRegExp(ss, 0, rc)) |
130 |
|
{ |
131 |
|
// strip off one character |
132 |
1093 |
mchildren.pop_back(); |
133 |
1093 |
if (s.size() > 1) |
134 |
|
{ |
135 |
923 |
if (t == 0) |
136 |
|
{ |
137 |
747 |
mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1))); |
138 |
|
} |
139 |
|
else |
140 |
|
{ |
141 |
176 |
mchildren.push_back(nm->mkConst(s.substr(1))); |
142 |
|
} |
143 |
|
} |
144 |
1093 |
children.pop_back(); |
145 |
1093 |
do_next = true; |
146 |
|
} |
147 |
|
else |
148 |
|
{ |
149 |
450 |
Trace("regexp-ext-rewrite-debug") |
150 |
225 |
<< "...return false" << std::endl; |
151 |
225 |
return nm->mkConst(false); |
152 |
|
} |
153 |
|
} |
154 |
2888 |
else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION) |
155 |
|
{ |
156 |
|
// see if any/each child does not work |
157 |
1196 |
bool result_valid = true; |
158 |
2278 |
Node result; |
159 |
2278 |
Node emp_s = nm->mkConst(String("")); |
160 |
3726 |
for (unsigned i = 0; i < rc.getNumChildren(); i++) |
161 |
|
{ |
162 |
5060 |
std::vector<Node> mchildren_s; |
163 |
5060 |
std::vector<Node> children_s; |
164 |
2530 |
mchildren_s.push_back(xc); |
165 |
2530 |
utils::getConcat(rc[i], children_s); |
166 |
2530 |
Trace("regexp-ext-rewrite-debug") << push; |
167 |
5060 |
Node ret = simpleRegexpConsume(mchildren_s, children_s, t); |
168 |
2530 |
Trace("regexp-ext-rewrite-debug") << pop; |
169 |
2530 |
if (!ret.isNull()) |
170 |
|
{ |
171 |
|
// one conjunct cannot be satisfied, return false |
172 |
1332 |
if (rc.getKind() == REGEXP_INTER) |
173 |
|
{ |
174 |
|
Trace("regexp-ext-rewrite-debug") |
175 |
|
<< "...return " << ret << std::endl; |
176 |
|
return ret; |
177 |
|
} |
178 |
|
} |
179 |
|
else |
180 |
|
{ |
181 |
1198 |
if (children_s.empty()) |
182 |
|
{ |
183 |
|
// if we were able to fully consume, store the result |
184 |
830 |
Assert(mchildren_s.size() <= 1); |
185 |
830 |
if (mchildren_s.empty()) |
186 |
|
{ |
187 |
378 |
mchildren_s.push_back(emp_s); |
188 |
|
} |
189 |
830 |
if (result.isNull()) |
190 |
|
{ |
191 |
786 |
result = mchildren_s[0]; |
192 |
|
} |
193 |
44 |
else if (result != mchildren_s[0]) |
194 |
|
{ |
195 |
44 |
result_valid = false; |
196 |
|
} |
197 |
|
} |
198 |
|
else |
199 |
|
{ |
200 |
368 |
result_valid = false; |
201 |
|
} |
202 |
|
} |
203 |
|
} |
204 |
1196 |
if (result_valid) |
205 |
|
{ |
206 |
802 |
if (result.isNull()) |
207 |
|
{ |
208 |
|
// all disjuncts cannot be satisfied, return false |
209 |
114 |
Assert(rc.getKind() == REGEXP_UNION); |
210 |
228 |
Trace("regexp-ext-rewrite-debug") |
211 |
114 |
<< "...return false" << std::endl; |
212 |
114 |
return nm->mkConst(false); |
213 |
|
} |
214 |
|
else |
215 |
|
{ |
216 |
1376 |
Trace("regexp-ext-rewrite-debug") |
217 |
688 |
<< "- same result, try again, children now " << children |
218 |
688 |
<< std::endl; |
219 |
|
// all branches led to the same result |
220 |
688 |
children.pop_back(); |
221 |
688 |
mchildren.pop_back(); |
222 |
688 |
if (result != emp_s) |
223 |
|
{ |
224 |
376 |
mchildren.push_back(result); |
225 |
|
} |
226 |
688 |
do_next = true; |
227 |
|
} |
228 |
|
} |
229 |
|
} |
230 |
1692 |
else if (rc.getKind() == REGEXP_STAR) |
231 |
|
{ |
232 |
|
// check if there is no way that this star can be unrolled even once |
233 |
3244 |
std::vector<Node> mchildren_s; |
234 |
1622 |
mchildren_s.insert( |
235 |
3244 |
mchildren_s.end(), mchildren.begin(), mchildren.end()); |
236 |
1622 |
if (t == 1) |
237 |
|
{ |
238 |
683 |
std::reverse(mchildren_s.begin(), mchildren_s.end()); |
239 |
|
} |
240 |
3244 |
std::vector<Node> children_s; |
241 |
1622 |
utils::getConcat(rc[0], children_s); |
242 |
3244 |
Trace("regexp-ext-rewrite-debug") |
243 |
1622 |
<< "- recursive call on body of star" << std::endl; |
244 |
1622 |
Trace("regexp-ext-rewrite-debug") << push; |
245 |
3244 |
Node ret = simpleRegexpConsume(mchildren_s, children_s, t); |
246 |
1622 |
Trace("regexp-ext-rewrite-debug") << pop; |
247 |
1622 |
if (!ret.isNull()) |
248 |
|
{ |
249 |
396 |
Trace("regexp-ext-rewrite-debug") |
250 |
198 |
<< "- CRE : regexp star infeasable " << xc << " " << rc |
251 |
198 |
<< std::endl; |
252 |
198 |
children.pop_back(); |
253 |
198 |
if (!children.empty()) |
254 |
|
{ |
255 |
158 |
Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl; |
256 |
158 |
do_next = true; |
257 |
|
} |
258 |
|
} |
259 |
|
else |
260 |
|
{ |
261 |
1424 |
if (children_s.empty()) |
262 |
|
{ |
263 |
|
// Check if beyond this, we hit a conflict. In this case, we |
264 |
|
// must repeat. Notice that we do not treat the case where |
265 |
|
// there are no more strings to consume as a failure, since |
266 |
|
// we may be within a recursive call, see issue #5510. |
267 |
1195 |
bool can_skip = true; |
268 |
1195 |
if (children.size() > 1) |
269 |
|
{ |
270 |
2286 |
std::vector<Node> mchildren_ss; |
271 |
1143 |
mchildren_ss.insert( |
272 |
2286 |
mchildren_ss.end(), mchildren.begin(), mchildren.end()); |
273 |
2286 |
std::vector<Node> children_ss; |
274 |
1143 |
children_ss.insert( |
275 |
2286 |
children_ss.end(), children.begin(), children.end() - 1); |
276 |
1143 |
if (t == 1) |
277 |
|
{ |
278 |
323 |
std::reverse(mchildren_ss.begin(), mchildren_ss.end()); |
279 |
323 |
std::reverse(children_ss.begin(), children_ss.end()); |
280 |
|
} |
281 |
2286 |
Trace("regexp-ext-rewrite-debug") |
282 |
1143 |
<< "- recursive call required repeat star" << std::endl; |
283 |
1143 |
Trace("regexp-ext-rewrite-debug") << push; |
284 |
2286 |
Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t); |
285 |
1143 |
Trace("regexp-ext-rewrite-debug") << pop; |
286 |
1143 |
if (!rets.isNull()) |
287 |
|
{ |
288 |
460 |
can_skip = false; |
289 |
|
} |
290 |
|
} |
291 |
1195 |
if (!can_skip) |
292 |
|
{ |
293 |
920 |
TypeNode stype = nm->stringType(); |
294 |
920 |
Node prev = utils::mkConcat(mchildren, stype); |
295 |
920 |
Trace("regexp-ext-rewrite-debug") |
296 |
460 |
<< "- can't skip" << std::endl; |
297 |
|
// take the result of fully consuming once |
298 |
460 |
if (t == 1) |
299 |
|
{ |
300 |
217 |
std::reverse(mchildren_s.begin(), mchildren_s.end()); |
301 |
|
} |
302 |
460 |
mchildren.clear(); |
303 |
460 |
mchildren.insert( |
304 |
920 |
mchildren.end(), mchildren_s.begin(), mchildren_s.end()); |
305 |
920 |
Node curr = utils::mkConcat(mchildren, stype); |
306 |
460 |
do_next = (prev != curr); |
307 |
920 |
Trace("regexp-ext-rewrite-debug") |
308 |
460 |
<< "- do_next = " << do_next << std::endl; |
309 |
|
} |
310 |
|
else |
311 |
|
{ |
312 |
1470 |
Trace("regexp-ext-rewrite-debug") |
313 |
735 |
<< "- can skip " << rc << " from " << xc << std::endl; |
314 |
|
} |
315 |
|
} |
316 |
|
} |
317 |
|
} |
318 |
|
} |
319 |
19066 |
if (!do_next) |
320 |
|
{ |
321 |
29078 |
Trace("regexp-ext-rewrite") |
322 |
14539 |
<< "- cannot consume : " << xc << " " << rc << std::endl; |
323 |
|
} |
324 |
|
} |
325 |
|
} |
326 |
27637 |
if (dir != 0) |
327 |
|
{ |
328 |
18427 |
std::reverse(children.begin(), children.end()); |
329 |
18427 |
std::reverse(mchildren.begin(), mchildren.end()); |
330 |
|
} |
331 |
|
} |
332 |
13040 |
Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl; |
333 |
13040 |
return Node::null(); |
334 |
|
} |
335 |
|
|
336 |
6010 |
bool RegExpEntail::isConstRegExp(TNode t) |
337 |
|
{ |
338 |
12020 |
std::unordered_set<TNode> visited; |
339 |
12020 |
std::vector<TNode> visit; |
340 |
12020 |
TNode cur; |
341 |
6010 |
visit.push_back(t); |
342 |
14476 |
do |
343 |
|
{ |
344 |
20486 |
cur = visit.back(); |
345 |
20486 |
visit.pop_back(); |
346 |
20486 |
if (visited.find(cur) == visited.end()) |
347 |
|
{ |
348 |
17765 |
visited.insert(cur); |
349 |
17765 |
Kind ck = cur.getKind(); |
350 |
17765 |
if (ck == STRING_TO_REGEXP) |
351 |
|
{ |
352 |
6004 |
if (!cur[0].isConst()) |
353 |
|
{ |
354 |
679 |
return false; |
355 |
|
} |
356 |
|
} |
357 |
11761 |
else if (ck == REGEXP_RANGE) |
358 |
|
{ |
359 |
4194 |
for (const Node& cn : cur) |
360 |
|
{ |
361 |
2796 |
if (!cn.isConst() || cn.getConst<String>().size() != 1) |
362 |
|
{ |
363 |
|
return false; |
364 |
|
} |
365 |
|
} |
366 |
|
} |
367 |
10363 |
else if (ck == ITE) |
368 |
|
{ |
369 |
|
return false; |
370 |
|
} |
371 |
10363 |
else if (cur.isVar()) |
372 |
|
{ |
373 |
25 |
return false; |
374 |
|
} |
375 |
|
else |
376 |
|
{ |
377 |
25414 |
for (const Node& cn : cur) |
378 |
|
{ |
379 |
15076 |
visit.push_back(cn); |
380 |
|
} |
381 |
|
} |
382 |
|
} |
383 |
19782 |
} while (!visit.empty()); |
384 |
5306 |
return true; |
385 |
|
} |
386 |
|
|
387 |
878541 |
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s, |
388 |
|
unsigned index_start, |
389 |
|
TNode r) |
390 |
|
{ |
391 |
878541 |
Assert(index_start <= s.size()); |
392 |
1757082 |
Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " |
393 |
878541 |
<< index_start << std::endl; |
394 |
878541 |
Assert(!r.isVar()); |
395 |
878541 |
Kind k = r.getKind(); |
396 |
878541 |
switch (k) |
397 |
|
{ |
398 |
22303 |
case STRING_TO_REGEXP: |
399 |
|
{ |
400 |
44606 |
cvc5::String s2 = s.substr(index_start, s.size() - index_start); |
401 |
22303 |
if (r[0].isConst()) |
402 |
|
{ |
403 |
22303 |
return (s2 == r[0].getConst<String>()); |
404 |
|
} |
405 |
|
Assert(false) << "RegExp contains variables"; |
406 |
|
return false; |
407 |
|
} |
408 |
3763 |
case REGEXP_CONCAT: |
409 |
|
{ |
410 |
3763 |
if (s.size() != index_start) |
411 |
|
{ |
412 |
5808 |
std::vector<int> vec_k(r.getNumChildren(), -1); |
413 |
2904 |
int start = 0; |
414 |
2904 |
int left = (int)s.size() - index_start; |
415 |
2904 |
int i = 0; |
416 |
26330 |
while (i < (int)r.getNumChildren()) |
417 |
|
{ |
418 |
14617 |
bool flag = true; |
419 |
14617 |
if (i == (int)r.getNumChildren() - 1) |
420 |
|
{ |
421 |
2899 |
if (testConstStringInRegExp(s, index_start + start, r[i])) |
422 |
|
{ |
423 |
1029 |
return true; |
424 |
|
} |
425 |
|
} |
426 |
11718 |
else if (i == -1) |
427 |
|
{ |
428 |
1875 |
return false; |
429 |
|
} |
430 |
|
else |
431 |
|
{ |
432 |
26912 |
for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) |
433 |
|
{ |
434 |
39909 |
cvc5::String t = s.substr(index_start + start, vec_k[i]); |
435 |
22840 |
if (testConstStringInRegExp(t, 0, r[i])) |
436 |
|
{ |
437 |
5771 |
start += vec_k[i]; |
438 |
5771 |
left -= vec_k[i]; |
439 |
5771 |
flag = false; |
440 |
5771 |
++i; |
441 |
5771 |
vec_k[i] = -1; |
442 |
5771 |
break; |
443 |
|
} |
444 |
|
} |
445 |
|
} |
446 |
|
|
447 |
11713 |
if (flag) |
448 |
|
{ |
449 |
5942 |
--i; |
450 |
5942 |
if (i >= 0) |
451 |
|
{ |
452 |
4067 |
start -= vec_k[i]; |
453 |
4067 |
left += vec_k[i]; |
454 |
|
} |
455 |
|
} |
456 |
|
} |
457 |
|
return false; |
458 |
|
} |
459 |
|
else |
460 |
|
{ |
461 |
1181 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
462 |
|
{ |
463 |
1181 |
if (!testConstStringInRegExp(s, index_start, r[i])) |
464 |
|
{ |
465 |
859 |
return false; |
466 |
|
} |
467 |
|
} |
468 |
|
return true; |
469 |
|
} |
470 |
|
} |
471 |
273757 |
case REGEXP_UNION: |
472 |
|
{ |
473 |
794060 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
474 |
|
{ |
475 |
539773 |
if (testConstStringInRegExp(s, index_start, r[i])) |
476 |
|
{ |
477 |
19470 |
return true; |
478 |
|
} |
479 |
|
} |
480 |
254287 |
return false; |
481 |
|
} |
482 |
1009 |
case REGEXP_INTER: |
483 |
|
{ |
484 |
1369 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
485 |
|
{ |
486 |
1204 |
if (!testConstStringInRegExp(s, index_start, r[i])) |
487 |
|
{ |
488 |
844 |
return false; |
489 |
|
} |
490 |
|
} |
491 |
165 |
return true; |
492 |
|
} |
493 |
28652 |
case REGEXP_STAR: |
494 |
|
{ |
495 |
28652 |
if (s.size() != index_start) |
496 |
|
{ |
497 |
301334 |
for (unsigned i = s.size() - index_start; i > 0; --i) |
498 |
|
{ |
499 |
559329 |
cvc5::String t = s.substr(index_start, i); |
500 |
284230 |
if (testConstStringInRegExp(t, 0, r[0])) |
501 |
|
{ |
502 |
47614 |
if (index_start + i == s.size() |
503 |
47614 |
|| testConstStringInRegExp(s, index_start + i, r)) |
504 |
|
{ |
505 |
9131 |
return true; |
506 |
|
} |
507 |
|
} |
508 |
|
} |
509 |
17104 |
return false; |
510 |
|
} |
511 |
|
else |
512 |
|
{ |
513 |
2417 |
return true; |
514 |
|
} |
515 |
|
} |
516 |
10 |
case REGEXP_EMPTY: |
517 |
|
{ |
518 |
10 |
return false; |
519 |
|
} |
520 |
10262 |
case REGEXP_SIGMA: |
521 |
|
{ |
522 |
10262 |
if (s.size() == index_start + 1) |
523 |
|
{ |
524 |
4565 |
return true; |
525 |
|
} |
526 |
|
else |
527 |
|
{ |
528 |
5697 |
return false; |
529 |
|
} |
530 |
|
} |
531 |
538444 |
case REGEXP_RANGE: |
532 |
|
{ |
533 |
538444 |
if (s.size() == index_start + 1) |
534 |
|
{ |
535 |
34889 |
unsigned a = r[0].getConst<String>().front(); |
536 |
34889 |
unsigned b = r[1].getConst<String>().front(); |
537 |
34889 |
unsigned c = s.back(); |
538 |
34889 |
return (a <= c && c <= b); |
539 |
|
} |
540 |
|
else |
541 |
|
{ |
542 |
503555 |
return false; |
543 |
|
} |
544 |
|
} |
545 |
|
case REGEXP_LOOP: |
546 |
|
{ |
547 |
|
NodeManager* nm = NodeManager::currentNM(); |
548 |
|
uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt(); |
549 |
|
if (s.size() == index_start) |
550 |
|
{ |
551 |
|
return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]); |
552 |
|
} |
553 |
|
else if (l == 0 && r[1] == r[2]) |
554 |
|
{ |
555 |
|
return false; |
556 |
|
} |
557 |
|
else |
558 |
|
{ |
559 |
|
Assert(r.getNumChildren() == 3) |
560 |
|
<< "String rewriter error: LOOP has 2 children"; |
561 |
|
if (l == 0) |
562 |
|
{ |
563 |
|
// R{0,u} |
564 |
|
uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); |
565 |
|
for (unsigned len = s.size() - index_start; len >= 1; len--) |
566 |
|
{ |
567 |
|
cvc5::String t = s.substr(index_start, len); |
568 |
|
if (testConstStringInRegExp(t, 0, r[0])) |
569 |
|
{ |
570 |
|
if (len + index_start == s.size()) |
571 |
|
{ |
572 |
|
return true; |
573 |
|
} |
574 |
|
else |
575 |
|
{ |
576 |
|
Node num2 = nm->mkConst(cvc5::Rational(u - 1)); |
577 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2); |
578 |
|
if (testConstStringInRegExp(s, index_start + len, r2)) |
579 |
|
{ |
580 |
|
return true; |
581 |
|
} |
582 |
|
} |
583 |
|
} |
584 |
|
} |
585 |
|
return false; |
586 |
|
} |
587 |
|
else |
588 |
|
{ |
589 |
|
// R{l,l} |
590 |
|
Assert(r[1] == r[2]) |
591 |
|
<< "String rewriter error: LOOP nums are not equal"; |
592 |
|
if (l > s.size() - index_start) |
593 |
|
{ |
594 |
|
if (testConstStringInRegExp(s, s.size(), r[0])) |
595 |
|
{ |
596 |
|
l = s.size() - index_start; |
597 |
|
} |
598 |
|
else |
599 |
|
{ |
600 |
|
return false; |
601 |
|
} |
602 |
|
} |
603 |
|
for (unsigned len = 1; len <= s.size() - index_start; len++) |
604 |
|
{ |
605 |
|
cvc5::String t = s.substr(index_start, len); |
606 |
|
if (testConstStringInRegExp(t, 0, r[0])) |
607 |
|
{ |
608 |
|
Node num2 = nm->mkConst(cvc5::Rational(l - 1)); |
609 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2); |
610 |
|
if (testConstStringInRegExp(s, index_start + len, r2)) |
611 |
|
{ |
612 |
|
return true; |
613 |
|
} |
614 |
|
} |
615 |
|
} |
616 |
|
return false; |
617 |
|
} |
618 |
|
} |
619 |
|
} |
620 |
341 |
case REGEXP_COMPLEMENT: |
621 |
|
{ |
622 |
341 |
return !testConstStringInRegExp(s, index_start, r[0]); |
623 |
|
break; |
624 |
|
} |
625 |
|
default: |
626 |
|
{ |
627 |
|
Assert(!utils::isRegExpKind(k)); |
628 |
|
return false; |
629 |
|
} |
630 |
|
} |
631 |
|
} |
632 |
|
|
633 |
256 |
bool RegExpEntail::hasEpsilonNode(TNode node) |
634 |
|
{ |
635 |
776 |
for (const Node& nc : node) |
636 |
|
{ |
637 |
558 |
if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0])) |
638 |
|
{ |
639 |
38 |
return true; |
640 |
|
} |
641 |
|
} |
642 |
218 |
return false; |
643 |
|
} |
644 |
|
|
645 |
2903 |
Node RegExpEntail::getFixedLengthForRegexp(Node n) |
646 |
|
{ |
647 |
2903 |
NodeManager* nm = NodeManager::currentNM(); |
648 |
2903 |
if (n.getKind() == STRING_TO_REGEXP) |
649 |
|
{ |
650 |
1144 |
Node ret = nm->mkNode(STRING_LENGTH, n[0]); |
651 |
1117 |
ret = Rewriter::rewrite(ret); |
652 |
1117 |
if (ret.isConst()) |
653 |
|
{ |
654 |
1090 |
return ret; |
655 |
|
} |
656 |
|
} |
657 |
1786 |
else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) |
658 |
|
{ |
659 |
350 |
return nm->mkConst(Rational(1)); |
660 |
|
} |
661 |
1436 |
else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) |
662 |
|
{ |
663 |
982 |
Node ret; |
664 |
1127 |
for (const Node& nc : n) |
665 |
|
{ |
666 |
1588 |
Node flc = getFixedLengthForRegexp(nc); |
667 |
952 |
if (flc.isNull() || (!ret.isNull() && ret != flc)) |
668 |
|
{ |
669 |
316 |
return Node::null(); |
670 |
|
} |
671 |
636 |
else if (ret.isNull()) |
672 |
|
{ |
673 |
|
// first time |
674 |
437 |
ret = flc; |
675 |
|
} |
676 |
|
} |
677 |
175 |
return ret; |
678 |
|
} |
679 |
945 |
else if (n.getKind() == REGEXP_CONCAT) |
680 |
|
{ |
681 |
820 |
NodeBuilder nb(PLUS); |
682 |
780 |
for (const Node& nc : n) |
683 |
|
{ |
684 |
1150 |
Node flc = getFixedLengthForRegexp(nc); |
685 |
780 |
if (flc.isNull()) |
686 |
|
{ |
687 |
410 |
return flc; |
688 |
|
} |
689 |
370 |
nb << flc; |
690 |
|
} |
691 |
|
Node ret = nb.constructNode(); |
692 |
|
ret = Rewriter::rewrite(ret); |
693 |
|
return ret; |
694 |
|
} |
695 |
562 |
return Node::null(); |
696 |
|
} |
697 |
|
|
698 |
672 |
bool RegExpEntail::regExpIncludes(Node r1, Node r2) |
699 |
|
{ |
700 |
672 |
Assert(Rewriter::rewrite(r1) == r1); |
701 |
672 |
Assert(Rewriter::rewrite(r2) == r2); |
702 |
672 |
if (r1 == r2) |
703 |
|
{ |
704 |
16 |
return true; |
705 |
|
} |
706 |
|
|
707 |
|
// This method only works on a fragment of regular expressions |
708 |
656 |
if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2)) |
709 |
|
{ |
710 |
494 |
return false; |
711 |
|
} |
712 |
162 |
NodeManager* nm = NodeManager::currentNM(); |
713 |
324 |
Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{}); |
714 |
324 |
Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma); |
715 |
|
|
716 |
324 |
std::vector<Node> v1, v2; |
717 |
162 |
utils::getRegexpComponents(r1, v1); |
718 |
162 |
utils::getRegexpComponents(r2, v2); |
719 |
|
|
720 |
|
// In the following, we iterate over `r2` (the "includee") and try to |
721 |
|
// match it with `r1`. `idxs`/`newIdxs` keep track of all the possible |
722 |
|
// positions in `r1` that we could currently be at. |
723 |
324 |
std::unordered_set<size_t> newIdxs = {0}; |
724 |
324 |
std::unordered_set<size_t> idxs; |
725 |
950 |
for (const Node& n2 : v2) |
726 |
|
{ |
727 |
|
// Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are |
728 |
|
// removed and for (re.* re.allchar), we additionally include the option of |
729 |
|
// skipping it. Indices must be smaller than the size of `v1`. |
730 |
844 |
idxs.clear(); |
731 |
2756 |
for (size_t idx : newIdxs) |
732 |
|
{ |
733 |
1912 |
if (idx < v1.size()) |
734 |
|
{ |
735 |
1864 |
idxs.insert(idx); |
736 |
1864 |
if (idx + 1 < v1.size() && v1[idx] == sigmaStar) |
737 |
|
{ |
738 |
1078 |
Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar); |
739 |
1078 |
idxs.insert(idx + 1); |
740 |
|
} |
741 |
|
} |
742 |
|
} |
743 |
844 |
newIdxs.clear(); |
744 |
|
|
745 |
844 |
if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma) |
746 |
|
{ |
747 |
608 |
Assert(n2 == sigma |
748 |
|
|| (n2[0].isConst() && n2[0].getConst<String>().size() == 1)); |
749 |
2792 |
for (size_t idx : idxs) |
750 |
|
{ |
751 |
2184 |
if (v1[idx] == sigma || v1[idx] == n2) |
752 |
|
{ |
753 |
|
// Given a character or an re.allchar in `r2`, we can either |
754 |
|
// match it with a corresponding character in `r1` or an |
755 |
|
// re.allchar |
756 |
662 |
newIdxs.insert(idx + 1); |
757 |
|
} |
758 |
|
} |
759 |
|
} |
760 |
|
|
761 |
3786 |
for (size_t idx : idxs) |
762 |
|
{ |
763 |
2942 |
if (v1[idx] == sigmaStar) |
764 |
|
{ |
765 |
|
// (re.* re.allchar) can match an arbitrary amount of `r2` |
766 |
1188 |
newIdxs.insert(idx); |
767 |
|
} |
768 |
1754 |
else if (utils::isUnboundedWildcard(v1, idx)) |
769 |
|
{ |
770 |
|
// If a series of re.allchar is followed by (re.* re.allchar), we |
771 |
|
// can decide not to "waste" the re.allchar because the order of |
772 |
|
// the two wildcards is not observable (i.e. it does not change |
773 |
|
// the sequences matched by the regular expression) |
774 |
444 |
newIdxs.insert(idx); |
775 |
|
} |
776 |
|
} |
777 |
|
|
778 |
844 |
if (newIdxs.empty()) |
779 |
|
{ |
780 |
|
// If there are no candidates, we can't match the remainder of r2 |
781 |
56 |
return false; |
782 |
|
} |
783 |
|
} |
784 |
|
|
785 |
|
// We have processed all of `r2`. We are now looking if there was also a |
786 |
|
// path to the end in `r1`. This makes sure that we don't have leftover |
787 |
|
// bits in `r1` that don't match anything in `r2`. |
788 |
106 |
bool result = false; |
789 |
300 |
for (size_t idx : newIdxs) |
790 |
|
{ |
791 |
252 |
if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar)) |
792 |
|
{ |
793 |
58 |
result = true; |
794 |
58 |
break; |
795 |
|
} |
796 |
|
} |
797 |
|
|
798 |
106 |
return result; |
799 |
|
} |
800 |
|
|
801 |
|
} // namespace strings |
802 |
|
} // namespace theory |
803 |
29340 |
} // namespace cvc5 |