1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Aina Niemetz |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of entailment tests involving regular expressions. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_entail.h" |
17 |
|
|
18 |
|
#include "theory/rewriter.h" |
19 |
|
#include "theory/strings/theory_strings_utils.h" |
20 |
|
#include "theory/strings/word.h" |
21 |
|
|
22 |
|
using namespace std; |
23 |
|
using namespace cvc5::kind; |
24 |
|
|
25 |
|
namespace cvc5 { |
26 |
|
namespace theory { |
27 |
|
namespace strings { |
28 |
|
|
29 |
8185 |
Node RegExpEntail::simpleRegexpConsume(std::vector<Node>& mchildren, |
30 |
|
std::vector<Node>& children, |
31 |
|
int dir) |
32 |
|
{ |
33 |
16370 |
Trace("regexp-ext-rewrite-debug") |
34 |
8185 |
<< "Simple reg exp consume, dir=" << dir << ":" << std::endl; |
35 |
16370 |
Trace("regexp-ext-rewrite-debug") |
36 |
8185 |
<< " mchildren : " << mchildren << std::endl; |
37 |
8185 |
Trace("regexp-ext-rewrite-debug") << " children : " << children << std::endl; |
38 |
8185 |
NodeManager* nm = NodeManager::currentNM(); |
39 |
8185 |
unsigned tmin = dir < 0 ? 0 : dir; |
40 |
8185 |
unsigned tmax = dir < 0 ? 1 : dir; |
41 |
|
// try to remove off front and back |
42 |
23390 |
for (unsigned t = 0; t < 2; t++) |
43 |
|
{ |
44 |
15990 |
if (tmin <= t && t <= tmax) |
45 |
|
{ |
46 |
10784 |
bool do_next = true; |
47 |
32308 |
while (!children.empty() && !mchildren.empty() && do_next) |
48 |
|
{ |
49 |
11547 |
do_next = false; |
50 |
22309 |
Node xc = mchildren[mchildren.size() - 1]; |
51 |
22309 |
Node rc = children[children.size() - 1]; |
52 |
23094 |
Trace("regexp-ext-rewrite-debug") |
53 |
11547 |
<< "* " << xc << " in " << rc << std::endl; |
54 |
11547 |
Assert(rc.getKind() != REGEXP_CONCAT); |
55 |
11547 |
Assert(xc.getKind() != STRING_CONCAT); |
56 |
11547 |
if (rc.getKind() == STRING_TO_REGEXP) |
57 |
|
{ |
58 |
4130 |
if (xc == rc[0]) |
59 |
|
{ |
60 |
352 |
children.pop_back(); |
61 |
352 |
mchildren.pop_back(); |
62 |
352 |
do_next = true; |
63 |
352 |
Trace("regexp-ext-rewrite-debug") << "- strip equal" << std::endl; |
64 |
|
} |
65 |
3778 |
else if (rc[0].isConst() && Word::isEmpty(rc[0])) |
66 |
|
{ |
67 |
116 |
Trace("regexp-ext-rewrite-debug") |
68 |
58 |
<< "- ignore empty RE" << std::endl; |
69 |
|
// ignore and continue |
70 |
58 |
children.pop_back(); |
71 |
58 |
do_next = true; |
72 |
|
} |
73 |
3720 |
else if (xc.isConst() && rc[0].isConst()) |
74 |
|
{ |
75 |
|
// split the constant |
76 |
|
size_t index; |
77 |
804 |
Node s = Word::splitConstant(xc, rc[0], index, t == 0); |
78 |
1348 |
Trace("regexp-ext-rewrite-debug") |
79 |
1348 |
<< "- CRE: Regexp const split : " << xc << " " << rc[0] |
80 |
674 |
<< " -> " << s << " " << index << " " << t << std::endl; |
81 |
674 |
if (s.isNull()) |
82 |
|
{ |
83 |
1088 |
Trace("regexp-ext-rewrite-debug") |
84 |
544 |
<< "...return false" << std::endl; |
85 |
544 |
return nm->mkConst(false); |
86 |
|
} |
87 |
|
else |
88 |
|
{ |
89 |
260 |
Trace("regexp-ext-rewrite-debug") |
90 |
130 |
<< "- strip equal const" << std::endl; |
91 |
130 |
children.pop_back(); |
92 |
130 |
mchildren.pop_back(); |
93 |
130 |
if (index == 0) |
94 |
|
{ |
95 |
116 |
mchildren.push_back(s); |
96 |
|
} |
97 |
|
else |
98 |
|
{ |
99 |
14 |
children.push_back(nm->mkNode(STRING_TO_REGEXP, s)); |
100 |
|
} |
101 |
|
} |
102 |
130 |
Trace("regexp-ext-rewrite-debug") << "- split const" << std::endl; |
103 |
130 |
do_next = true; |
104 |
|
} |
105 |
|
} |
106 |
7417 |
else if (xc.isConst()) |
107 |
|
{ |
108 |
|
// check for constants |
109 |
3683 |
cvc5::String s = xc.getConst<String>(); |
110 |
1962 |
if (Word::isEmpty(xc)) |
111 |
|
{ |
112 |
46 |
Trace("regexp-ext-rewrite-debug") << "- ignore empty" << std::endl; |
113 |
|
// ignore and continue |
114 |
46 |
mchildren.pop_back(); |
115 |
46 |
do_next = true; |
116 |
|
} |
117 |
1916 |
else if (rc.getKind() == REGEXP_RANGE || rc.getKind() == REGEXP_SIGMA) |
118 |
|
{ |
119 |
825 |
if (!isConstRegExp(rc)) |
120 |
|
{ |
121 |
|
// if a non-standard re.range term, abort |
122 |
225 |
return Node::null(); |
123 |
|
} |
124 |
1425 |
std::vector<unsigned> ssVec; |
125 |
825 |
ssVec.push_back(t == 0 ? s.back() : s.front()); |
126 |
1425 |
cvc5::String ss(ssVec); |
127 |
825 |
if (testConstStringInRegExp(ss, 0, rc)) |
128 |
|
{ |
129 |
|
// strip off one character |
130 |
600 |
mchildren.pop_back(); |
131 |
600 |
if (s.size() > 1) |
132 |
|
{ |
133 |
409 |
if (t == 0) |
134 |
|
{ |
135 |
260 |
mchildren.push_back(nm->mkConst(s.substr(0, s.size() - 1))); |
136 |
|
} |
137 |
|
else |
138 |
|
{ |
139 |
149 |
mchildren.push_back(nm->mkConst(s.substr(1))); |
140 |
|
} |
141 |
|
} |
142 |
600 |
children.pop_back(); |
143 |
600 |
do_next = true; |
144 |
|
} |
145 |
|
else |
146 |
|
{ |
147 |
450 |
Trace("regexp-ext-rewrite-debug") |
148 |
225 |
<< "...return false" << std::endl; |
149 |
225 |
return nm->mkConst(false); |
150 |
|
} |
151 |
|
} |
152 |
1091 |
else if (rc.getKind() == REGEXP_INTER || rc.getKind() == REGEXP_UNION) |
153 |
|
{ |
154 |
|
// see if any/each child does not work |
155 |
402 |
bool result_valid = true; |
156 |
788 |
Node result; |
157 |
788 |
Node emp_s = nm->mkConst(String("")); |
158 |
1238 |
for (unsigned i = 0; i < rc.getNumChildren(); i++) |
159 |
|
{ |
160 |
1672 |
std::vector<Node> mchildren_s; |
161 |
1672 |
std::vector<Node> children_s; |
162 |
836 |
mchildren_s.push_back(xc); |
163 |
836 |
utils::getConcat(rc[i], children_s); |
164 |
836 |
Trace("regexp-ext-rewrite-debug") << push; |
165 |
1672 |
Node ret = simpleRegexpConsume(mchildren_s, children_s, t); |
166 |
836 |
Trace("regexp-ext-rewrite-debug") << pop; |
167 |
836 |
if (!ret.isNull()) |
168 |
|
{ |
169 |
|
// one conjunct cannot be satisfied, return false |
170 |
386 |
if (rc.getKind() == REGEXP_INTER) |
171 |
|
{ |
172 |
|
Trace("regexp-ext-rewrite-debug") |
173 |
|
<< "...return " << ret << std::endl; |
174 |
|
return ret; |
175 |
|
} |
176 |
|
} |
177 |
|
else |
178 |
|
{ |
179 |
450 |
if (children_s.empty()) |
180 |
|
{ |
181 |
|
// if we were able to fully consume, store the result |
182 |
316 |
Assert(mchildren_s.size() <= 1); |
183 |
316 |
if (mchildren_s.empty()) |
184 |
|
{ |
185 |
84 |
mchildren_s.push_back(emp_s); |
186 |
|
} |
187 |
316 |
if (result.isNull()) |
188 |
|
{ |
189 |
304 |
result = mchildren_s[0]; |
190 |
|
} |
191 |
12 |
else if (result != mchildren_s[0]) |
192 |
|
{ |
193 |
12 |
result_valid = false; |
194 |
|
} |
195 |
|
} |
196 |
|
else |
197 |
|
{ |
198 |
134 |
result_valid = false; |
199 |
|
} |
200 |
|
} |
201 |
|
} |
202 |
402 |
if (result_valid) |
203 |
|
{ |
204 |
274 |
if (result.isNull()) |
205 |
|
{ |
206 |
|
// all disjuncts cannot be satisfied, return false |
207 |
16 |
Assert(rc.getKind() == REGEXP_UNION); |
208 |
32 |
Trace("regexp-ext-rewrite-debug") |
209 |
16 |
<< "...return false" << std::endl; |
210 |
16 |
return nm->mkConst(false); |
211 |
|
} |
212 |
|
else |
213 |
|
{ |
214 |
516 |
Trace("regexp-ext-rewrite-debug") |
215 |
258 |
<< "- same result, try again, children now " << children |
216 |
258 |
<< std::endl; |
217 |
|
// all branches led to the same result |
218 |
258 |
children.pop_back(); |
219 |
258 |
mchildren.pop_back(); |
220 |
258 |
if (result != emp_s) |
221 |
|
{ |
222 |
194 |
mchildren.push_back(result); |
223 |
|
} |
224 |
258 |
do_next = true; |
225 |
|
} |
226 |
|
} |
227 |
|
} |
228 |
689 |
else if (rc.getKind() == REGEXP_STAR) |
229 |
|
{ |
230 |
|
// check if there is no way that this star can be unrolled even once |
231 |
1238 |
std::vector<Node> mchildren_s; |
232 |
619 |
mchildren_s.insert( |
233 |
1238 |
mchildren_s.end(), mchildren.begin(), mchildren.end()); |
234 |
619 |
if (t == 1) |
235 |
|
{ |
236 |
267 |
std::reverse(mchildren_s.begin(), mchildren_s.end()); |
237 |
|
} |
238 |
1238 |
std::vector<Node> children_s; |
239 |
619 |
utils::getConcat(rc[0], children_s); |
240 |
1238 |
Trace("regexp-ext-rewrite-debug") |
241 |
619 |
<< "- recursive call on body of star" << std::endl; |
242 |
619 |
Trace("regexp-ext-rewrite-debug") << push; |
243 |
1238 |
Node ret = simpleRegexpConsume(mchildren_s, children_s, t); |
244 |
619 |
Trace("regexp-ext-rewrite-debug") << pop; |
245 |
619 |
if (!ret.isNull()) |
246 |
|
{ |
247 |
76 |
Trace("regexp-ext-rewrite-debug") |
248 |
38 |
<< "- CRE : regexp star infeasable " << xc << " " << rc |
249 |
38 |
<< std::endl; |
250 |
38 |
children.pop_back(); |
251 |
38 |
if (!children.empty()) |
252 |
|
{ |
253 |
24 |
Trace("regexp-ext-rewrite-debug") << "- continue" << std::endl; |
254 |
24 |
do_next = true; |
255 |
|
} |
256 |
|
} |
257 |
|
else |
258 |
|
{ |
259 |
581 |
if (children_s.empty()) |
260 |
|
{ |
261 |
|
// Check if beyond this, we hit a conflict. In this case, we |
262 |
|
// must repeat. Notice that we do not treat the case where |
263 |
|
// there are no more strings to consume as a failure, since |
264 |
|
// we may be within a recursive call, see issue #5510. |
265 |
518 |
bool can_skip = true; |
266 |
518 |
if (children.size() > 1) |
267 |
|
{ |
268 |
954 |
std::vector<Node> mchildren_ss; |
269 |
477 |
mchildren_ss.insert( |
270 |
954 |
mchildren_ss.end(), mchildren.begin(), mchildren.end()); |
271 |
954 |
std::vector<Node> children_ss; |
272 |
477 |
children_ss.insert( |
273 |
954 |
children_ss.end(), children.begin(), children.end() - 1); |
274 |
477 |
if (t == 1) |
275 |
|
{ |
276 |
192 |
std::reverse(mchildren_ss.begin(), mchildren_ss.end()); |
277 |
192 |
std::reverse(children_ss.begin(), children_ss.end()); |
278 |
|
} |
279 |
954 |
Trace("regexp-ext-rewrite-debug") |
280 |
477 |
<< "- recursive call required repeat star" << std::endl; |
281 |
477 |
Trace("regexp-ext-rewrite-debug") << push; |
282 |
954 |
Node rets = simpleRegexpConsume(mchildren_ss, children_ss, t); |
283 |
477 |
Trace("regexp-ext-rewrite-debug") << pop; |
284 |
477 |
if (!rets.isNull()) |
285 |
|
{ |
286 |
335 |
can_skip = false; |
287 |
|
} |
288 |
|
} |
289 |
518 |
if (!can_skip) |
290 |
|
{ |
291 |
670 |
TypeNode stype = nm->stringType(); |
292 |
670 |
Node prev = utils::mkConcat(mchildren, stype); |
293 |
670 |
Trace("regexp-ext-rewrite-debug") |
294 |
335 |
<< "- can't skip" << std::endl; |
295 |
|
// take the result of fully consuming once |
296 |
335 |
if (t == 1) |
297 |
|
{ |
298 |
140 |
std::reverse(mchildren_s.begin(), mchildren_s.end()); |
299 |
|
} |
300 |
335 |
mchildren.clear(); |
301 |
335 |
mchildren.insert( |
302 |
670 |
mchildren.end(), mchildren_s.begin(), mchildren_s.end()); |
303 |
670 |
Node curr = utils::mkConcat(mchildren, stype); |
304 |
335 |
do_next = (prev != curr); |
305 |
670 |
Trace("regexp-ext-rewrite-debug") |
306 |
335 |
<< "- do_next = " << do_next << std::endl; |
307 |
|
} |
308 |
|
else |
309 |
|
{ |
310 |
366 |
Trace("regexp-ext-rewrite-debug") |
311 |
183 |
<< "- can skip " << rc << " from " << xc << std::endl; |
312 |
|
} |
313 |
|
} |
314 |
|
} |
315 |
|
} |
316 |
|
} |
317 |
10762 |
if (!do_next) |
318 |
|
{ |
319 |
17918 |
Trace("regexp-ext-rewrite") |
320 |
8959 |
<< "- cannot consume : " << xc << " " << rc << std::endl; |
321 |
|
} |
322 |
|
} |
323 |
|
} |
324 |
15205 |
if (dir != 0) |
325 |
|
{ |
326 |
10315 |
std::reverse(children.begin(), children.end()); |
327 |
10315 |
std::reverse(mchildren.begin(), mchildren.end()); |
328 |
|
} |
329 |
|
} |
330 |
7400 |
Trace("regexp-ext-rewrite-debug") << "...finished, return null" << std::endl; |
331 |
7400 |
return Node::null(); |
332 |
|
} |
333 |
|
|
334 |
3225 |
bool RegExpEntail::isConstRegExp(TNode t) |
335 |
|
{ |
336 |
6450 |
std::unordered_set<TNode> visited; |
337 |
6450 |
std::vector<TNode> visit; |
338 |
6450 |
TNode cur; |
339 |
3225 |
visit.push_back(t); |
340 |
3889 |
do |
341 |
|
{ |
342 |
7114 |
cur = visit.back(); |
343 |
7114 |
visit.pop_back(); |
344 |
7114 |
if (visited.find(cur) == visited.end()) |
345 |
|
{ |
346 |
6360 |
visited.insert(cur); |
347 |
6360 |
Kind ck = cur.getKind(); |
348 |
6360 |
if (ck == STRING_TO_REGEXP) |
349 |
|
{ |
350 |
1667 |
if (!cur[0].isConst()) |
351 |
|
{ |
352 |
127 |
return false; |
353 |
|
} |
354 |
|
} |
355 |
4693 |
else if (ck == REGEXP_RANGE) |
356 |
|
{ |
357 |
4224 |
for (const Node& cn : cur) |
358 |
|
{ |
359 |
2816 |
if (!cn.isConst() || cn.getConst<String>().size() != 1) |
360 |
|
{ |
361 |
|
return false; |
362 |
|
} |
363 |
|
} |
364 |
|
} |
365 |
3285 |
else if (cur.isVar()) |
366 |
|
{ |
367 |
25 |
return false; |
368 |
|
} |
369 |
|
else |
370 |
|
{ |
371 |
7231 |
for (const Node& cn : cur) |
372 |
|
{ |
373 |
3971 |
visit.push_back(cn); |
374 |
|
} |
375 |
|
} |
376 |
|
} |
377 |
6962 |
} while (!visit.empty()); |
378 |
3073 |
return true; |
379 |
|
} |
380 |
|
|
381 |
848475 |
bool RegExpEntail::testConstStringInRegExp(cvc5::String& s, |
382 |
|
unsigned index_start, |
383 |
|
TNode r) |
384 |
|
{ |
385 |
848475 |
Assert(index_start <= s.size()); |
386 |
1696950 |
Trace("regexp-debug") << "Checking " << s << " in " << r << ", starting at " |
387 |
848475 |
<< index_start << std::endl; |
388 |
848475 |
Assert(!r.isVar()); |
389 |
848475 |
Kind k = r.getKind(); |
390 |
848475 |
switch (k) |
391 |
|
{ |
392 |
9244 |
case STRING_TO_REGEXP: |
393 |
|
{ |
394 |
18488 |
cvc5::String s2 = s.substr(index_start, s.size() - index_start); |
395 |
9244 |
if (r[0].isConst()) |
396 |
|
{ |
397 |
9244 |
return (s2 == r[0].getConst<String>()); |
398 |
|
} |
399 |
|
Assert(false) << "RegExp contains variables"; |
400 |
|
return false; |
401 |
|
} |
402 |
1167 |
case REGEXP_CONCAT: |
403 |
|
{ |
404 |
1167 |
if (s.size() != index_start) |
405 |
|
{ |
406 |
1856 |
std::vector<int> vec_k(r.getNumChildren(), -1); |
407 |
928 |
int start = 0; |
408 |
928 |
int left = (int)s.size() - index_start; |
409 |
928 |
int i = 0; |
410 |
10396 |
while (i < (int)r.getNumChildren()) |
411 |
|
{ |
412 |
5662 |
bool flag = true; |
413 |
5662 |
if (i == (int)r.getNumChildren() - 1) |
414 |
|
{ |
415 |
1753 |
if (testConstStringInRegExp(s, index_start + start, r[i])) |
416 |
|
{ |
417 |
420 |
return true; |
418 |
|
} |
419 |
|
} |
420 |
3909 |
else if (i == -1) |
421 |
|
{ |
422 |
508 |
return false; |
423 |
|
} |
424 |
|
else |
425 |
|
{ |
426 |
11951 |
for (vec_k[i] = vec_k[i] + 1; vec_k[i] <= left; ++vec_k[i]) |
427 |
|
{ |
428 |
19519 |
cvc5::String t = s.substr(index_start + start, vec_k[i]); |
429 |
10969 |
if (testConstStringInRegExp(t, 0, r[i])) |
430 |
|
{ |
431 |
2419 |
start += vec_k[i]; |
432 |
2419 |
left -= vec_k[i]; |
433 |
2419 |
flag = false; |
434 |
2419 |
++i; |
435 |
2419 |
vec_k[i] = -1; |
436 |
2419 |
break; |
437 |
|
} |
438 |
|
} |
439 |
|
} |
440 |
|
|
441 |
4734 |
if (flag) |
442 |
|
{ |
443 |
2315 |
--i; |
444 |
2315 |
if (i >= 0) |
445 |
|
{ |
446 |
1807 |
start -= vec_k[i]; |
447 |
1807 |
left += vec_k[i]; |
448 |
|
} |
449 |
|
} |
450 |
|
} |
451 |
|
return false; |
452 |
|
} |
453 |
|
else |
454 |
|
{ |
455 |
258 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
456 |
|
{ |
457 |
258 |
if (!testConstStringInRegExp(s, index_start, r[i])) |
458 |
|
{ |
459 |
239 |
return false; |
460 |
|
} |
461 |
|
} |
462 |
|
return true; |
463 |
|
} |
464 |
|
} |
465 |
271824 |
case REGEXP_UNION: |
466 |
|
{ |
467 |
789200 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
468 |
|
{ |
469 |
536198 |
if (testConstStringInRegExp(s, index_start, r[i])) |
470 |
|
{ |
471 |
18822 |
return true; |
472 |
|
} |
473 |
|
} |
474 |
253002 |
return false; |
475 |
|
} |
476 |
207 |
case REGEXP_INTER: |
477 |
|
{ |
478 |
253 |
for (unsigned i = 0; i < r.getNumChildren(); ++i) |
479 |
|
{ |
480 |
232 |
if (!testConstStringInRegExp(s, index_start, r[i])) |
481 |
|
{ |
482 |
186 |
return false; |
483 |
|
} |
484 |
|
} |
485 |
21 |
return true; |
486 |
|
} |
487 |
23406 |
case REGEXP_STAR: |
488 |
|
{ |
489 |
23406 |
if (s.size() != index_start) |
490 |
|
{ |
491 |
293380 |
for (unsigned i = s.size() - index_start; i > 0; --i) |
492 |
|
{ |
493 |
547540 |
cvc5::String t = s.substr(index_start, i); |
494 |
276935 |
if (testConstStringInRegExp(t, 0, r[0])) |
495 |
|
{ |
496 |
41836 |
if (index_start + i == s.size() |
497 |
41836 |
|| testConstStringInRegExp(s, index_start + i, r)) |
498 |
|
{ |
499 |
6330 |
return true; |
500 |
|
} |
501 |
|
} |
502 |
|
} |
503 |
16445 |
return false; |
504 |
|
} |
505 |
|
else |
506 |
|
{ |
507 |
631 |
return true; |
508 |
|
} |
509 |
|
} |
510 |
10 |
case REGEXP_EMPTY: |
511 |
|
{ |
512 |
10 |
return false; |
513 |
|
} |
514 |
3653 |
case REGEXP_SIGMA: |
515 |
|
{ |
516 |
3653 |
if (s.size() == index_start + 1) |
517 |
|
{ |
518 |
1403 |
return true; |
519 |
|
} |
520 |
|
else |
521 |
|
{ |
522 |
2250 |
return false; |
523 |
|
} |
524 |
|
} |
525 |
538892 |
case REGEXP_RANGE: |
526 |
|
{ |
527 |
538892 |
if (s.size() == index_start + 1) |
528 |
|
{ |
529 |
35131 |
unsigned a = r[0].getConst<String>().front(); |
530 |
35131 |
unsigned b = r[1].getConst<String>().front(); |
531 |
35131 |
unsigned c = s.back(); |
532 |
35131 |
return (a <= c && c <= b); |
533 |
|
} |
534 |
|
else |
535 |
|
{ |
536 |
503761 |
return false; |
537 |
|
} |
538 |
|
} |
539 |
|
case REGEXP_LOOP: |
540 |
|
{ |
541 |
|
NodeManager* nm = NodeManager::currentNM(); |
542 |
|
uint32_t l = r[1].getConst<Rational>().getNumerator().toUnsignedInt(); |
543 |
|
if (s.size() == index_start) |
544 |
|
{ |
545 |
|
return l == 0 ? true : testConstStringInRegExp(s, index_start, r[0]); |
546 |
|
} |
547 |
|
else if (l == 0 && r[1] == r[2]) |
548 |
|
{ |
549 |
|
return false; |
550 |
|
} |
551 |
|
else |
552 |
|
{ |
553 |
|
Assert(r.getNumChildren() == 3) |
554 |
|
<< "String rewriter error: LOOP has 2 children"; |
555 |
|
if (l == 0) |
556 |
|
{ |
557 |
|
// R{0,u} |
558 |
|
uint32_t u = r[2].getConst<Rational>().getNumerator().toUnsignedInt(); |
559 |
|
for (unsigned len = s.size() - index_start; len >= 1; len--) |
560 |
|
{ |
561 |
|
cvc5::String t = s.substr(index_start, len); |
562 |
|
if (testConstStringInRegExp(t, 0, r[0])) |
563 |
|
{ |
564 |
|
if (len + index_start == s.size()) |
565 |
|
{ |
566 |
|
return true; |
567 |
|
} |
568 |
|
else |
569 |
|
{ |
570 |
|
Node num2 = nm->mkConst(cvc5::Rational(u - 1)); |
571 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, r[0], r[1], num2); |
572 |
|
if (testConstStringInRegExp(s, index_start + len, r2)) |
573 |
|
{ |
574 |
|
return true; |
575 |
|
} |
576 |
|
} |
577 |
|
} |
578 |
|
} |
579 |
|
return false; |
580 |
|
} |
581 |
|
else |
582 |
|
{ |
583 |
|
// R{l,l} |
584 |
|
Assert(r[1] == r[2]) |
585 |
|
<< "String rewriter error: LOOP nums are not equal"; |
586 |
|
if (l > s.size() - index_start) |
587 |
|
{ |
588 |
|
if (testConstStringInRegExp(s, s.size(), r[0])) |
589 |
|
{ |
590 |
|
l = s.size() - index_start; |
591 |
|
} |
592 |
|
else |
593 |
|
{ |
594 |
|
return false; |
595 |
|
} |
596 |
|
} |
597 |
|
for (unsigned len = 1; len <= s.size() - index_start; len++) |
598 |
|
{ |
599 |
|
cvc5::String t = s.substr(index_start, len); |
600 |
|
if (testConstStringInRegExp(t, 0, r[0])) |
601 |
|
{ |
602 |
|
Node num2 = nm->mkConst(cvc5::Rational(l - 1)); |
603 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, r[0], num2, num2); |
604 |
|
if (testConstStringInRegExp(s, index_start + len, r2)) |
605 |
|
{ |
606 |
|
return true; |
607 |
|
} |
608 |
|
} |
609 |
|
} |
610 |
|
return false; |
611 |
|
} |
612 |
|
} |
613 |
|
} |
614 |
72 |
case REGEXP_COMPLEMENT: |
615 |
|
{ |
616 |
72 |
return !testConstStringInRegExp(s, index_start, r[0]); |
617 |
|
break; |
618 |
|
} |
619 |
|
default: |
620 |
|
{ |
621 |
|
Assert(!utils::isRegExpKind(k)); |
622 |
|
return false; |
623 |
|
} |
624 |
|
} |
625 |
|
} |
626 |
|
|
627 |
168 |
bool RegExpEntail::hasEpsilonNode(TNode node) |
628 |
|
{ |
629 |
520 |
for (const Node& nc : node) |
630 |
|
{ |
631 |
374 |
if (nc.getKind() == STRING_TO_REGEXP && Word::isEmpty(nc[0])) |
632 |
|
{ |
633 |
22 |
return true; |
634 |
|
} |
635 |
|
} |
636 |
146 |
return false; |
637 |
|
} |
638 |
|
|
639 |
1049 |
Node RegExpEntail::getFixedLengthForRegexp(Node n) |
640 |
|
{ |
641 |
1049 |
NodeManager* nm = NodeManager::currentNM(); |
642 |
1049 |
if (n.getKind() == STRING_TO_REGEXP) |
643 |
|
{ |
644 |
297 |
Node ret = nm->mkNode(STRING_LENGTH, n[0]); |
645 |
297 |
ret = Rewriter::rewrite(ret); |
646 |
297 |
if (ret.isConst()) |
647 |
|
{ |
648 |
297 |
return ret; |
649 |
|
} |
650 |
|
} |
651 |
752 |
else if (n.getKind() == REGEXP_SIGMA || n.getKind() == REGEXP_RANGE) |
652 |
|
{ |
653 |
282 |
return nm->mkConst(Rational(1)); |
654 |
|
} |
655 |
470 |
else if (n.getKind() == REGEXP_UNION || n.getKind() == REGEXP_INTER) |
656 |
|
{ |
657 |
328 |
Node ret; |
658 |
373 |
for (const Node& nc : n) |
659 |
|
{ |
660 |
495 |
Node flc = getFixedLengthForRegexp(nc); |
661 |
286 |
if (flc.isNull() || (!ret.isNull() && ret != flc)) |
662 |
|
{ |
663 |
77 |
return Node::null(); |
664 |
|
} |
665 |
209 |
else if (ret.isNull()) |
666 |
|
{ |
667 |
|
// first time |
668 |
122 |
ret = flc; |
669 |
|
} |
670 |
|
} |
671 |
87 |
return ret; |
672 |
|
} |
673 |
306 |
else if (n.getKind() == REGEXP_CONCAT) |
674 |
|
{ |
675 |
244 |
NodeBuilder nb(PLUS); |
676 |
206 |
for (const Node& nc : n) |
677 |
|
{ |
678 |
290 |
Node flc = getFixedLengthForRegexp(nc); |
679 |
206 |
if (flc.isNull()) |
680 |
|
{ |
681 |
122 |
return flc; |
682 |
|
} |
683 |
84 |
nb << flc; |
684 |
|
} |
685 |
|
Node ret = nb.constructNode(); |
686 |
|
ret = Rewriter::rewrite(ret); |
687 |
|
return ret; |
688 |
|
} |
689 |
184 |
return Node::null(); |
690 |
|
} |
691 |
|
|
692 |
461 |
bool RegExpEntail::regExpIncludes(Node r1, Node r2) |
693 |
|
{ |
694 |
461 |
Assert(Rewriter::rewrite(r1) == r1); |
695 |
461 |
Assert(Rewriter::rewrite(r2) == r2); |
696 |
461 |
if (r1 == r2) |
697 |
|
{ |
698 |
11 |
return true; |
699 |
|
} |
700 |
|
|
701 |
|
// This method only works on a fragment of regular expressions |
702 |
450 |
if (!utils::isSimpleRegExp(r1) || !utils::isSimpleRegExp(r2)) |
703 |
|
{ |
704 |
312 |
return false; |
705 |
|
} |
706 |
138 |
NodeManager* nm = NodeManager::currentNM(); |
707 |
276 |
Node sigma = nm->mkNode(REGEXP_SIGMA, std::vector<Node>{}); |
708 |
276 |
Node sigmaStar = nm->mkNode(REGEXP_STAR, sigma); |
709 |
|
|
710 |
276 |
std::vector<Node> v1, v2; |
711 |
138 |
utils::getRegexpComponents(r1, v1); |
712 |
138 |
utils::getRegexpComponents(r2, v2); |
713 |
|
|
714 |
|
// In the following, we iterate over `r2` (the "includee") and try to |
715 |
|
// match it with `r1`. `idxs`/`newIdxs` keep track of all the possible |
716 |
|
// positions in `r1` that we could currently be at. |
717 |
276 |
std::unordered_set<size_t> newIdxs = {0}; |
718 |
276 |
std::unordered_set<size_t> idxs; |
719 |
906 |
for (const Node& n2 : v2) |
720 |
|
{ |
721 |
|
// Transfer elements from `newIdxs` to `idxs`. Out-of-bound indices are |
722 |
|
// removed and for (re.* re.allchar), we additionally include the option of |
723 |
|
// skipping it. Indices must be smaller than the size of `v1`. |
724 |
804 |
idxs.clear(); |
725 |
2644 |
for (size_t idx : newIdxs) |
726 |
|
{ |
727 |
1840 |
if (idx < v1.size()) |
728 |
|
{ |
729 |
1820 |
idxs.insert(idx); |
730 |
1820 |
if (idx + 1 < v1.size() && v1[idx] == sigmaStar) |
731 |
|
{ |
732 |
1038 |
Assert(idx + 1 == v1.size() || v1[idx + 1] != sigmaStar); |
733 |
1038 |
idxs.insert(idx + 1); |
734 |
|
} |
735 |
|
} |
736 |
|
} |
737 |
804 |
newIdxs.clear(); |
738 |
|
|
739 |
804 |
if (n2.getKind() == STRING_TO_REGEXP || n2 == sigma) |
740 |
|
{ |
741 |
596 |
Assert(n2 == sigma |
742 |
|
|| (n2[0].isConst() && n2[0].getConst<String>().size() == 1)); |
743 |
2736 |
for (size_t idx : idxs) |
744 |
|
{ |
745 |
2140 |
if (v1[idx] == sigma || v1[idx] == n2) |
746 |
|
{ |
747 |
|
// Given a character or an re.allchar in `r2`, we can either |
748 |
|
// match it with a corresponding character in `r1` or an |
749 |
|
// re.allchar |
750 |
646 |
newIdxs.insert(idx + 1); |
751 |
|
} |
752 |
|
} |
753 |
|
} |
754 |
|
|
755 |
3662 |
for (size_t idx : idxs) |
756 |
|
{ |
757 |
2858 |
if (v1[idx] == sigmaStar) |
758 |
|
{ |
759 |
|
// (re.* re.allchar) can match an arbitrary amount of `r2` |
760 |
1144 |
newIdxs.insert(idx); |
761 |
|
} |
762 |
1714 |
else if (utils::isUnboundedWildcard(v1, idx)) |
763 |
|
{ |
764 |
|
// If a series of re.allchar is followed by (re.* re.allchar), we |
765 |
|
// can decide not to "waste" the re.allchar because the order of |
766 |
|
// the two wildcards is not observable (i.e. it does not change |
767 |
|
// the sequences matched by the regular expression) |
768 |
444 |
newIdxs.insert(idx); |
769 |
|
} |
770 |
|
} |
771 |
|
|
772 |
804 |
if (newIdxs.empty()) |
773 |
|
{ |
774 |
|
// If there are no candidates, we can't match the remainder of r2 |
775 |
36 |
return false; |
776 |
|
} |
777 |
|
} |
778 |
|
|
779 |
|
// We have processed all of `r2`. We are now looking if there was also a |
780 |
|
// path to the end in `r1`. This makes sure that we don't have leftover |
781 |
|
// bits in `r1` that don't match anything in `r2`. |
782 |
102 |
bool result = false; |
783 |
288 |
for (size_t idx : newIdxs) |
784 |
|
{ |
785 |
240 |
if (idx == v1.size() || (idx == v1.size() - 1 && v1[idx] == sigmaStar)) |
786 |
|
{ |
787 |
54 |
result = true; |
788 |
54 |
break; |
789 |
|
} |
790 |
|
} |
791 |
|
|
792 |
102 |
return result; |
793 |
|
} |
794 |
|
|
795 |
|
} // namespace strings |
796 |
|
} // namespace theory |
797 |
28191 |
} // namespace cvc5 |