1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Tianyi Liang, Andrew Reynolds, Mathias Preiner |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Symbolic Regular Expresion Operations |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_operation.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "expr/node_algorithm.h" |
21 |
|
#include "options/strings_options.h" |
22 |
|
#include "theory/rewriter.h" |
23 |
|
#include "theory/strings/regexp_entail.h" |
24 |
|
#include "theory/strings/theory_strings_utils.h" |
25 |
|
#include "theory/strings/word.h" |
26 |
|
#include "util/regexp.h" |
27 |
|
|
28 |
|
using namespace cvc5::kind; |
29 |
|
|
30 |
|
namespace cvc5 { |
31 |
|
namespace theory { |
32 |
|
namespace strings { |
33 |
|
|
34 |
9917 |
RegExpOpr::RegExpOpr(SkolemCache* sc) |
35 |
|
: d_true(NodeManager::currentNM()->mkConst(true)), |
36 |
|
d_false(NodeManager::currentNM()->mkConst(false)), |
37 |
|
d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, |
38 |
19834 |
std::vector<Node>{})), |
39 |
19834 |
d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))), |
40 |
19834 |
d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))), |
41 |
|
d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, |
42 |
19834 |
std::vector<Node>{})), |
43 |
|
d_sigma_star( |
44 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)), |
45 |
89253 |
d_sc(sc) |
46 |
|
{ |
47 |
9917 |
d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType()); |
48 |
|
|
49 |
9917 |
d_emptySingleton = |
50 |
19834 |
NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); |
51 |
9917 |
d_lastchar = utils::getAlphabetCardinality() - 1; |
52 |
9917 |
} |
53 |
|
|
54 |
9914 |
RegExpOpr::~RegExpOpr() {} |
55 |
|
|
56 |
937 |
bool RegExpOpr::checkConstRegExp( Node r ) { |
57 |
937 |
Assert(r.getType().isRegExp()); |
58 |
1874 |
Trace("strings-regexp-cstre") |
59 |
937 |
<< "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl; |
60 |
937 |
RegExpConstType rct = getRegExpConstType(r); |
61 |
937 |
return rct != RE_C_VARIABLE; |
62 |
|
} |
63 |
|
|
64 |
1716 |
RegExpConstType RegExpOpr::getRegExpConstType(Node r) |
65 |
|
{ |
66 |
1716 |
Assert(r.getType().isRegExp()); |
67 |
1716 |
std::unordered_map<Node, RegExpConstType>::iterator it; |
68 |
3432 |
std::vector<TNode> visit; |
69 |
3432 |
TNode cur; |
70 |
1716 |
visit.push_back(r); |
71 |
2160 |
do |
72 |
|
{ |
73 |
3876 |
cur = visit.back(); |
74 |
3876 |
visit.pop_back(); |
75 |
3876 |
it = d_constCache.find(cur); |
76 |
|
|
77 |
3876 |
Kind ck = cur.getKind(); |
78 |
3876 |
if (it == d_constCache.end()) |
79 |
|
{ |
80 |
1177 |
if (ck == STRING_TO_REGEXP) |
81 |
|
{ |
82 |
672 |
Node tmp = Rewriter::rewrite(cur[0]); |
83 |
336 |
d_constCache[cur] = |
84 |
336 |
tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE; |
85 |
|
} |
86 |
841 |
else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE) |
87 |
|
{ |
88 |
120 |
d_constCache[cur] = RE_C_CONSTANT; |
89 |
|
} |
90 |
721 |
else if (!utils::isRegExpKind(ck)) |
91 |
|
{ |
92 |
|
// non-regular expression applications, e.g. function applications |
93 |
|
// with regular expression return type are treated as variables. |
94 |
2 |
d_constCache[cur] = RE_C_VARIABLE; |
95 |
|
} |
96 |
|
else |
97 |
|
{ |
98 |
719 |
d_constCache[cur] = RE_C_UNKNOWN; |
99 |
719 |
visit.push_back(cur); |
100 |
719 |
visit.insert(visit.end(), cur.begin(), cur.end()); |
101 |
|
} |
102 |
|
} |
103 |
2699 |
else if (it->second == RE_C_UNKNOWN) |
104 |
|
{ |
105 |
719 |
RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT; |
106 |
2160 |
for (const Node& cn : cur) |
107 |
|
{ |
108 |
1441 |
it = d_constCache.find(cn); |
109 |
1441 |
Assert(it != d_constCache.end()); |
110 |
1441 |
if (it->second > ret) |
111 |
|
{ |
112 |
364 |
ret = it->second; |
113 |
|
} |
114 |
|
} |
115 |
719 |
d_constCache[cur] = ret; |
116 |
|
} |
117 |
3876 |
} while (!visit.empty()); |
118 |
1716 |
Assert(d_constCache.find(r) != d_constCache.end()); |
119 |
3432 |
return d_constCache[r]; |
120 |
|
} |
121 |
|
|
122 |
|
// 0-unknown, 1-yes, 2-no |
123 |
1655 |
int RegExpOpr::delta( Node r, Node &exp ) { |
124 |
|
std::map<Node, std::pair<int, Node> >::const_iterator itd = |
125 |
1655 |
d_delta_cache.find(r); |
126 |
1655 |
if (itd != d_delta_cache.end()) |
127 |
|
{ |
128 |
|
// already computed |
129 |
1091 |
exp = itd->second.second; |
130 |
1091 |
return itd->second.first; |
131 |
|
} |
132 |
564 |
Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl; |
133 |
564 |
int ret = 0; |
134 |
564 |
NodeManager* nm = NodeManager::currentNM(); |
135 |
564 |
Kind k = r.getKind(); |
136 |
564 |
switch (k) |
137 |
|
{ |
138 |
|
case REGEXP_EMPTY: |
139 |
|
case REGEXP_SIGMA: |
140 |
|
case REGEXP_RANGE: |
141 |
|
{ |
142 |
|
// does not contain empty string |
143 |
|
ret = 2; |
144 |
|
break; |
145 |
|
} |
146 |
182 |
case STRING_TO_REGEXP: |
147 |
|
{ |
148 |
364 |
Node tmp = Rewriter::rewrite(r[0]); |
149 |
182 |
if (tmp.isConst()) |
150 |
|
{ |
151 |
178 |
if (tmp == d_emptyString) |
152 |
|
{ |
153 |
12 |
ret = 1; |
154 |
|
} else { |
155 |
166 |
ret = 2; |
156 |
|
} |
157 |
|
} |
158 |
|
else |
159 |
|
{ |
160 |
4 |
ret = 0; |
161 |
4 |
if (tmp.getKind() == STRING_CONCAT) |
162 |
|
{ |
163 |
|
for (const Node& tmpc : tmp) |
164 |
|
{ |
165 |
|
if (tmpc.isConst()) |
166 |
|
{ |
167 |
|
ret = 2; |
168 |
|
break; |
169 |
|
} |
170 |
|
} |
171 |
|
} |
172 |
4 |
if (ret == 0) |
173 |
|
{ |
174 |
4 |
exp = r[0].eqNode(d_emptyString); |
175 |
|
} |
176 |
|
} |
177 |
182 |
break; |
178 |
|
} |
179 |
291 |
case REGEXP_CONCAT: |
180 |
|
case REGEXP_UNION: |
181 |
|
case REGEXP_INTER: |
182 |
|
{ |
183 |
|
// has there been an unknown child? |
184 |
291 |
bool hasUnknownChild = false; |
185 |
582 |
std::vector<Node> vec; |
186 |
291 |
int checkTmp = k == REGEXP_UNION ? 1 : 2; |
187 |
291 |
int retTmp = k == REGEXP_UNION ? 2 : 1; |
188 |
415 |
for (const Node& rc : r) |
189 |
|
{ |
190 |
515 |
Node exp2; |
191 |
391 |
int tmp = delta(rc, exp2); |
192 |
391 |
if (tmp == checkTmp) |
193 |
|
{ |
194 |
|
// return is implied by the child's return value |
195 |
267 |
ret = checkTmp; |
196 |
267 |
break; |
197 |
|
} |
198 |
124 |
else if (tmp == 0) |
199 |
|
{ |
200 |
|
// unknown if child contains empty string |
201 |
4 |
Assert(!exp2.isNull()); |
202 |
4 |
vec.push_back(exp2); |
203 |
4 |
hasUnknownChild = true; |
204 |
|
} |
205 |
|
} |
206 |
291 |
if (ret != checkTmp) |
207 |
|
{ |
208 |
24 |
if (!hasUnknownChild) |
209 |
|
{ |
210 |
24 |
ret = retTmp; |
211 |
|
} else { |
212 |
|
Kind kr = k == REGEXP_UNION ? OR : AND; |
213 |
|
exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec); |
214 |
|
} |
215 |
|
} |
216 |
291 |
break; |
217 |
|
} |
218 |
87 |
case REGEXP_STAR: |
219 |
|
case REGEXP_OPT: |
220 |
|
{ |
221 |
|
// contains empty string |
222 |
87 |
ret = 1; |
223 |
87 |
break; |
224 |
|
} |
225 |
|
case REGEXP_PLUS: |
226 |
|
{ |
227 |
|
ret = delta(r[0], exp); |
228 |
|
break; |
229 |
|
} |
230 |
|
case REGEXP_LOOP: |
231 |
|
{ |
232 |
|
uint32_t lo = utils::getLoopMinOccurrences(r); |
233 |
|
if (lo == 0) |
234 |
|
{ |
235 |
|
ret = 1; |
236 |
|
} |
237 |
|
else |
238 |
|
{ |
239 |
|
ret = delta(r[0], exp); |
240 |
|
} |
241 |
|
break; |
242 |
|
} |
243 |
4 |
case REGEXP_COMPLEMENT: |
244 |
|
{ |
245 |
4 |
int tmp = delta(r[0], exp); |
246 |
|
// flip the result if known |
247 |
4 |
ret = tmp == 0 ? 0 : (3 - tmp); |
248 |
4 |
exp = exp.isNull() ? exp : exp.negate(); |
249 |
4 |
break; |
250 |
|
} |
251 |
|
default: |
252 |
|
{ |
253 |
|
Assert(!utils::isRegExpKind(k)); |
254 |
|
break; |
255 |
|
} |
256 |
|
} |
257 |
564 |
if (!exp.isNull()) |
258 |
|
{ |
259 |
4 |
exp = Rewriter::rewrite(exp); |
260 |
|
} |
261 |
1128 |
std::pair<int, Node> p(ret, exp); |
262 |
564 |
d_delta_cache[r] = p; |
263 |
1128 |
Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r |
264 |
564 |
<< ", expr = " << exp << std::endl; |
265 |
564 |
return ret; |
266 |
|
} |
267 |
|
|
268 |
|
// 0-unknown, 1-yes, 2-no |
269 |
24 |
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode) |
270 |
|
{ |
271 |
24 |
Assert(c.size() < 2); |
272 |
24 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
273 |
|
|
274 |
24 |
int ret = 1; |
275 |
24 |
retNode = d_emptyRegexp; |
276 |
24 |
NodeManager* nm = NodeManager::currentNM(); |
277 |
24 |
SkolemManager* sm = nm->getSkolemManager(); |
278 |
|
|
279 |
48 |
PairNodeStr dv = std::make_pair( r, c ); |
280 |
24 |
if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) { |
281 |
4 |
retNode = d_deriv_cache[dv].first; |
282 |
4 |
ret = d_deriv_cache[dv].second; |
283 |
|
} |
284 |
20 |
else if (c.empty()) |
285 |
|
{ |
286 |
|
Node expNode; |
287 |
|
ret = delta( r, expNode ); |
288 |
|
if(ret == 0) { |
289 |
|
retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp); |
290 |
|
} else if(ret == 1) { |
291 |
|
retNode = r; |
292 |
|
} |
293 |
|
std::pair< Node, int > p(retNode, ret); |
294 |
|
d_deriv_cache[dv] = p; |
295 |
|
} else { |
296 |
20 |
switch( r.getKind() ) { |
297 |
|
case kind::REGEXP_EMPTY: { |
298 |
|
ret = 2; |
299 |
|
break; |
300 |
|
} |
301 |
|
case kind::REGEXP_SIGMA: { |
302 |
|
retNode = d_emptySingleton; |
303 |
|
break; |
304 |
|
} |
305 |
|
case kind::REGEXP_RANGE: { |
306 |
|
cvc5::String a = r[0].getConst<String>(); |
307 |
|
cvc5::String b = r[1].getConst<String>(); |
308 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
309 |
|
break; |
310 |
|
} |
311 |
8 |
case kind::STRING_TO_REGEXP: { |
312 |
16 |
Node tmp = Rewriter::rewrite(r[0]); |
313 |
8 |
if(tmp.isConst()) { |
314 |
8 |
if(tmp == d_emptyString) { |
315 |
4 |
ret = 2; |
316 |
|
} else { |
317 |
4 |
if (tmp.getConst<String>().front() == c.front()) |
318 |
|
{ |
319 |
|
retNode = |
320 |
|
nm->mkNode(STRING_TO_REGEXP, |
321 |
|
Word::getLength(tmp) == 1 ? d_emptyString |
322 |
|
: Word::substr(tmp, 1)); |
323 |
|
} else { |
324 |
4 |
ret = 2; |
325 |
|
} |
326 |
|
} |
327 |
|
} else { |
328 |
|
ret = 0; |
329 |
|
Node rest; |
330 |
|
if(tmp.getKind() == kind::STRING_CONCAT) { |
331 |
|
Node t2 = tmp[0]; |
332 |
|
if(t2.isConst()) { |
333 |
|
if (t2.getConst<String>().front() == c.front()) |
334 |
|
{ |
335 |
|
Node n = nm->mkNode(STRING_TO_REGEXP, |
336 |
|
Word::getLength(tmp) == 1 |
337 |
|
? d_emptyString |
338 |
|
: Word::substr(tmp, 1)); |
339 |
|
std::vector< Node > vec_nodes; |
340 |
|
vec_nodes.push_back(n); |
341 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
342 |
|
vec_nodes.push_back(tmp[i]); |
343 |
|
} |
344 |
|
retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
345 |
|
ret = 1; |
346 |
|
} else { |
347 |
|
ret = 2; |
348 |
|
} |
349 |
|
} else { |
350 |
|
tmp = tmp[0]; |
351 |
|
std::vector< Node > vec_nodes; |
352 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
353 |
|
vec_nodes.push_back(tmp[i]); |
354 |
|
} |
355 |
|
rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
356 |
|
} |
357 |
|
} |
358 |
|
if(ret == 0) { |
359 |
|
Node sk = |
360 |
|
sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp"); |
361 |
|
retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk); |
362 |
|
if(!rest.isNull()) { |
363 |
|
retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest)); |
364 |
|
} |
365 |
|
Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, |
366 |
|
NodeManager::currentNM()->mkConst(c), sk)); |
367 |
|
retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp)); |
368 |
|
} |
369 |
|
} |
370 |
8 |
break; |
371 |
|
} |
372 |
4 |
case kind::REGEXP_CONCAT: { |
373 |
8 |
std::vector< Node > vec_nodes; |
374 |
8 |
std::vector< Node > delta_nodes; |
375 |
8 |
Node dnode = d_true; |
376 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
377 |
16 |
Node dc; |
378 |
16 |
Node exp2; |
379 |
8 |
int rt = derivativeS(r[i], c, dc); |
380 |
8 |
if(rt != 2) { |
381 |
|
if(rt == 0) { |
382 |
|
ret = 0; |
383 |
|
} |
384 |
|
std::vector< Node > vec_nodes2; |
385 |
|
if(dc != d_emptySingleton) { |
386 |
|
vec_nodes2.push_back( dc ); |
387 |
|
} |
388 |
|
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
389 |
|
if(r[j] != d_emptySingleton) { |
390 |
|
vec_nodes2.push_back( r[j] ); |
391 |
|
} |
392 |
|
} |
393 |
|
Node tmp = vec_nodes2.size()==0 ? d_emptySingleton : |
394 |
|
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
395 |
|
if(dnode != d_true) { |
396 |
|
tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp)); |
397 |
|
ret = 0; |
398 |
|
} |
399 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
400 |
|
vec_nodes.push_back( tmp ); |
401 |
|
} |
402 |
|
} |
403 |
16 |
Node exp3; |
404 |
8 |
int rt2 = delta( r[i], exp3 ); |
405 |
8 |
if( rt2 == 0 ) { |
406 |
|
dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3)); |
407 |
8 |
} else if( rt2 == 2 ) { |
408 |
|
break; |
409 |
|
} |
410 |
|
} |
411 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
412 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
413 |
4 |
if(retNode == d_emptyRegexp) { |
414 |
4 |
ret = 2; |
415 |
|
} |
416 |
4 |
break; |
417 |
|
} |
418 |
4 |
case kind::REGEXP_UNION: { |
419 |
8 |
std::vector< Node > vec_nodes; |
420 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
421 |
16 |
Node dc; |
422 |
8 |
int rt = derivativeS(r[i], c, dc); |
423 |
8 |
if(rt == 0) { |
424 |
|
ret = 0; |
425 |
|
} |
426 |
8 |
if(rt != 2) { |
427 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
428 |
|
vec_nodes.push_back( dc ); |
429 |
|
} |
430 |
|
} |
431 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; |
432 |
|
} |
433 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
434 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
435 |
4 |
if(retNode == d_emptyRegexp) { |
436 |
4 |
ret = 2; |
437 |
|
} |
438 |
4 |
break; |
439 |
|
} |
440 |
|
case kind::REGEXP_INTER: { |
441 |
|
bool flag = true; |
442 |
|
bool flag_sg = false; |
443 |
|
std::vector< Node > vec_nodes; |
444 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
445 |
|
Node dc; |
446 |
|
int rt = derivativeS(r[i], c, dc); |
447 |
|
if(rt == 0) { |
448 |
|
ret = 0; |
449 |
|
} else if(rt == 2) { |
450 |
|
flag = false; |
451 |
|
break; |
452 |
|
} |
453 |
|
if(dc == d_sigma_star) { |
454 |
|
flag_sg = true; |
455 |
|
} else { |
456 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
457 |
|
vec_nodes.push_back( dc ); |
458 |
|
} |
459 |
|
} |
460 |
|
} |
461 |
|
if(flag) { |
462 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
463 |
|
retNode = d_sigma_star; |
464 |
|
} else { |
465 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
466 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
467 |
|
if(retNode == d_emptyRegexp) { |
468 |
|
ret = 2; |
469 |
|
} |
470 |
|
} |
471 |
|
} else { |
472 |
|
retNode = d_emptyRegexp; |
473 |
|
ret = 2; |
474 |
|
} |
475 |
|
break; |
476 |
|
} |
477 |
4 |
case kind::REGEXP_STAR: { |
478 |
8 |
Node dc; |
479 |
4 |
ret = derivativeS(r[0], c, dc); |
480 |
4 |
retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r )); |
481 |
4 |
break; |
482 |
|
} |
483 |
|
case kind::REGEXP_LOOP: { |
484 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
485 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
486 |
|
if (l == u && l == 0) |
487 |
|
{ |
488 |
|
ret = 2; |
489 |
|
//retNode = d_emptyRegexp; |
490 |
|
} else { |
491 |
|
Node dc; |
492 |
|
ret = derivativeS(r[0], c, dc); |
493 |
|
if(dc==d_emptyRegexp) { |
494 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
495 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
496 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
497 |
|
} else { |
498 |
|
retNode = d_emptyRegexp; |
499 |
|
} |
500 |
|
} |
501 |
|
break; |
502 |
|
} |
503 |
|
case kind::REGEXP_COMPLEMENT: |
504 |
|
{ |
505 |
|
// don't know result |
506 |
|
return 0; |
507 |
|
break; |
508 |
|
} |
509 |
|
default: { |
510 |
|
Assert(!utils::isRegExpKind(r.getKind())); |
511 |
|
return 0; |
512 |
|
break; |
513 |
|
} |
514 |
|
} |
515 |
20 |
if(retNode != d_emptyRegexp) { |
516 |
|
retNode = Rewriter::rewrite( retNode ); |
517 |
|
} |
518 |
40 |
std::pair< Node, int > p(retNode, ret); |
519 |
20 |
d_deriv_cache[dv] = p; |
520 |
|
} |
521 |
|
|
522 |
24 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
523 |
24 |
return ret; |
524 |
|
} |
525 |
|
|
526 |
1410 |
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c) |
527 |
|
{ |
528 |
1410 |
Assert(c.size() < 2); |
529 |
1410 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
530 |
1410 |
Node retNode = d_emptyRegexp; |
531 |
2820 |
PairNodeStr dv = std::make_pair( r, c ); |
532 |
1410 |
NodeManager* nm = NodeManager::currentNM(); |
533 |
1410 |
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { |
534 |
526 |
retNode = d_dv_cache[dv]; |
535 |
|
} |
536 |
884 |
else if (c.empty()) |
537 |
|
{ |
538 |
|
Node exp; |
539 |
|
int tmp = delta( r, exp ); |
540 |
|
if(tmp == 0) { |
541 |
|
// TODO variable |
542 |
|
retNode = d_emptyRegexp; |
543 |
|
} else if(tmp == 1) { |
544 |
|
retNode = r; |
545 |
|
} else { |
546 |
|
retNode = d_emptyRegexp; |
547 |
|
} |
548 |
|
} else { |
549 |
884 |
Kind k = r.getKind(); |
550 |
884 |
switch( k ) { |
551 |
|
case kind::REGEXP_EMPTY: { |
552 |
|
retNode = d_emptyRegexp; |
553 |
|
break; |
554 |
|
} |
555 |
|
case kind::REGEXP_SIGMA: { |
556 |
|
retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
557 |
|
break; |
558 |
|
} |
559 |
|
case kind::REGEXP_RANGE: { |
560 |
|
cvc5::String a = r[0].getConst<String>(); |
561 |
|
cvc5::String b = r[1].getConst<String>(); |
562 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
563 |
|
break; |
564 |
|
} |
565 |
270 |
case kind::STRING_TO_REGEXP: { |
566 |
270 |
if(r[0].isConst()) { |
567 |
270 |
if(r[0] == d_emptyString) { |
568 |
14 |
retNode = d_emptyRegexp; |
569 |
|
} else { |
570 |
256 |
if (r[0].getConst<String>().front() == c.front()) |
571 |
|
{ |
572 |
192 |
retNode = nm->mkNode(STRING_TO_REGEXP, |
573 |
384 |
Word::getLength(r[0]) == 1 |
574 |
384 |
? d_emptyString |
575 |
|
: Word::substr(r[0], 1)); |
576 |
|
} else { |
577 |
64 |
retNode = d_emptyRegexp; |
578 |
|
} |
579 |
|
} |
580 |
|
} else { |
581 |
|
// TODO variable |
582 |
|
retNode = d_emptyRegexp; |
583 |
|
} |
584 |
270 |
break; |
585 |
|
} |
586 |
355 |
case kind::REGEXP_CONCAT: { |
587 |
710 |
Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
588 |
710 |
std::vector< Node > vec_nodes; |
589 |
531 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
590 |
685 |
Node dc = derivativeSingle(r[i], c); |
591 |
509 |
if(dc != d_emptyRegexp) { |
592 |
714 |
std::vector< Node > vec_nodes2; |
593 |
357 |
if(dc != rees) { |
594 |
236 |
vec_nodes2.push_back( dc ); |
595 |
|
} |
596 |
768 |
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
597 |
411 |
if(r[j] != rees) { |
598 |
411 |
vec_nodes2.push_back( r[j] ); |
599 |
|
} |
600 |
|
} |
601 |
357 |
Node tmp = vec_nodes2.size()==0 ? rees : |
602 |
714 |
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
603 |
357 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
604 |
355 |
vec_nodes.push_back( tmp ); |
605 |
|
} |
606 |
|
} |
607 |
685 |
Node exp; |
608 |
509 |
if( delta( r[i], exp ) != 1 ) { |
609 |
333 |
break; |
610 |
|
} |
611 |
|
} |
612 |
999 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
613 |
644 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
614 |
355 |
break; |
615 |
|
} |
616 |
108 |
case kind::REGEXP_UNION: { |
617 |
216 |
std::vector< Node > vec_nodes; |
618 |
346 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
619 |
476 |
Node dc = derivativeSingle(r[i], c); |
620 |
238 |
if(dc != d_emptyRegexp) { |
621 |
100 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
622 |
98 |
vec_nodes.push_back( dc ); |
623 |
|
} |
624 |
|
} |
625 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl; |
626 |
|
} |
627 |
286 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
628 |
178 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
629 |
108 |
break; |
630 |
|
} |
631 |
|
case kind::REGEXP_INTER: { |
632 |
|
bool flag = true; |
633 |
|
bool flag_sg = false; |
634 |
|
std::vector< Node > vec_nodes; |
635 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
636 |
|
Node dc = derivativeSingle(r[i], c); |
637 |
|
if(dc != d_emptyRegexp) { |
638 |
|
if(dc == d_sigma_star) { |
639 |
|
flag_sg = true; |
640 |
|
} else { |
641 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
642 |
|
vec_nodes.push_back( dc ); |
643 |
|
} |
644 |
|
} |
645 |
|
} else { |
646 |
|
flag = false; |
647 |
|
break; |
648 |
|
} |
649 |
|
} |
650 |
|
if(flag) { |
651 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
652 |
|
retNode = d_sigma_star; |
653 |
|
} else { |
654 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
655 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
656 |
|
} |
657 |
|
} else { |
658 |
|
retNode = d_emptyRegexp; |
659 |
|
} |
660 |
|
break; |
661 |
|
} |
662 |
151 |
case kind::REGEXP_STAR: { |
663 |
302 |
Node dc = derivativeSingle(r[0], c); |
664 |
151 |
if(dc != d_emptyRegexp) { |
665 |
115 |
retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); |
666 |
|
} else { |
667 |
36 |
retNode = d_emptyRegexp; |
668 |
|
} |
669 |
151 |
break; |
670 |
|
} |
671 |
|
case kind::REGEXP_LOOP: { |
672 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
673 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
674 |
|
if (l == u || l == 0) |
675 |
|
{ |
676 |
|
retNode = d_emptyRegexp; |
677 |
|
} else { |
678 |
|
Node dc = derivativeSingle(r[0], c); |
679 |
|
if(dc != d_emptyRegexp) { |
680 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
681 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
682 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
683 |
|
} else { |
684 |
|
retNode = d_emptyRegexp; |
685 |
|
} |
686 |
|
} |
687 |
|
//Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; |
688 |
|
break; |
689 |
|
} |
690 |
|
case kind::REGEXP_COMPLEMENT: |
691 |
|
default: { |
692 |
|
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; |
693 |
|
Unreachable(); |
694 |
|
break; |
695 |
|
} |
696 |
|
} |
697 |
884 |
if(retNode != d_emptyRegexp) { |
698 |
732 |
retNode = Rewriter::rewrite( retNode ); |
699 |
|
} |
700 |
884 |
d_dv_cache[dv] = retNode; |
701 |
|
} |
702 |
1410 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
703 |
2820 |
return retNode; |
704 |
|
} |
705 |
|
|
706 |
898 |
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset) |
707 |
|
{ |
708 |
898 |
Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl; |
709 |
|
std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr = |
710 |
898 |
d_fset_cache.find(r); |
711 |
898 |
if(itr != d_fset_cache.end()) { |
712 |
312 |
pcset.insert((itr->second).first.begin(), (itr->second).first.end()); |
713 |
312 |
pvset.insert((itr->second).second.begin(), (itr->second).second.end()); |
714 |
|
} else { |
715 |
|
// cset is code points |
716 |
1172 |
std::set<unsigned> cset; |
717 |
1172 |
SetNodes vset; |
718 |
586 |
Kind k = r.getKind(); |
719 |
586 |
switch( k ) { |
720 |
|
case kind::REGEXP_EMPTY: { |
721 |
|
break; |
722 |
|
} |
723 |
|
case kind::REGEXP_RANGE: { |
724 |
|
unsigned a = r[0].getConst<String>().front(); |
725 |
|
unsigned b = r[1].getConst<String>().front(); |
726 |
|
Assert(a < b); |
727 |
|
Assert(b < std::numeric_limits<unsigned>::max()); |
728 |
|
for (unsigned c = a; c <= b; c++) |
729 |
|
{ |
730 |
|
cset.insert(c); |
731 |
|
} |
732 |
|
break; |
733 |
|
} |
734 |
204 |
case kind::STRING_TO_REGEXP: { |
735 |
408 |
Node st = Rewriter::rewrite(r[0]); |
736 |
204 |
if(st.isConst()) { |
737 |
408 |
String s = st.getConst<String>(); |
738 |
204 |
if(s.size() != 0) { |
739 |
196 |
unsigned sc = s.front(); |
740 |
196 |
cset.insert(sc); |
741 |
|
} |
742 |
|
} |
743 |
|
else if (st.getKind() == kind::STRING_CONCAT) |
744 |
|
{ |
745 |
|
if(st[0].isConst()) { |
746 |
|
String s = st[0].getConst<String>(); |
747 |
|
unsigned sc = s.front(); |
748 |
|
cset.insert(sc); |
749 |
|
} else { |
750 |
|
vset.insert( st[0] ); |
751 |
|
} |
752 |
|
} |
753 |
|
else |
754 |
|
{ |
755 |
|
vset.insert(st); |
756 |
|
} |
757 |
204 |
break; |
758 |
|
} |
759 |
253 |
case kind::REGEXP_CONCAT: { |
760 |
329 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
761 |
319 |
firstChars(r[i], cset, vset); |
762 |
395 |
Node n = r[i]; |
763 |
395 |
Node exp; |
764 |
319 |
if (delta(n, exp) != 1) |
765 |
|
{ |
766 |
243 |
break; |
767 |
|
} |
768 |
|
} |
769 |
253 |
break; |
770 |
|
} |
771 |
46 |
case kind::REGEXP_UNION: { |
772 |
146 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
773 |
100 |
firstChars(r[i], cset, vset); |
774 |
|
} |
775 |
46 |
break; |
776 |
|
} |
777 |
|
case kind::REGEXP_INTER: { |
778 |
|
//TODO: Overapproximation for now |
779 |
|
//for(unsigned i=0; i<r.getNumChildren(); i++) { |
780 |
|
// firstChars(r[i], cset, vset); |
781 |
|
//} |
782 |
|
firstChars(r[0], cset, vset); |
783 |
|
break; |
784 |
|
} |
785 |
83 |
case kind::REGEXP_STAR: { |
786 |
83 |
firstChars(r[0], cset, vset); |
787 |
83 |
break; |
788 |
|
} |
789 |
|
case kind::REGEXP_LOOP: { |
790 |
|
firstChars(r[0], cset, vset); |
791 |
|
break; |
792 |
|
} |
793 |
|
case kind::REGEXP_SIGMA: |
794 |
|
case kind::REGEXP_COMPLEMENT: |
795 |
|
default: { |
796 |
|
// we do not expect to call this function on regular expressions that |
797 |
|
// aren't a standard regular expression kind. However, if we do, then |
798 |
|
// the following code is conservative and says that the current |
799 |
|
// regular expression can begin with any character. |
800 |
|
Assert(utils::isRegExpKind(k)); |
801 |
|
// can start with any character |
802 |
|
Assert(d_lastchar < std::numeric_limits<unsigned>::max()); |
803 |
|
for (unsigned i = 0; i <= d_lastchar; i++) |
804 |
|
{ |
805 |
|
cset.insert(i); |
806 |
|
} |
807 |
|
break; |
808 |
|
} |
809 |
|
} |
810 |
586 |
pcset.insert(cset.begin(), cset.end()); |
811 |
586 |
pvset.insert(vset.begin(), vset.end()); |
812 |
1172 |
std::pair<std::set<unsigned>, SetNodes> p(cset, vset); |
813 |
586 |
d_fset_cache[r] = p; |
814 |
|
} |
815 |
|
|
816 |
898 |
if(Trace.isOn("regexp-fset")) { |
817 |
|
Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {"; |
818 |
|
for (std::set<unsigned>::const_iterator it = pcset.begin(); |
819 |
|
it != pcset.end(); |
820 |
|
++it) |
821 |
|
{ |
822 |
|
if (it != pcset.begin()) |
823 |
|
{ |
824 |
|
Trace("regexp-fset") << ","; |
825 |
|
} |
826 |
|
Trace("regexp-fset") << (*it); |
827 |
|
} |
828 |
|
Trace("regexp-fset") << "}" << std::endl; |
829 |
|
} |
830 |
898 |
} |
831 |
|
|
832 |
527 |
Node RegExpOpr::simplify(Node t, bool polarity) |
833 |
|
{ |
834 |
1054 |
Trace("strings-regexp-simpl") |
835 |
527 |
<< "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl; |
836 |
527 |
Assert(t.getKind() == kind::STRING_IN_REGEXP); |
837 |
1054 |
Node tlit = polarity ? t : t.notNode(); |
838 |
1054 |
Node conc; |
839 |
527 |
std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit); |
840 |
527 |
if (itr != d_simpCache.end()) |
841 |
|
{ |
842 |
8 |
return itr->second; |
843 |
|
} |
844 |
519 |
if (polarity) |
845 |
|
{ |
846 |
768 |
std::vector<Node> newSkolems; |
847 |
384 |
conc = reduceRegExpPos(tlit, d_sc, newSkolems); |
848 |
|
} |
849 |
|
else |
850 |
|
{ |
851 |
|
// see if we can use an optimized version of the reduction for re.++. |
852 |
270 |
Node r = t[1]; |
853 |
135 |
if (r.getKind() == REGEXP_CONCAT) |
854 |
|
{ |
855 |
|
// the index we are removing from the RE concatenation |
856 |
114 |
size_t index = 0; |
857 |
|
// As an optimization to the reduction, if we can determine that |
858 |
|
// all strings in the language of R1 have the same length, say n, |
859 |
|
// then the conclusion of the reduction is quantifier-free: |
860 |
|
// ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) |
861 |
228 |
Node reLen = getRegExpConcatFixed(r, index); |
862 |
114 |
if (!reLen.isNull()) |
863 |
|
{ |
864 |
102 |
conc = reduceRegExpNegConcatFixed(tlit, reLen, index); |
865 |
|
} |
866 |
|
} |
867 |
135 |
if (conc.isNull()) |
868 |
|
{ |
869 |
33 |
conc = reduceRegExpNeg(tlit); |
870 |
|
} |
871 |
|
} |
872 |
519 |
d_simpCache[tlit] = conc; |
873 |
1038 |
Trace("strings-regexp-simpl") |
874 |
519 |
<< "RegExpOpr::simplify: returns " << conc << std::endl; |
875 |
519 |
return conc; |
876 |
|
} |
877 |
|
|
878 |
142 |
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index) |
879 |
|
{ |
880 |
142 |
Assert(r.getKind() == REGEXP_CONCAT); |
881 |
142 |
index = 0; |
882 |
284 |
Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]); |
883 |
142 |
if (!reLen.isNull()) |
884 |
|
{ |
885 |
96 |
return reLen; |
886 |
|
} |
887 |
|
// try from the opposite end |
888 |
46 |
size_t indexE = r.getNumChildren() - 1; |
889 |
46 |
reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]); |
890 |
46 |
if (!reLen.isNull()) |
891 |
|
{ |
892 |
33 |
index = indexE; |
893 |
33 |
return reLen; |
894 |
|
} |
895 |
13 |
return Node::null(); |
896 |
|
} |
897 |
|
|
898 |
35 |
Node RegExpOpr::reduceRegExpNeg(Node mem) |
899 |
|
{ |
900 |
35 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
901 |
70 |
Node s = mem[0][0]; |
902 |
70 |
Node r = mem[0][1]; |
903 |
35 |
NodeManager* nm = NodeManager::currentNM(); |
904 |
35 |
Kind k = r.getKind(); |
905 |
70 |
Node zero = nm->mkConst(Rational(0)); |
906 |
35 |
Node conc; |
907 |
35 |
if (k == REGEXP_CONCAT) |
908 |
|
{ |
909 |
|
// do not use length entailment, call regular expression concat |
910 |
28 |
Node reLen; |
911 |
14 |
size_t i = 0; |
912 |
14 |
conc = reduceRegExpNegConcatFixed(mem, reLen, i); |
913 |
|
} |
914 |
21 |
else if (k == REGEXP_STAR) |
915 |
|
{ |
916 |
40 |
Node emp = Word::mkEmptyWord(s.getType()); |
917 |
40 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
918 |
40 |
Node sne = s.eqNode(emp).negate(); |
919 |
40 |
Node b1 = nm->mkBoundVar(nm->integerType()); |
920 |
40 |
Node b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
921 |
|
Node g1 = |
922 |
40 |
nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1)); |
923 |
|
// internal |
924 |
40 |
Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
925 |
40 |
Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
926 |
40 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate(); |
927 |
40 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate(); |
928 |
|
|
929 |
20 |
conc = nm->mkNode(OR, s1r1, s2r2); |
930 |
20 |
conc = nm->mkNode(IMPLIES, g1, conc); |
931 |
|
// must mark as an internal quantifier |
932 |
20 |
conc = utils::mkForallInternal(b1v, conc); |
933 |
20 |
conc = nm->mkNode(AND, sne, conc); |
934 |
|
} |
935 |
|
else |
936 |
|
{ |
937 |
1 |
Assert(!utils::isRegExpKind(k)); |
938 |
|
} |
939 |
70 |
return conc; |
940 |
|
} |
941 |
|
|
942 |
134 |
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index) |
943 |
|
{ |
944 |
134 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
945 |
268 |
Node s = mem[0][0]; |
946 |
268 |
Node r = mem[0][1]; |
947 |
134 |
NodeManager* nm = NodeManager::currentNM(); |
948 |
134 |
Assert(r.getKind() == REGEXP_CONCAT); |
949 |
268 |
Node zero = nm->mkConst(Rational(0)); |
950 |
|
// The following simplification states that |
951 |
|
// ~( s in R1 ++ R2 ++... ++ Rn ) |
952 |
|
// is equivalent to |
953 |
|
// forall x. |
954 |
|
// 0 <= x <= len(s) => |
955 |
|
// ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn) |
956 |
|
// Index is the child index of r that we are stripping off, which is either |
957 |
|
// from the beginning or the end. |
958 |
134 |
Assert(index == 0 || index == r.getNumChildren() - 1); |
959 |
268 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
960 |
268 |
Node b1; |
961 |
268 |
Node b1v; |
962 |
268 |
Node guard; |
963 |
134 |
if (reLen.isNull()) |
964 |
|
{ |
965 |
14 |
b1 = SkolemCache::mkIndexVar(mem); |
966 |
14 |
b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
967 |
42 |
guard = nm->mkNode(AND, |
968 |
28 |
nm->mkNode(GEQ, b1, zero), |
969 |
28 |
nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); |
970 |
|
} |
971 |
|
else |
972 |
|
{ |
973 |
120 |
b1 = reLen; |
974 |
|
} |
975 |
268 |
Node s1; |
976 |
268 |
Node s2; |
977 |
134 |
if (index == 0) |
978 |
|
{ |
979 |
104 |
s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
980 |
104 |
s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
981 |
|
} |
982 |
|
else |
983 |
|
{ |
984 |
30 |
s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1); |
985 |
30 |
s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1)); |
986 |
|
} |
987 |
268 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate(); |
988 |
268 |
std::vector<Node> nvec; |
989 |
554 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) |
990 |
|
{ |
991 |
420 |
if (i != index) |
992 |
|
{ |
993 |
286 |
nvec.push_back(r[i]); |
994 |
|
} |
995 |
|
} |
996 |
268 |
Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); |
997 |
134 |
r2 = Rewriter::rewrite(r2); |
998 |
268 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); |
999 |
134 |
Node conc = nm->mkNode(OR, s1r1, s2r2); |
1000 |
134 |
if (!b1v.isNull()) |
1001 |
|
{ |
1002 |
14 |
conc = nm->mkNode(OR, guard.negate(), conc); |
1003 |
|
// must mark as an internal quantifier |
1004 |
14 |
conc = utils::mkForallInternal(b1v, conc); |
1005 |
|
} |
1006 |
268 |
return conc; |
1007 |
|
} |
1008 |
|
|
1009 |
638 |
Node RegExpOpr::reduceRegExpPos(Node mem, |
1010 |
|
SkolemCache* sc, |
1011 |
|
std::vector<Node>& newSkolems) |
1012 |
|
{ |
1013 |
638 |
Assert(mem.getKind() == STRING_IN_REGEXP); |
1014 |
1276 |
Node s = mem[0]; |
1015 |
1276 |
Node r = mem[1]; |
1016 |
638 |
NodeManager* nm = NodeManager::currentNM(); |
1017 |
638 |
Kind k = r.getKind(); |
1018 |
638 |
Node conc; |
1019 |
638 |
if (k == REGEXP_CONCAT) |
1020 |
|
{ |
1021 |
884 |
std::vector<Node> nvec; |
1022 |
884 |
std::vector<Node> cc; |
1023 |
442 |
SkolemManager* sm = nm->getSkolemManager(); |
1024 |
|
// Look up skolems for each of the components. If sc has optimizations |
1025 |
|
// enabled, this will return arguments of str.to_re. |
1026 |
1722 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i) |
1027 |
|
{ |
1028 |
1280 |
if (r[i].getKind() == STRING_TO_REGEXP) |
1029 |
|
{ |
1030 |
|
// optimization, just take the body |
1031 |
391 |
newSkolems.push_back(r[i][0]); |
1032 |
|
} |
1033 |
|
else |
1034 |
|
{ |
1035 |
1778 |
Node ivalue = nm->mkConst(Rational(i)); |
1036 |
|
Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT, |
1037 |
1778 |
s.getType(), |
1038 |
3556 |
{mem[0], mem[1], ivalue}); |
1039 |
889 |
newSkolems.push_back(sk); |
1040 |
889 |
nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i])); |
1041 |
|
} |
1042 |
|
} |
1043 |
|
// (str.in_re x (re.++ R0 .... Rn)) => |
1044 |
|
// (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) ) |
1045 |
884 |
Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems)); |
1046 |
442 |
nvec.insert(nvec.begin(), lem); |
1047 |
442 |
conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); |
1048 |
|
} |
1049 |
196 |
else if (k == REGEXP_STAR) |
1050 |
|
{ |
1051 |
388 |
Node emp = Word::mkEmptyWord(s.getType()); |
1052 |
388 |
Node se = s.eqNode(emp); |
1053 |
388 |
Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]); |
1054 |
388 |
Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]); |
1055 |
388 |
Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand); |
1056 |
|
// We unfold `x in R*` by considering three cases: `x` is empty, `x` |
1057 |
|
// is matched by `R`, or `x` is matched by two or more `R`s. For the |
1058 |
|
// last case, `x` will break into three pieces, making the beginning |
1059 |
|
// and the end each match `R` and the middle match `R*`. Matching the |
1060 |
|
// beginning and the end with `R` allows us to reason about the |
1061 |
|
// beginning and the end of `x` simultaneously. |
1062 |
|
// |
1063 |
|
// x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R)) |
1064 |
|
|
1065 |
|
// We also immediately unfold the last disjunct for re.*. The advantage |
1066 |
|
// of doing this is that we use the same scheme for skolems above. |
1067 |
388 |
std::vector<Node> newSkolemsC; |
1068 |
194 |
sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC); |
1069 |
194 |
Assert(newSkolemsC.size() == 3); |
1070 |
|
// make the return lemma |
1071 |
|
// can also assume the component match the first and last R are non-empty. |
1072 |
|
// This means that the overall conclusion is: |
1073 |
|
// (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^ |
1074 |
|
// k1 in R ^ k2 in (re.* R) ^ k3 in R ^ |
1075 |
|
// k1 != "" ^ k3 != "") |
1076 |
194 |
conc = nm->mkNode(OR, |
1077 |
|
se, |
1078 |
|
sinr, |
1079 |
776 |
nm->mkNode(AND, |
1080 |
|
sinRExp, |
1081 |
388 |
newSkolemsC[0].eqNode(emp).negate(), |
1082 |
388 |
newSkolemsC[2].eqNode(emp).negate())); |
1083 |
|
} |
1084 |
|
else |
1085 |
|
{ |
1086 |
2 |
Assert(!utils::isRegExpKind(k)); |
1087 |
|
} |
1088 |
1276 |
return conc; |
1089 |
|
} |
1090 |
|
|
1091 |
|
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) { |
1092 |
|
for(std::set< PairNodes >::const_iterator itr = s.begin(); |
1093 |
|
itr != s.end(); ++itr) { |
1094 |
|
if((itr->first == n1 && itr->second == n2) || |
1095 |
|
(itr->first == n2 && itr->second == n1)) { |
1096 |
|
return true; |
1097 |
|
} |
1098 |
|
} |
1099 |
|
return false; |
1100 |
|
} |
1101 |
|
|
1102 |
654 |
bool RegExpOpr::containC2(unsigned cnt, Node n) { |
1103 |
654 |
if(n.getKind() == kind::REGEXP_RV) { |
1104 |
202 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1105 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::containC2"; |
1106 |
202 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1107 |
202 |
return cnt == y; |
1108 |
452 |
} else if(n.getKind() == kind::REGEXP_CONCAT) { |
1109 |
102 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1110 |
80 |
if(containC2(cnt, n[i])) { |
1111 |
12 |
return true; |
1112 |
|
} |
1113 |
|
} |
1114 |
418 |
} else if(n.getKind() == kind::REGEXP_STAR) { |
1115 |
34 |
return containC2(cnt, n[0]); |
1116 |
384 |
} else if(n.getKind() == kind::REGEXP_LOOP) { |
1117 |
|
return containC2(cnt, n[0]); |
1118 |
384 |
} else if(n.getKind() == kind::REGEXP_UNION) { |
1119 |
118 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1120 |
88 |
if(containC2(cnt, n[i])) { |
1121 |
12 |
return true; |
1122 |
|
} |
1123 |
|
} |
1124 |
|
} |
1125 |
394 |
return false; |
1126 |
|
} |
1127 |
198 |
Node RegExpOpr::convert1(unsigned cnt, Node n) { |
1128 |
198 |
Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl; |
1129 |
396 |
Node r1, r2; |
1130 |
198 |
convert2(cnt, n, r1, r2); |
1131 |
198 |
Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl; |
1132 |
198 |
Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, |
1133 |
198 |
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2); |
1134 |
198 |
ret = Rewriter::rewrite( ret ); |
1135 |
198 |
Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl; |
1136 |
396 |
return ret; |
1137 |
|
} |
1138 |
421 |
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { |
1139 |
421 |
if(n == d_emptyRegexp) { |
1140 |
24 |
r1 = d_emptyRegexp; |
1141 |
24 |
r2 = d_emptyRegexp; |
1142 |
24 |
return; |
1143 |
397 |
} else if(n == d_emptySingleton) { |
1144 |
25 |
r1 = d_emptySingleton; |
1145 |
25 |
r2 = d_emptySingleton; |
1146 |
|
} |
1147 |
397 |
Kind nk = n.getKind(); |
1148 |
397 |
if (nk == REGEXP_RV) |
1149 |
|
{ |
1150 |
61 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1151 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::convert2"; |
1152 |
61 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1153 |
61 |
r1 = d_emptySingleton; |
1154 |
61 |
if(cnt == y) { |
1155 |
61 |
r2 = d_emptyRegexp; |
1156 |
|
} else { |
1157 |
|
r2 = n; |
1158 |
|
} |
1159 |
|
} |
1160 |
336 |
else if (nk == REGEXP_CONCAT) |
1161 |
|
{ |
1162 |
212 |
bool flag = true; |
1163 |
424 |
std::vector<Node> vr1, vr2; |
1164 |
591 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1165 |
452 |
if(containC2(cnt, n[i])) { |
1166 |
146 |
Node t1, t2; |
1167 |
73 |
convert2(cnt, n[i], t1, t2); |
1168 |
73 |
vr1.push_back(t1); |
1169 |
73 |
r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] : |
1170 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1); |
1171 |
73 |
vr2.push_back(t2); |
1172 |
73 |
for( unsigned j=i+1; j<n.getNumChildren(); j++ ) { |
1173 |
|
vr2.push_back(n[j]); |
1174 |
|
} |
1175 |
73 |
r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] : |
1176 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2); |
1177 |
73 |
flag = false; |
1178 |
73 |
break; |
1179 |
|
} else { |
1180 |
379 |
vr1.push_back(n[i]); |
1181 |
|
} |
1182 |
|
} |
1183 |
212 |
if(flag) { |
1184 |
139 |
r1 = d_emptySingleton; |
1185 |
139 |
r2 = n; |
1186 |
|
} |
1187 |
|
} |
1188 |
124 |
else if (nk == REGEXP_UNION) |
1189 |
|
{ |
1190 |
126 |
std::vector<Node> vr1, vr2; |
1191 |
213 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1192 |
300 |
Node t1, t2; |
1193 |
150 |
convert2(cnt, n[i], t1, t2); |
1194 |
150 |
vr1.push_back(t1); |
1195 |
150 |
vr2.push_back(t2); |
1196 |
|
} |
1197 |
63 |
r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1); |
1198 |
63 |
r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); |
1199 |
|
} |
1200 |
61 |
else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE |
1201 |
|
|| nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP) |
1202 |
|
{ |
1203 |
|
// this leaves n unchanged |
1204 |
61 |
r1 = d_emptySingleton; |
1205 |
61 |
r2 = n; |
1206 |
|
} |
1207 |
|
else |
1208 |
|
{ |
1209 |
|
//is it possible? |
1210 |
|
Unreachable(); |
1211 |
|
} |
1212 |
|
} |
1213 |
|
|
1214 |
317 |
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) { |
1215 |
|
//Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); |
1216 |
317 |
if(r1 > r2) { |
1217 |
60 |
TNode tmpNode = r1; |
1218 |
30 |
r1 = r2; |
1219 |
30 |
r2 = tmpNode; |
1220 |
|
} |
1221 |
317 |
Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl; |
1222 |
634 |
std::pair < Node, Node > p(r1, r2); |
1223 |
317 |
std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p); |
1224 |
317 |
Node rNode; |
1225 |
317 |
if(itr != d_inter_cache.end()) { |
1226 |
38 |
rNode = itr->second; |
1227 |
|
} else { |
1228 |
279 |
Trace("regexp-int-debug") << " ... not in cache" << std::endl; |
1229 |
279 |
if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) { |
1230 |
|
Trace("regexp-int-debug") << " ... one is empty set" << std::endl; |
1231 |
|
rNode = d_emptyRegexp; |
1232 |
279 |
} else if(r1 == d_emptySingleton || r2 == d_emptySingleton) { |
1233 |
24 |
Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl; |
1234 |
48 |
Node exp; |
1235 |
24 |
int r = delta((r1 == d_emptySingleton ? r2 : r1), exp); |
1236 |
24 |
if(r == 0) { |
1237 |
|
//TODO: variable |
1238 |
|
Unreachable(); |
1239 |
24 |
} else if(r == 1) { |
1240 |
18 |
rNode = d_emptySingleton; |
1241 |
|
} else { |
1242 |
6 |
rNode = d_emptyRegexp; |
1243 |
|
} |
1244 |
255 |
} else if(r1 == r2) { |
1245 |
|
Trace("regexp-int-debug") << " ... equal" << std::endl; |
1246 |
|
rNode = r1; //convert1(cnt, r1); |
1247 |
|
} else { |
1248 |
255 |
Trace("regexp-int-debug") << " ... normal checking" << std::endl; |
1249 |
255 |
std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p); |
1250 |
255 |
if(itrcache != cache.end()) { |
1251 |
57 |
rNode = itrcache->second; |
1252 |
|
} else { |
1253 |
198 |
Trace("regexp-int-debug") << " ... normal without cache" << std::endl; |
1254 |
396 |
std::vector<unsigned> cset; |
1255 |
396 |
std::set<unsigned> cset1, cset2; |
1256 |
396 |
std::set< Node > vset1, vset2; |
1257 |
198 |
firstChars(r1, cset1, vset1); |
1258 |
198 |
firstChars(r2, cset2, vset2); |
1259 |
198 |
Trace("regexp-int-debug") << " ... got fset" << std::endl; |
1260 |
198 |
std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(), |
1261 |
198 |
std::inserter(cset, cset.begin())); |
1262 |
396 |
std::vector< Node > vec_nodes; |
1263 |
396 |
Node delta_exp; |
1264 |
198 |
Trace("regexp-int-debug") << " ... try delta" << std::endl; |
1265 |
198 |
int flag = delta(r1, delta_exp); |
1266 |
198 |
int flag2 = delta(r2, delta_exp); |
1267 |
198 |
Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl; |
1268 |
198 |
if(flag != 2 && flag2 != 2) { |
1269 |
21 |
if(flag == 1 && flag2 == 1) { |
1270 |
21 |
vec_nodes.push_back(d_emptySingleton); |
1271 |
|
} else { |
1272 |
|
//TODO: variable |
1273 |
|
Unreachable(); |
1274 |
|
} |
1275 |
|
} |
1276 |
198 |
if(Trace.isOn("regexp-int-debug")) { |
1277 |
|
Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; |
1278 |
|
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1279 |
|
it != cset.end(); |
1280 |
|
++it) |
1281 |
|
{ |
1282 |
|
if (it != cset.begin()) |
1283 |
|
{ |
1284 |
|
Trace("regexp-int-debug") << ", "; |
1285 |
|
} |
1286 |
|
Trace("regexp-int-debug") << (*it); |
1287 |
|
} |
1288 |
|
Trace("regexp-int-debug") << std::endl; |
1289 |
|
} |
1290 |
396 |
std::map< PairNodes, Node > cacheX; |
1291 |
454 |
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1292 |
454 |
it != cset.end(); |
1293 |
|
++it) |
1294 |
|
{ |
1295 |
512 |
std::vector<unsigned> cvec; |
1296 |
256 |
cvec.push_back(*it); |
1297 |
512 |
String c(cvec); |
1298 |
256 |
Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; |
1299 |
512 |
Node r1l = derivativeSingle(r1, c); |
1300 |
512 |
Node r2l = derivativeSingle(r2, c); |
1301 |
256 |
Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l) << std::endl; |
1302 |
256 |
Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l) << std::endl; |
1303 |
512 |
Node rt; |
1304 |
|
|
1305 |
256 |
if(r1l > r2l) { |
1306 |
68 |
Node tnode = r1l; |
1307 |
34 |
r1l = r2l; r2l = tnode; |
1308 |
|
} |
1309 |
512 |
PairNodes pp(r1l, r2l); |
1310 |
256 |
std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp); |
1311 |
256 |
if(itr2 != cacheX.end()) { |
1312 |
8 |
rt = itr2->second; |
1313 |
|
} else { |
1314 |
496 |
std::map< PairNodes, Node > cache2(cache); |
1315 |
496 |
cache2[p] = NodeManager::currentNM()->mkNode( |
1316 |
|
kind::REGEXP_RV, |
1317 |
496 |
NodeManager::currentNM()->mkConst(cvc5::Rational(cnt))); |
1318 |
248 |
rt = intersectInternal(r1l, r2l, cache2, cnt+1); |
1319 |
248 |
cacheX[ pp ] = rt; |
1320 |
|
} |
1321 |
|
|
1322 |
768 |
rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, |
1323 |
512 |
NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) ); |
1324 |
|
|
1325 |
256 |
Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl; |
1326 |
256 |
vec_nodes.push_back(rt); |
1327 |
|
} |
1328 |
198 |
rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] : |
1329 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) ); |
1330 |
198 |
rNode = convert1(cnt, rNode); |
1331 |
198 |
rNode = Rewriter::rewrite( rNode ); |
1332 |
|
} |
1333 |
|
} |
1334 |
279 |
Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode) << std::endl; |
1335 |
279 |
if (!expr::hasSubtermKind(REGEXP_RV, rNode)) |
1336 |
|
{ |
1337 |
115 |
d_inter_cache[p] = rNode; |
1338 |
|
} |
1339 |
|
} |
1340 |
317 |
Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl; |
1341 |
634 |
return rNode; |
1342 |
|
} |
1343 |
|
|
1344 |
138 |
Node RegExpOpr::removeIntersection(Node r) { |
1345 |
138 |
Assert(checkConstRegExp(r)); |
1346 |
138 |
NodeManager* nm = NodeManager::currentNM(); |
1347 |
276 |
std::unordered_map<TNode, Node> visited; |
1348 |
138 |
std::unordered_map<TNode, Node>::iterator it; |
1349 |
276 |
std::vector<TNode> visit; |
1350 |
276 |
TNode cur; |
1351 |
138 |
visit.push_back(r); |
1352 |
1948 |
do |
1353 |
|
{ |
1354 |
2086 |
cur = visit.back(); |
1355 |
2086 |
visit.pop_back(); |
1356 |
2086 |
it = visited.find(cur); |
1357 |
|
|
1358 |
2086 |
if (it == visited.end()) |
1359 |
|
{ |
1360 |
986 |
visited[cur] = Node::null(); |
1361 |
986 |
visit.push_back(cur); |
1362 |
1948 |
for (const Node& cn : cur) |
1363 |
|
{ |
1364 |
962 |
visit.push_back(cn); |
1365 |
|
} |
1366 |
|
} |
1367 |
1100 |
else if (it->second.isNull()) |
1368 |
|
{ |
1369 |
986 |
Kind ck = cur.getKind(); |
1370 |
1972 |
Node ret; |
1371 |
986 |
bool childChanged = false; |
1372 |
1972 |
std::vector<Node> children; |
1373 |
986 |
if (cur.getMetaKind() == metakind::PARAMETERIZED) |
1374 |
|
{ |
1375 |
|
children.push_back(cur.getOperator()); |
1376 |
|
} |
1377 |
1948 |
for (const Node& cn : cur) |
1378 |
|
{ |
1379 |
962 |
it = visited.find(cn); |
1380 |
962 |
Assert(it != visited.end()); |
1381 |
962 |
Assert(!it->second.isNull()); |
1382 |
962 |
if (ck == REGEXP_INTER) |
1383 |
|
{ |
1384 |
4 |
if (ret.isNull()) |
1385 |
|
{ |
1386 |
2 |
ret = it->second; |
1387 |
|
} |
1388 |
|
else |
1389 |
|
{ |
1390 |
2 |
ret = intersect(ret, it->second); |
1391 |
|
} |
1392 |
|
} |
1393 |
|
else |
1394 |
|
{ |
1395 |
|
// will construct below |
1396 |
958 |
childChanged = childChanged || cn != it->second; |
1397 |
958 |
children.push_back(it->second); |
1398 |
|
} |
1399 |
|
} |
1400 |
986 |
if (ck != REGEXP_INTER) |
1401 |
|
{ |
1402 |
984 |
if (childChanged) |
1403 |
|
{ |
1404 |
2 |
ret = nm->mkNode(cur.getKind(), children); |
1405 |
|
} |
1406 |
|
else |
1407 |
|
{ |
1408 |
982 |
ret = cur; |
1409 |
|
} |
1410 |
|
} |
1411 |
986 |
visited[cur] = ret; |
1412 |
|
} |
1413 |
2086 |
} while (!visit.empty()); |
1414 |
138 |
Assert(visited.find(r) != visited.end()); |
1415 |
138 |
Assert(!visited.find(r)->second.isNull()); |
1416 |
138 |
if (Trace.isOn("regexp-intersect")) |
1417 |
|
{ |
1418 |
|
Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) |
1419 |
|
<< " ) = " << mkString(visited[r]) << std::endl; |
1420 |
|
} |
1421 |
276 |
return visited[r]; |
1422 |
|
} |
1423 |
|
|
1424 |
69 |
Node RegExpOpr::intersect(Node r1, Node r2) |
1425 |
|
{ |
1426 |
69 |
if (!checkConstRegExp(r1) || !checkConstRegExp(r2)) |
1427 |
|
{ |
1428 |
|
return Node::null(); |
1429 |
|
} |
1430 |
138 |
Node rr1 = removeIntersection(r1); |
1431 |
138 |
Node rr2 = removeIntersection(r2); |
1432 |
138 |
std::map<PairNodes, Node> cache; |
1433 |
69 |
Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl; |
1434 |
69 |
Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl; |
1435 |
138 |
Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) |
1436 |
69 |
<< ",\n\t" << mkString(r2) << ")" << std::endl; |
1437 |
138 |
Node retNode = intersectInternal(rr1, rr2, cache, 1); |
1438 |
138 |
Trace("regexp-intersect") |
1439 |
138 |
<< "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2) |
1440 |
69 |
<< ") =\n\t" << mkString(retNode) << std::endl; |
1441 |
69 |
Trace("regexp-intersect-node") << "Intersect finished." << std::endl; |
1442 |
69 |
return retNode; |
1443 |
|
} |
1444 |
|
|
1445 |
|
//printing |
1446 |
19497 |
std::string RegExpOpr::niceChar(Node r) { |
1447 |
19497 |
if(r.isConst()) { |
1448 |
38766 |
std::string s = r.getConst<String>().toString(); |
1449 |
19383 |
return s == "." ? "\\." : s; |
1450 |
|
} else { |
1451 |
228 |
std::string ss = "$" + r.toString(); |
1452 |
114 |
return ss; |
1453 |
|
} |
1454 |
|
} |
1455 |
36731 |
std::string RegExpOpr::mkString( Node r ) { |
1456 |
36731 |
std::string retStr; |
1457 |
36731 |
if(r.isNull()) { |
1458 |
|
retStr = "\\E"; |
1459 |
|
} else { |
1460 |
36731 |
int k = r.getKind(); |
1461 |
36731 |
switch( k ) { |
1462 |
472 |
case kind::REGEXP_EMPTY: { |
1463 |
472 |
retStr += "\\E"; |
1464 |
472 |
break; |
1465 |
|
} |
1466 |
326 |
case kind::REGEXP_SIGMA: { |
1467 |
326 |
retStr += "."; |
1468 |
326 |
break; |
1469 |
|
} |
1470 |
18387 |
case kind::STRING_TO_REGEXP: { |
1471 |
36774 |
std::string tmp( niceChar( r[0] ) ); |
1472 |
18387 |
retStr += tmp.size()==1? tmp : "(" + tmp + ")"; |
1473 |
18387 |
break; |
1474 |
|
} |
1475 |
5450 |
case kind::REGEXP_CONCAT: { |
1476 |
5450 |
retStr += "("; |
1477 |
18827 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1478 |
|
//if(i != 0) retStr += "."; |
1479 |
13377 |
retStr += mkString( r[i] ); |
1480 |
|
} |
1481 |
5450 |
retStr += ")"; |
1482 |
5450 |
break; |
1483 |
|
} |
1484 |
3894 |
case kind::REGEXP_UNION: { |
1485 |
3894 |
retStr += "("; |
1486 |
12404 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1487 |
8510 |
if(i != 0) retStr += "|"; |
1488 |
8510 |
retStr += mkString( r[i] ); |
1489 |
|
} |
1490 |
3894 |
retStr += ")"; |
1491 |
3894 |
break; |
1492 |
|
} |
1493 |
52 |
case kind::REGEXP_INTER: { |
1494 |
52 |
retStr += "("; |
1495 |
156 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1496 |
104 |
if(i != 0) retStr += "&"; |
1497 |
104 |
retStr += mkString( r[i] ); |
1498 |
|
} |
1499 |
52 |
retStr += ")"; |
1500 |
52 |
break; |
1501 |
|
} |
1502 |
7006 |
case kind::REGEXP_STAR: { |
1503 |
7006 |
retStr += mkString( r[0] ); |
1504 |
7006 |
retStr += "*"; |
1505 |
7006 |
break; |
1506 |
|
} |
1507 |
|
case kind::REGEXP_PLUS: { |
1508 |
|
retStr += mkString( r[0] ); |
1509 |
|
retStr += "+"; |
1510 |
|
break; |
1511 |
|
} |
1512 |
|
case kind::REGEXP_OPT: { |
1513 |
|
retStr += mkString( r[0] ); |
1514 |
|
retStr += "?"; |
1515 |
|
break; |
1516 |
|
} |
1517 |
555 |
case kind::REGEXP_RANGE: { |
1518 |
555 |
retStr += "["; |
1519 |
555 |
retStr += niceChar( r[0] ); |
1520 |
555 |
retStr += "-"; |
1521 |
555 |
retStr += niceChar( r[1] ); |
1522 |
555 |
retStr += "]"; |
1523 |
555 |
break; |
1524 |
|
} |
1525 |
|
case kind::REGEXP_LOOP: { |
1526 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
1527 |
|
std::stringstream ss; |
1528 |
|
ss << "(" << mkString(r[0]) << "){" << l << ","; |
1529 |
|
if(r.getNumChildren() == 3) { |
1530 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
1531 |
|
ss << u; |
1532 |
|
} |
1533 |
|
ss << "}"; |
1534 |
|
retStr += ss.str(); |
1535 |
|
break; |
1536 |
|
} |
1537 |
532 |
case kind::REGEXP_RV: { |
1538 |
532 |
retStr += "<"; |
1539 |
532 |
retStr += r[0].getConst<Rational>().getNumerator().toString(); |
1540 |
532 |
retStr += ">"; |
1541 |
532 |
break; |
1542 |
|
} |
1543 |
54 |
case REGEXP_COMPLEMENT: |
1544 |
|
{ |
1545 |
54 |
retStr += "^("; |
1546 |
54 |
retStr += mkString(r[0]); |
1547 |
54 |
retStr += ")"; |
1548 |
54 |
break; |
1549 |
|
} |
1550 |
3 |
default: |
1551 |
|
{ |
1552 |
6 |
std::stringstream ss; |
1553 |
3 |
ss << r; |
1554 |
3 |
retStr = ss.str(); |
1555 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
1556 |
3 |
break; |
1557 |
|
} |
1558 |
|
} |
1559 |
|
} |
1560 |
|
|
1561 |
36731 |
return retStr; |
1562 |
|
} |
1563 |
|
|
1564 |
1240 |
bool RegExpOpr::regExpIncludes(Node r1, Node r2) |
1565 |
|
{ |
1566 |
1240 |
const auto& it = d_inclusionCache.find(std::make_pair(r1, r2)); |
1567 |
1240 |
if (it != d_inclusionCache.end()) |
1568 |
|
{ |
1569 |
682 |
return (*it).second; |
1570 |
|
} |
1571 |
558 |
bool result = RegExpEntail::regExpIncludes(r1, r2); |
1572 |
558 |
d_inclusionCache[std::make_pair(r1, r2)] = result; |
1573 |
558 |
return result; |
1574 |
|
} |
1575 |
|
|
1576 |
|
} // namespace strings |
1577 |
|
} // namespace theory |
1578 |
29502 |
} // namespace cvc5 |