1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Tianyi Liang, Andrew Reynolds, Mathias Preiner |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Symbolic Regular Expresion Operations |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_operation.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "expr/node_algorithm.h" |
21 |
|
#include "options/strings_options.h" |
22 |
|
#include "theory/rewriter.h" |
23 |
|
#include "theory/strings/regexp_entail.h" |
24 |
|
#include "theory/strings/theory_strings_utils.h" |
25 |
|
#include "theory/strings/word.h" |
26 |
|
|
27 |
|
using namespace cvc5::kind; |
28 |
|
|
29 |
|
namespace cvc5 { |
30 |
|
namespace theory { |
31 |
|
namespace strings { |
32 |
|
|
33 |
9463 |
RegExpOpr::RegExpOpr(SkolemCache* sc) |
34 |
|
: d_true(NodeManager::currentNM()->mkConst(true)), |
35 |
|
d_false(NodeManager::currentNM()->mkConst(false)), |
36 |
|
d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, |
37 |
18926 |
std::vector<Node>{})), |
38 |
18926 |
d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))), |
39 |
18926 |
d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))), |
40 |
|
d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, |
41 |
18926 |
std::vector<Node>{})), |
42 |
|
d_sigma_star( |
43 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)), |
44 |
85167 |
d_sc(sc) |
45 |
|
{ |
46 |
9463 |
d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType()); |
47 |
|
|
48 |
9463 |
d_emptySingleton = |
49 |
18926 |
NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); |
50 |
9463 |
d_lastchar = utils::getAlphabetCardinality() - 1; |
51 |
9463 |
} |
52 |
|
|
53 |
9463 |
RegExpOpr::~RegExpOpr() {} |
54 |
|
|
55 |
688 |
bool RegExpOpr::checkConstRegExp( Node r ) { |
56 |
688 |
Assert(r.getType().isRegExp()); |
57 |
1376 |
Trace("strings-regexp-cstre") |
58 |
688 |
<< "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl; |
59 |
688 |
RegExpConstType rct = getRegExpConstType(r); |
60 |
688 |
return rct != RE_C_VARIABLE; |
61 |
|
} |
62 |
|
|
63 |
1257 |
RegExpConstType RegExpOpr::getRegExpConstType(Node r) |
64 |
|
{ |
65 |
1257 |
Assert(r.getType().isRegExp()); |
66 |
1257 |
std::unordered_map<Node, RegExpConstType>::iterator it; |
67 |
2514 |
std::vector<TNode> visit; |
68 |
2514 |
TNode cur; |
69 |
1257 |
visit.push_back(r); |
70 |
1505 |
do |
71 |
|
{ |
72 |
2762 |
cur = visit.back(); |
73 |
2762 |
visit.pop_back(); |
74 |
2762 |
it = d_constCache.find(cur); |
75 |
|
|
76 |
2762 |
Kind ck = cur.getKind(); |
77 |
2762 |
if (it == d_constCache.end()) |
78 |
|
{ |
79 |
874 |
if (ck == STRING_TO_REGEXP) |
80 |
|
{ |
81 |
548 |
Node tmp = Rewriter::rewrite(cur[0]); |
82 |
274 |
d_constCache[cur] = |
83 |
274 |
tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE; |
84 |
|
} |
85 |
600 |
else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE) |
86 |
|
{ |
87 |
88 |
d_constCache[cur] = RE_C_CONSTANT; |
88 |
|
} |
89 |
512 |
else if (!utils::isRegExpKind(ck)) |
90 |
|
{ |
91 |
|
// non-regular expression applications, e.g. function applications |
92 |
|
// with regular expression return type are treated as variables. |
93 |
2 |
d_constCache[cur] = RE_C_VARIABLE; |
94 |
|
} |
95 |
|
else |
96 |
|
{ |
97 |
510 |
d_constCache[cur] = RE_C_UNKNOWN; |
98 |
510 |
visit.push_back(cur); |
99 |
510 |
visit.insert(visit.end(), cur.begin(), cur.end()); |
100 |
|
} |
101 |
|
} |
102 |
1888 |
else if (it->second == RE_C_UNKNOWN) |
103 |
|
{ |
104 |
510 |
RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT; |
105 |
1505 |
for (const Node& cn : cur) |
106 |
|
{ |
107 |
995 |
it = d_constCache.find(cn); |
108 |
995 |
Assert(it != d_constCache.end()); |
109 |
995 |
if (it->second > ret) |
110 |
|
{ |
111 |
252 |
ret = it->second; |
112 |
|
} |
113 |
|
} |
114 |
510 |
d_constCache[cur] = ret; |
115 |
|
} |
116 |
2762 |
} while (!visit.empty()); |
117 |
1257 |
Assert(d_constCache.find(r) != d_constCache.end()); |
118 |
2514 |
return d_constCache[r]; |
119 |
|
} |
120 |
|
|
121 |
|
// 0-unknown, 1-yes, 2-no |
122 |
1311 |
int RegExpOpr::delta( Node r, Node &exp ) { |
123 |
|
std::map<Node, std::pair<int, Node> >::const_iterator itd = |
124 |
1311 |
d_delta_cache.find(r); |
125 |
1311 |
if (itd != d_delta_cache.end()) |
126 |
|
{ |
127 |
|
// already computed |
128 |
803 |
exp = itd->second.second; |
129 |
803 |
return itd->second.first; |
130 |
|
} |
131 |
508 |
Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl; |
132 |
508 |
int ret = 0; |
133 |
508 |
NodeManager* nm = NodeManager::currentNM(); |
134 |
508 |
Kind k = r.getKind(); |
135 |
508 |
switch (k) |
136 |
|
{ |
137 |
|
case REGEXP_EMPTY: |
138 |
|
case REGEXP_SIGMA: |
139 |
|
case REGEXP_RANGE: |
140 |
|
{ |
141 |
|
// does not contain empty string |
142 |
|
ret = 2; |
143 |
|
break; |
144 |
|
} |
145 |
178 |
case STRING_TO_REGEXP: |
146 |
|
{ |
147 |
356 |
Node tmp = Rewriter::rewrite(r[0]); |
148 |
178 |
if (tmp.isConst()) |
149 |
|
{ |
150 |
174 |
if (tmp == d_emptyString) |
151 |
|
{ |
152 |
10 |
ret = 1; |
153 |
|
} else { |
154 |
164 |
ret = 2; |
155 |
|
} |
156 |
|
} |
157 |
|
else |
158 |
|
{ |
159 |
4 |
ret = 0; |
160 |
4 |
if (tmp.getKind() == STRING_CONCAT) |
161 |
|
{ |
162 |
|
for (const Node& tmpc : tmp) |
163 |
|
{ |
164 |
|
if (tmpc.isConst()) |
165 |
|
{ |
166 |
|
ret = 2; |
167 |
|
break; |
168 |
|
} |
169 |
|
} |
170 |
|
} |
171 |
4 |
if (ret == 0) |
172 |
|
{ |
173 |
4 |
exp = r[0].eqNode(d_emptyString); |
174 |
|
} |
175 |
|
} |
176 |
178 |
break; |
177 |
|
} |
178 |
253 |
case REGEXP_CONCAT: |
179 |
|
case REGEXP_UNION: |
180 |
|
case REGEXP_INTER: |
181 |
|
{ |
182 |
|
// has there been an unknown child? |
183 |
253 |
bool hasUnknownChild = false; |
184 |
506 |
std::vector<Node> vec; |
185 |
253 |
int checkTmp = k == REGEXP_UNION ? 1 : 2; |
186 |
253 |
int retTmp = k == REGEXP_UNION ? 2 : 1; |
187 |
333 |
for (const Node& rc : r) |
188 |
|
{ |
189 |
397 |
Node exp2; |
190 |
317 |
int tmp = delta(rc, exp2); |
191 |
317 |
if (tmp == checkTmp) |
192 |
|
{ |
193 |
|
// return is implied by the child's return value |
194 |
237 |
ret = checkTmp; |
195 |
237 |
break; |
196 |
|
} |
197 |
80 |
else if (tmp == 0) |
198 |
|
{ |
199 |
|
// unknown if child contains empty string |
200 |
4 |
Assert(!exp2.isNull()); |
201 |
4 |
vec.push_back(exp2); |
202 |
4 |
hasUnknownChild = true; |
203 |
|
} |
204 |
|
} |
205 |
253 |
if (ret != checkTmp) |
206 |
|
{ |
207 |
16 |
if (!hasUnknownChild) |
208 |
|
{ |
209 |
16 |
ret = retTmp; |
210 |
|
} else { |
211 |
|
Kind kr = k == REGEXP_UNION ? OR : AND; |
212 |
|
exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec); |
213 |
|
} |
214 |
|
} |
215 |
253 |
break; |
216 |
|
} |
217 |
73 |
case REGEXP_STAR: |
218 |
|
case REGEXP_OPT: |
219 |
|
{ |
220 |
|
// contains empty string |
221 |
73 |
ret = 1; |
222 |
73 |
break; |
223 |
|
} |
224 |
|
case REGEXP_PLUS: |
225 |
|
{ |
226 |
|
ret = delta(r[0], exp); |
227 |
|
break; |
228 |
|
} |
229 |
|
case REGEXP_LOOP: |
230 |
|
{ |
231 |
|
uint32_t lo = utils::getLoopMinOccurrences(r); |
232 |
|
if (lo == 0) |
233 |
|
{ |
234 |
|
ret = 1; |
235 |
|
} |
236 |
|
else |
237 |
|
{ |
238 |
|
ret = delta(r[0], exp); |
239 |
|
} |
240 |
|
break; |
241 |
|
} |
242 |
4 |
case REGEXP_COMPLEMENT: |
243 |
|
{ |
244 |
4 |
int tmp = delta(r[0], exp); |
245 |
|
// flip the result if known |
246 |
4 |
ret = tmp == 0 ? 0 : (3 - tmp); |
247 |
4 |
exp = exp.isNull() ? exp : exp.negate(); |
248 |
4 |
break; |
249 |
|
} |
250 |
|
default: |
251 |
|
{ |
252 |
|
Assert(!utils::isRegExpKind(k)); |
253 |
|
break; |
254 |
|
} |
255 |
|
} |
256 |
508 |
if (!exp.isNull()) |
257 |
|
{ |
258 |
4 |
exp = Rewriter::rewrite(exp); |
259 |
|
} |
260 |
1016 |
std::pair<int, Node> p(ret, exp); |
261 |
508 |
d_delta_cache[r] = p; |
262 |
1016 |
Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r |
263 |
508 |
<< ", expr = " << exp << std::endl; |
264 |
508 |
return ret; |
265 |
|
} |
266 |
|
|
267 |
|
// 0-unknown, 1-yes, 2-no |
268 |
24 |
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode) |
269 |
|
{ |
270 |
24 |
Assert(c.size() < 2); |
271 |
24 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
272 |
|
|
273 |
24 |
int ret = 1; |
274 |
24 |
retNode = d_emptyRegexp; |
275 |
24 |
NodeManager* nm = NodeManager::currentNM(); |
276 |
24 |
SkolemManager* sm = nm->getSkolemManager(); |
277 |
|
|
278 |
48 |
PairNodeStr dv = std::make_pair( r, c ); |
279 |
24 |
if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) { |
280 |
4 |
retNode = d_deriv_cache[dv].first; |
281 |
4 |
ret = d_deriv_cache[dv].second; |
282 |
|
} |
283 |
20 |
else if (c.empty()) |
284 |
|
{ |
285 |
|
Node expNode; |
286 |
|
ret = delta( r, expNode ); |
287 |
|
if(ret == 0) { |
288 |
|
retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp); |
289 |
|
} else if(ret == 1) { |
290 |
|
retNode = r; |
291 |
|
} |
292 |
|
std::pair< Node, int > p(retNode, ret); |
293 |
|
d_deriv_cache[dv] = p; |
294 |
|
} else { |
295 |
20 |
switch( r.getKind() ) { |
296 |
|
case kind::REGEXP_EMPTY: { |
297 |
|
ret = 2; |
298 |
|
break; |
299 |
|
} |
300 |
|
case kind::REGEXP_SIGMA: { |
301 |
|
retNode = d_emptySingleton; |
302 |
|
break; |
303 |
|
} |
304 |
|
case kind::REGEXP_RANGE: { |
305 |
|
cvc5::String a = r[0].getConst<String>(); |
306 |
|
cvc5::String b = r[1].getConst<String>(); |
307 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
308 |
|
break; |
309 |
|
} |
310 |
8 |
case kind::STRING_TO_REGEXP: { |
311 |
16 |
Node tmp = Rewriter::rewrite(r[0]); |
312 |
8 |
if(tmp.isConst()) { |
313 |
8 |
if(tmp == d_emptyString) { |
314 |
4 |
ret = 2; |
315 |
|
} else { |
316 |
4 |
if (tmp.getConst<String>().front() == c.front()) |
317 |
|
{ |
318 |
|
retNode = |
319 |
|
nm->mkNode(STRING_TO_REGEXP, |
320 |
|
Word::getLength(tmp) == 1 ? d_emptyString |
321 |
|
: Word::substr(tmp, 1)); |
322 |
|
} else { |
323 |
4 |
ret = 2; |
324 |
|
} |
325 |
|
} |
326 |
|
} else { |
327 |
|
ret = 0; |
328 |
|
Node rest; |
329 |
|
if(tmp.getKind() == kind::STRING_CONCAT) { |
330 |
|
Node t2 = tmp[0]; |
331 |
|
if(t2.isConst()) { |
332 |
|
if (t2.getConst<String>().front() == c.front()) |
333 |
|
{ |
334 |
|
Node n = nm->mkNode(STRING_TO_REGEXP, |
335 |
|
Word::getLength(tmp) == 1 |
336 |
|
? d_emptyString |
337 |
|
: Word::substr(tmp, 1)); |
338 |
|
std::vector< Node > vec_nodes; |
339 |
|
vec_nodes.push_back(n); |
340 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
341 |
|
vec_nodes.push_back(tmp[i]); |
342 |
|
} |
343 |
|
retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
344 |
|
ret = 1; |
345 |
|
} else { |
346 |
|
ret = 2; |
347 |
|
} |
348 |
|
} else { |
349 |
|
tmp = tmp[0]; |
350 |
|
std::vector< Node > vec_nodes; |
351 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
352 |
|
vec_nodes.push_back(tmp[i]); |
353 |
|
} |
354 |
|
rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
355 |
|
} |
356 |
|
} |
357 |
|
if(ret == 0) { |
358 |
|
Node sk = |
359 |
|
sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp"); |
360 |
|
retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk); |
361 |
|
if(!rest.isNull()) { |
362 |
|
retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest)); |
363 |
|
} |
364 |
|
Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT, |
365 |
|
NodeManager::currentNM()->mkConst(c), sk)); |
366 |
|
retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp)); |
367 |
|
} |
368 |
|
} |
369 |
8 |
break; |
370 |
|
} |
371 |
4 |
case kind::REGEXP_CONCAT: { |
372 |
8 |
std::vector< Node > vec_nodes; |
373 |
8 |
std::vector< Node > delta_nodes; |
374 |
8 |
Node dnode = d_true; |
375 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
376 |
16 |
Node dc; |
377 |
16 |
Node exp2; |
378 |
8 |
int rt = derivativeS(r[i], c, dc); |
379 |
8 |
if(rt != 2) { |
380 |
|
if(rt == 0) { |
381 |
|
ret = 0; |
382 |
|
} |
383 |
|
std::vector< Node > vec_nodes2; |
384 |
|
if(dc != d_emptySingleton) { |
385 |
|
vec_nodes2.push_back( dc ); |
386 |
|
} |
387 |
|
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
388 |
|
if(r[j] != d_emptySingleton) { |
389 |
|
vec_nodes2.push_back( r[j] ); |
390 |
|
} |
391 |
|
} |
392 |
|
Node tmp = vec_nodes2.size()==0 ? d_emptySingleton : |
393 |
|
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
394 |
|
if(dnode != d_true) { |
395 |
|
tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp)); |
396 |
|
ret = 0; |
397 |
|
} |
398 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
399 |
|
vec_nodes.push_back( tmp ); |
400 |
|
} |
401 |
|
} |
402 |
16 |
Node exp3; |
403 |
8 |
int rt2 = delta( r[i], exp3 ); |
404 |
8 |
if( rt2 == 0 ) { |
405 |
|
dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3)); |
406 |
8 |
} else if( rt2 == 2 ) { |
407 |
|
break; |
408 |
|
} |
409 |
|
} |
410 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
411 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
412 |
4 |
if(retNode == d_emptyRegexp) { |
413 |
4 |
ret = 2; |
414 |
|
} |
415 |
4 |
break; |
416 |
|
} |
417 |
4 |
case kind::REGEXP_UNION: { |
418 |
8 |
std::vector< Node > vec_nodes; |
419 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
420 |
16 |
Node dc; |
421 |
8 |
int rt = derivativeS(r[i], c, dc); |
422 |
8 |
if(rt == 0) { |
423 |
|
ret = 0; |
424 |
|
} |
425 |
8 |
if(rt != 2) { |
426 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
427 |
|
vec_nodes.push_back( dc ); |
428 |
|
} |
429 |
|
} |
430 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; |
431 |
|
} |
432 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
433 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
434 |
4 |
if(retNode == d_emptyRegexp) { |
435 |
4 |
ret = 2; |
436 |
|
} |
437 |
4 |
break; |
438 |
|
} |
439 |
|
case kind::REGEXP_INTER: { |
440 |
|
bool flag = true; |
441 |
|
bool flag_sg = false; |
442 |
|
std::vector< Node > vec_nodes; |
443 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
444 |
|
Node dc; |
445 |
|
int rt = derivativeS(r[i], c, dc); |
446 |
|
if(rt == 0) { |
447 |
|
ret = 0; |
448 |
|
} else if(rt == 2) { |
449 |
|
flag = false; |
450 |
|
break; |
451 |
|
} |
452 |
|
if(dc == d_sigma_star) { |
453 |
|
flag_sg = true; |
454 |
|
} else { |
455 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
456 |
|
vec_nodes.push_back( dc ); |
457 |
|
} |
458 |
|
} |
459 |
|
} |
460 |
|
if(flag) { |
461 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
462 |
|
retNode = d_sigma_star; |
463 |
|
} else { |
464 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
465 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
466 |
|
if(retNode == d_emptyRegexp) { |
467 |
|
ret = 2; |
468 |
|
} |
469 |
|
} |
470 |
|
} else { |
471 |
|
retNode = d_emptyRegexp; |
472 |
|
ret = 2; |
473 |
|
} |
474 |
|
break; |
475 |
|
} |
476 |
4 |
case kind::REGEXP_STAR: { |
477 |
8 |
Node dc; |
478 |
4 |
ret = derivativeS(r[0], c, dc); |
479 |
4 |
retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r )); |
480 |
4 |
break; |
481 |
|
} |
482 |
|
case kind::REGEXP_LOOP: { |
483 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
484 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
485 |
|
if (l == u && l == 0) |
486 |
|
{ |
487 |
|
ret = 2; |
488 |
|
//retNode = d_emptyRegexp; |
489 |
|
} else { |
490 |
|
Node dc; |
491 |
|
ret = derivativeS(r[0], c, dc); |
492 |
|
if(dc==d_emptyRegexp) { |
493 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
494 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
495 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
496 |
|
} else { |
497 |
|
retNode = d_emptyRegexp; |
498 |
|
} |
499 |
|
} |
500 |
|
break; |
501 |
|
} |
502 |
|
case kind::REGEXP_COMPLEMENT: |
503 |
|
{ |
504 |
|
// don't know result |
505 |
|
return 0; |
506 |
|
break; |
507 |
|
} |
508 |
|
default: { |
509 |
|
Assert(!utils::isRegExpKind(r.getKind())); |
510 |
|
return 0; |
511 |
|
break; |
512 |
|
} |
513 |
|
} |
514 |
20 |
if(retNode != d_emptyRegexp) { |
515 |
|
retNode = Rewriter::rewrite( retNode ); |
516 |
|
} |
517 |
40 |
std::pair< Node, int > p(retNode, ret); |
518 |
20 |
d_deriv_cache[dv] = p; |
519 |
|
} |
520 |
|
|
521 |
24 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
522 |
24 |
return ret; |
523 |
|
} |
524 |
|
|
525 |
970 |
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c) |
526 |
|
{ |
527 |
970 |
Assert(c.size() < 2); |
528 |
970 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
529 |
970 |
Node retNode = d_emptyRegexp; |
530 |
1940 |
PairNodeStr dv = std::make_pair( r, c ); |
531 |
970 |
NodeManager* nm = NodeManager::currentNM(); |
532 |
970 |
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { |
533 |
290 |
retNode = d_dv_cache[dv]; |
534 |
|
} |
535 |
680 |
else if (c.empty()) |
536 |
|
{ |
537 |
|
Node exp; |
538 |
|
int tmp = delta( r, exp ); |
539 |
|
if(tmp == 0) { |
540 |
|
// TODO variable |
541 |
|
retNode = d_emptyRegexp; |
542 |
|
} else if(tmp == 1) { |
543 |
|
retNode = r; |
544 |
|
} else { |
545 |
|
retNode = d_emptyRegexp; |
546 |
|
} |
547 |
|
} else { |
548 |
680 |
Kind k = r.getKind(); |
549 |
680 |
switch( k ) { |
550 |
|
case kind::REGEXP_EMPTY: { |
551 |
|
retNode = d_emptyRegexp; |
552 |
|
break; |
553 |
|
} |
554 |
|
case kind::REGEXP_SIGMA: { |
555 |
|
retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
556 |
|
break; |
557 |
|
} |
558 |
|
case kind::REGEXP_RANGE: { |
559 |
|
cvc5::String a = r[0].getConst<String>(); |
560 |
|
cvc5::String b = r[1].getConst<String>(); |
561 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
562 |
|
break; |
563 |
|
} |
564 |
252 |
case kind::STRING_TO_REGEXP: { |
565 |
252 |
if(r[0].isConst()) { |
566 |
252 |
if(r[0] == d_emptyString) { |
567 |
8 |
retNode = d_emptyRegexp; |
568 |
|
} else { |
569 |
244 |
if (r[0].getConst<String>().front() == c.front()) |
570 |
|
{ |
571 |
188 |
retNode = nm->mkNode(STRING_TO_REGEXP, |
572 |
376 |
Word::getLength(r[0]) == 1 |
573 |
376 |
? d_emptyString |
574 |
|
: Word::substr(r[0], 1)); |
575 |
|
} else { |
576 |
56 |
retNode = d_emptyRegexp; |
577 |
|
} |
578 |
|
} |
579 |
|
} else { |
580 |
|
// TODO variable |
581 |
|
retNode = d_emptyRegexp; |
582 |
|
} |
583 |
252 |
break; |
584 |
|
} |
585 |
269 |
case kind::REGEXP_CONCAT: { |
586 |
538 |
Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
587 |
538 |
std::vector< Node > vec_nodes; |
588 |
357 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
589 |
435 |
Node dc = derivativeSingle(r[i], c); |
590 |
347 |
if(dc != d_emptyRegexp) { |
591 |
550 |
std::vector< Node > vec_nodes2; |
592 |
275 |
if(dc != rees) { |
593 |
196 |
vec_nodes2.push_back( dc ); |
594 |
|
} |
595 |
554 |
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
596 |
279 |
if(r[j] != rees) { |
597 |
279 |
vec_nodes2.push_back( r[j] ); |
598 |
|
} |
599 |
|
} |
600 |
275 |
Node tmp = vec_nodes2.size()==0 ? rees : |
601 |
550 |
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
602 |
275 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
603 |
275 |
vec_nodes.push_back( tmp ); |
604 |
|
} |
605 |
|
} |
606 |
435 |
Node exp; |
607 |
347 |
if( delta( r[i], exp ) != 1 ) { |
608 |
259 |
break; |
609 |
|
} |
610 |
|
} |
611 |
771 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
612 |
502 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
613 |
269 |
break; |
614 |
|
} |
615 |
50 |
case kind::REGEXP_UNION: { |
616 |
100 |
std::vector< Node > vec_nodes; |
617 |
168 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
618 |
236 |
Node dc = derivativeSingle(r[i], c); |
619 |
118 |
if(dc != d_emptyRegexp) { |
620 |
52 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
621 |
50 |
vec_nodes.push_back( dc ); |
622 |
|
} |
623 |
|
} |
624 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl; |
625 |
|
} |
626 |
150 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
627 |
100 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
628 |
50 |
break; |
629 |
|
} |
630 |
|
case kind::REGEXP_INTER: { |
631 |
|
bool flag = true; |
632 |
|
bool flag_sg = false; |
633 |
|
std::vector< Node > vec_nodes; |
634 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
635 |
|
Node dc = derivativeSingle(r[i], c); |
636 |
|
if(dc != d_emptyRegexp) { |
637 |
|
if(dc == d_sigma_star) { |
638 |
|
flag_sg = true; |
639 |
|
} else { |
640 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
641 |
|
vec_nodes.push_back( dc ); |
642 |
|
} |
643 |
|
} |
644 |
|
} else { |
645 |
|
flag = false; |
646 |
|
break; |
647 |
|
} |
648 |
|
} |
649 |
|
if(flag) { |
650 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
651 |
|
retNode = d_sigma_star; |
652 |
|
} else { |
653 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
654 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
655 |
|
} |
656 |
|
} else { |
657 |
|
retNode = d_emptyRegexp; |
658 |
|
} |
659 |
|
break; |
660 |
|
} |
661 |
109 |
case kind::REGEXP_STAR: { |
662 |
218 |
Node dc = derivativeSingle(r[0], c); |
663 |
109 |
if(dc != d_emptyRegexp) { |
664 |
87 |
retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); |
665 |
|
} else { |
666 |
22 |
retNode = d_emptyRegexp; |
667 |
|
} |
668 |
109 |
break; |
669 |
|
} |
670 |
|
case kind::REGEXP_LOOP: { |
671 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
672 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
673 |
|
if (l == u || l == 0) |
674 |
|
{ |
675 |
|
retNode = d_emptyRegexp; |
676 |
|
} else { |
677 |
|
Node dc = derivativeSingle(r[0], c); |
678 |
|
if(dc != d_emptyRegexp) { |
679 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
680 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
681 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
682 |
|
} else { |
683 |
|
retNode = d_emptyRegexp; |
684 |
|
} |
685 |
|
} |
686 |
|
//Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; |
687 |
|
break; |
688 |
|
} |
689 |
|
case kind::REGEXP_COMPLEMENT: |
690 |
|
default: { |
691 |
|
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; |
692 |
|
Unreachable(); |
693 |
|
break; |
694 |
|
} |
695 |
|
} |
696 |
680 |
if(retNode != d_emptyRegexp) { |
697 |
584 |
retNode = Rewriter::rewrite( retNode ); |
698 |
|
} |
699 |
680 |
d_dv_cache[dv] = retNode; |
700 |
|
} |
701 |
970 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
702 |
1940 |
return retNode; |
703 |
|
} |
704 |
|
|
705 |
746 |
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset) |
706 |
|
{ |
707 |
746 |
Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl; |
708 |
|
std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr = |
709 |
746 |
d_fset_cache.find(r); |
710 |
746 |
if(itr != d_fset_cache.end()) { |
711 |
226 |
pcset.insert((itr->second).first.begin(), (itr->second).first.end()); |
712 |
226 |
pvset.insert((itr->second).second.begin(), (itr->second).second.end()); |
713 |
|
} else { |
714 |
|
// cset is code points |
715 |
1040 |
std::set<unsigned> cset; |
716 |
1040 |
SetNodes vset; |
717 |
520 |
Kind k = r.getKind(); |
718 |
520 |
switch( k ) { |
719 |
|
case kind::REGEXP_EMPTY: { |
720 |
|
break; |
721 |
|
} |
722 |
|
case kind::REGEXP_RANGE: { |
723 |
|
unsigned a = r[0].getConst<String>().front(); |
724 |
|
unsigned b = r[1].getConst<String>().front(); |
725 |
|
Assert(a < b); |
726 |
|
Assert(b < std::numeric_limits<unsigned>::max()); |
727 |
|
for (unsigned c = a; c <= b; c++) |
728 |
|
{ |
729 |
|
cset.insert(c); |
730 |
|
} |
731 |
|
break; |
732 |
|
} |
733 |
202 |
case kind::STRING_TO_REGEXP: { |
734 |
404 |
Node st = Rewriter::rewrite(r[0]); |
735 |
202 |
if(st.isConst()) { |
736 |
404 |
String s = st.getConst<String>(); |
737 |
202 |
if(s.size() != 0) { |
738 |
196 |
unsigned sc = s.front(); |
739 |
196 |
cset.insert(sc); |
740 |
|
} |
741 |
|
} |
742 |
|
else if (st.getKind() == kind::STRING_CONCAT) |
743 |
|
{ |
744 |
|
if(st[0].isConst()) { |
745 |
|
String s = st[0].getConst<String>(); |
746 |
|
unsigned sc = s.front(); |
747 |
|
cset.insert(sc); |
748 |
|
} else { |
749 |
|
vset.insert( st[0] ); |
750 |
|
} |
751 |
|
} |
752 |
|
else |
753 |
|
{ |
754 |
|
vset.insert(st); |
755 |
|
} |
756 |
202 |
break; |
757 |
|
} |
758 |
221 |
case kind::REGEXP_CONCAT: { |
759 |
267 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
760 |
261 |
firstChars(r[i], cset, vset); |
761 |
307 |
Node n = r[i]; |
762 |
307 |
Node exp; |
763 |
261 |
if (delta(n, exp) != 1) |
764 |
|
{ |
765 |
215 |
break; |
766 |
|
} |
767 |
|
} |
768 |
221 |
break; |
769 |
|
} |
770 |
28 |
case kind::REGEXP_UNION: { |
771 |
92 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
772 |
64 |
firstChars(r[i], cset, vset); |
773 |
|
} |
774 |
28 |
break; |
775 |
|
} |
776 |
|
case kind::REGEXP_INTER: { |
777 |
|
//TODO: Overapproximation for now |
778 |
|
//for(unsigned i=0; i<r.getNumChildren(); i++) { |
779 |
|
// firstChars(r[i], cset, vset); |
780 |
|
//} |
781 |
|
firstChars(r[0], cset, vset); |
782 |
|
break; |
783 |
|
} |
784 |
69 |
case kind::REGEXP_STAR: { |
785 |
69 |
firstChars(r[0], cset, vset); |
786 |
69 |
break; |
787 |
|
} |
788 |
|
case kind::REGEXP_LOOP: { |
789 |
|
firstChars(r[0], cset, vset); |
790 |
|
break; |
791 |
|
} |
792 |
|
case kind::REGEXP_SIGMA: |
793 |
|
case kind::REGEXP_COMPLEMENT: |
794 |
|
default: { |
795 |
|
// we do not expect to call this function on regular expressions that |
796 |
|
// aren't a standard regular expression kind. However, if we do, then |
797 |
|
// the following code is conservative and says that the current |
798 |
|
// regular expression can begin with any character. |
799 |
|
Assert(utils::isRegExpKind(k)); |
800 |
|
// can start with any character |
801 |
|
Assert(d_lastchar < std::numeric_limits<unsigned>::max()); |
802 |
|
for (unsigned i = 0; i <= d_lastchar; i++) |
803 |
|
{ |
804 |
|
cset.insert(i); |
805 |
|
} |
806 |
|
break; |
807 |
|
} |
808 |
|
} |
809 |
520 |
pcset.insert(cset.begin(), cset.end()); |
810 |
520 |
pvset.insert(vset.begin(), vset.end()); |
811 |
1040 |
std::pair<std::set<unsigned>, SetNodes> p(cset, vset); |
812 |
520 |
d_fset_cache[r] = p; |
813 |
|
} |
814 |
|
|
815 |
746 |
if(Trace.isOn("regexp-fset")) { |
816 |
|
Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {"; |
817 |
|
for (std::set<unsigned>::const_iterator it = pcset.begin(); |
818 |
|
it != pcset.end(); |
819 |
|
++it) |
820 |
|
{ |
821 |
|
if (it != pcset.begin()) |
822 |
|
{ |
823 |
|
Trace("regexp-fset") << ","; |
824 |
|
} |
825 |
|
Trace("regexp-fset") << (*it); |
826 |
|
} |
827 |
|
Trace("regexp-fset") << "}" << std::endl; |
828 |
|
} |
829 |
746 |
} |
830 |
|
|
831 |
366 |
Node RegExpOpr::simplify(Node t, bool polarity) |
832 |
|
{ |
833 |
732 |
Trace("strings-regexp-simpl") |
834 |
366 |
<< "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl; |
835 |
366 |
Assert(t.getKind() == kind::STRING_IN_REGEXP); |
836 |
732 |
Node tlit = polarity ? t : t.notNode(); |
837 |
732 |
Node conc; |
838 |
366 |
std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit); |
839 |
366 |
if (itr != d_simpCache.end()) |
840 |
|
{ |
841 |
4 |
return itr->second; |
842 |
|
} |
843 |
362 |
if (polarity) |
844 |
|
{ |
845 |
548 |
std::vector<Node> newSkolems; |
846 |
274 |
conc = reduceRegExpPos(tlit, d_sc, newSkolems); |
847 |
|
} |
848 |
|
else |
849 |
|
{ |
850 |
|
// see if we can use an optimized version of the reduction for re.++. |
851 |
176 |
Node r = t[1]; |
852 |
88 |
if (r.getKind() == REGEXP_CONCAT) |
853 |
|
{ |
854 |
|
// the index we are removing from the RE concatenation |
855 |
67 |
size_t index = 0; |
856 |
|
// As an optimization to the reduction, if we can determine that |
857 |
|
// all strings in the language of R1 have the same length, say n, |
858 |
|
// then the conclusion of the reduction is quantifier-free: |
859 |
|
// ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) |
860 |
134 |
Node reLen = getRegExpConcatFixed(r, index); |
861 |
67 |
if (!reLen.isNull()) |
862 |
|
{ |
863 |
52 |
conc = reduceRegExpNegConcatFixed(tlit, reLen, index); |
864 |
|
} |
865 |
|
} |
866 |
88 |
if (conc.isNull()) |
867 |
|
{ |
868 |
36 |
conc = reduceRegExpNeg(tlit); |
869 |
|
} |
870 |
|
} |
871 |
362 |
d_simpCache[tlit] = conc; |
872 |
724 |
Trace("strings-regexp-simpl") |
873 |
362 |
<< "RegExpOpr::simplify: returns " << conc << std::endl; |
874 |
362 |
return conc; |
875 |
|
} |
876 |
|
|
877 |
81 |
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index) |
878 |
|
{ |
879 |
81 |
Assert(r.getKind() == REGEXP_CONCAT); |
880 |
81 |
index = 0; |
881 |
162 |
Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]); |
882 |
81 |
if (!reLen.isNull()) |
883 |
|
{ |
884 |
56 |
return reLen; |
885 |
|
} |
886 |
|
// try from the opposite end |
887 |
25 |
size_t indexE = r.getNumChildren() - 1; |
888 |
25 |
reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]); |
889 |
25 |
if (!reLen.isNull()) |
890 |
|
{ |
891 |
8 |
index = indexE; |
892 |
8 |
return reLen; |
893 |
|
} |
894 |
17 |
return Node::null(); |
895 |
|
} |
896 |
|
|
897 |
42 |
Node RegExpOpr::reduceRegExpNeg(Node mem) |
898 |
|
{ |
899 |
42 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
900 |
84 |
Node s = mem[0][0]; |
901 |
84 |
Node r = mem[0][1]; |
902 |
42 |
NodeManager* nm = NodeManager::currentNM(); |
903 |
42 |
Kind k = r.getKind(); |
904 |
84 |
Node zero = nm->mkConst(Rational(0)); |
905 |
42 |
Node conc; |
906 |
42 |
if (k == REGEXP_CONCAT) |
907 |
|
{ |
908 |
|
// do not use length entailment, call regular expression concat |
909 |
38 |
Node reLen; |
910 |
19 |
size_t i = 0; |
911 |
19 |
conc = reduceRegExpNegConcatFixed(mem, reLen, i); |
912 |
|
} |
913 |
23 |
else if (k == REGEXP_STAR) |
914 |
|
{ |
915 |
44 |
Node emp = Word::mkEmptyWord(s.getType()); |
916 |
44 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
917 |
44 |
Node sne = s.eqNode(emp).negate(); |
918 |
44 |
Node b1 = nm->mkBoundVar(nm->integerType()); |
919 |
44 |
Node b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
920 |
|
Node g1 = |
921 |
44 |
nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1)); |
922 |
|
// internal |
923 |
44 |
Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
924 |
44 |
Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
925 |
44 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate(); |
926 |
44 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate(); |
927 |
|
|
928 |
22 |
conc = nm->mkNode(OR, s1r1, s2r2); |
929 |
22 |
conc = nm->mkNode(IMPLIES, g1, conc); |
930 |
22 |
conc = nm->mkNode(FORALL, b1v, conc); |
931 |
22 |
conc = nm->mkNode(AND, sne, conc); |
932 |
|
} |
933 |
|
else |
934 |
|
{ |
935 |
1 |
Assert(!utils::isRegExpKind(k)); |
936 |
|
} |
937 |
84 |
return conc; |
938 |
|
} |
939 |
|
|
940 |
79 |
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index) |
941 |
|
{ |
942 |
79 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
943 |
158 |
Node s = mem[0][0]; |
944 |
158 |
Node r = mem[0][1]; |
945 |
79 |
NodeManager* nm = NodeManager::currentNM(); |
946 |
79 |
Assert(r.getKind() == REGEXP_CONCAT); |
947 |
158 |
Node zero = nm->mkConst(Rational(0)); |
948 |
|
// The following simplification states that |
949 |
|
// ~( s in R1 ++ R2 ++... ++ Rn ) |
950 |
|
// is equivalent to |
951 |
|
// forall x. |
952 |
|
// 0 <= x <= len(s) => |
953 |
|
// ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn) |
954 |
|
// Index is the child index of r that we are stripping off, which is either |
955 |
|
// from the beginning or the end. |
956 |
79 |
Assert(index == 0 || index == r.getNumChildren() - 1); |
957 |
158 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
958 |
158 |
Node b1; |
959 |
158 |
Node b1v; |
960 |
158 |
Node guard; |
961 |
79 |
if (reLen.isNull()) |
962 |
|
{ |
963 |
19 |
b1 = SkolemCache::mkIndexVar(mem); |
964 |
19 |
b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
965 |
57 |
guard = nm->mkNode(AND, |
966 |
38 |
nm->mkNode(GEQ, b1, zero), |
967 |
38 |
nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); |
968 |
|
} |
969 |
|
else |
970 |
|
{ |
971 |
60 |
b1 = reLen; |
972 |
|
} |
973 |
158 |
Node s1; |
974 |
158 |
Node s2; |
975 |
79 |
if (index == 0) |
976 |
|
{ |
977 |
71 |
s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
978 |
71 |
s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
979 |
|
} |
980 |
|
else |
981 |
|
{ |
982 |
8 |
s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1); |
983 |
8 |
s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1)); |
984 |
|
} |
985 |
158 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate(); |
986 |
158 |
std::vector<Node> nvec; |
987 |
337 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) |
988 |
|
{ |
989 |
258 |
if (i != index) |
990 |
|
{ |
991 |
179 |
nvec.push_back(r[i]); |
992 |
|
} |
993 |
|
} |
994 |
158 |
Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); |
995 |
79 |
r2 = Rewriter::rewrite(r2); |
996 |
158 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); |
997 |
79 |
Node conc = nm->mkNode(OR, s1r1, s2r2); |
998 |
79 |
if (!b1v.isNull()) |
999 |
|
{ |
1000 |
19 |
conc = nm->mkNode(OR, guard.negate(), conc); |
1001 |
19 |
conc = nm->mkNode(FORALL, b1v, conc); |
1002 |
|
} |
1003 |
158 |
return conc; |
1004 |
|
} |
1005 |
|
|
1006 |
488 |
Node RegExpOpr::reduceRegExpPos(Node mem, |
1007 |
|
SkolemCache* sc, |
1008 |
|
std::vector<Node>& newSkolems) |
1009 |
|
{ |
1010 |
488 |
Assert(mem.getKind() == STRING_IN_REGEXP); |
1011 |
976 |
Node s = mem[0]; |
1012 |
976 |
Node r = mem[1]; |
1013 |
488 |
NodeManager* nm = NodeManager::currentNM(); |
1014 |
488 |
Kind k = r.getKind(); |
1015 |
488 |
Node conc; |
1016 |
488 |
if (k == REGEXP_CONCAT) |
1017 |
|
{ |
1018 |
704 |
std::vector<Node> nvec; |
1019 |
704 |
std::vector<Node> cc; |
1020 |
|
// get the (valid) existential for this membership |
1021 |
704 |
Node eform = getExistsForRegExpConcatMem(mem); |
1022 |
352 |
SkolemManager* sm = nm->getSkolemManager(); |
1023 |
|
// Notice that this rule does not introduce witness terms, instead it |
1024 |
|
// uses skolems in the conclusion of the proof rule directly. Thus, |
1025 |
|
// the existential eform does not need to be explicitly justified by a |
1026 |
|
// proof here, since it is only being used as an intermediate formula in |
1027 |
|
// this inference. Hence we do not pass a proof generator to mkSkolemize. |
1028 |
352 |
sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem"); |
1029 |
352 |
Assert(newSkolems.size() == r.getNumChildren()); |
1030 |
|
// Look up skolems for each of the components. If sc has optimizations |
1031 |
|
// enabled, this will return arguments of str.to_re. |
1032 |
1363 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i) |
1033 |
|
{ |
1034 |
1011 |
if (r[i].getKind() == STRING_TO_REGEXP) |
1035 |
|
{ |
1036 |
|
// optimization, just take the body |
1037 |
360 |
newSkolems[i] = r[i][0]; |
1038 |
|
} |
1039 |
|
else |
1040 |
|
{ |
1041 |
651 |
nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i])); |
1042 |
|
} |
1043 |
|
} |
1044 |
|
// (str.in_re x (re.++ R1 .... Rn)) => |
1045 |
|
// (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn))) |
1046 |
704 |
Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems)); |
1047 |
352 |
nvec.push_back(lem); |
1048 |
352 |
conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); |
1049 |
|
} |
1050 |
136 |
else if (k == REGEXP_STAR) |
1051 |
|
{ |
1052 |
268 |
Node emp = Word::mkEmptyWord(s.getType()); |
1053 |
268 |
Node se = s.eqNode(emp); |
1054 |
268 |
Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]); |
1055 |
268 |
Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]); |
1056 |
268 |
Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand); |
1057 |
|
// We unfold `x in R*` by considering three cases: `x` is empty, `x` |
1058 |
|
// is matched by `R`, or `x` is matched by two or more `R`s. For the |
1059 |
|
// last case, `x` will break into three pieces, making the beginning |
1060 |
|
// and the end each match `R` and the middle match `R*`. Matching the |
1061 |
|
// beginning and the end with `R` allows us to reason about the |
1062 |
|
// beginning and the end of `x` simultaneously. |
1063 |
|
// |
1064 |
|
// x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R)) |
1065 |
|
|
1066 |
|
// We also immediately unfold the last disjunct for re.*. The advantage |
1067 |
|
// of doing this is that we use the same scheme for skolems above. |
1068 |
268 |
std::vector<Node> newSkolemsC; |
1069 |
134 |
sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC); |
1070 |
134 |
Assert(newSkolemsC.size() == 3); |
1071 |
|
// make the return lemma |
1072 |
|
// can also assume the component match the first and last R are non-empty. |
1073 |
|
// This means that the overall conclusion is: |
1074 |
|
// (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^ |
1075 |
|
// k1 in R ^ k2 in (re.* R) ^ k3 in R ^ |
1076 |
|
// k1 != "" ^ k3 != "") |
1077 |
134 |
conc = nm->mkNode(OR, |
1078 |
|
se, |
1079 |
|
sinr, |
1080 |
536 |
nm->mkNode(AND, |
1081 |
|
sinRExp, |
1082 |
268 |
newSkolemsC[0].eqNode(emp).negate(), |
1083 |
268 |
newSkolemsC[2].eqNode(emp).negate())); |
1084 |
|
} |
1085 |
|
else |
1086 |
|
{ |
1087 |
2 |
Assert(!utils::isRegExpKind(k)); |
1088 |
|
} |
1089 |
976 |
return conc; |
1090 |
|
} |
1091 |
|
|
1092 |
|
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) { |
1093 |
|
for(std::set< PairNodes >::const_iterator itr = s.begin(); |
1094 |
|
itr != s.end(); ++itr) { |
1095 |
|
if((itr->first == n1 && itr->second == n2) || |
1096 |
|
(itr->first == n2 && itr->second == n1)) { |
1097 |
|
return true; |
1098 |
|
} |
1099 |
|
} |
1100 |
|
return false; |
1101 |
|
} |
1102 |
|
|
1103 |
346 |
bool RegExpOpr::containC2(unsigned cnt, Node n) { |
1104 |
346 |
if(n.getKind() == kind::REGEXP_RV) { |
1105 |
132 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1106 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::containC2"; |
1107 |
132 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1108 |
132 |
return cnt == y; |
1109 |
214 |
} else if(n.getKind() == kind::REGEXP_CONCAT) { |
1110 |
16 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1111 |
12 |
if(containC2(cnt, n[i])) { |
1112 |
2 |
return true; |
1113 |
|
} |
1114 |
|
} |
1115 |
208 |
} else if(n.getKind() == kind::REGEXP_STAR) { |
1116 |
16 |
return containC2(cnt, n[0]); |
1117 |
192 |
} else if(n.getKind() == kind::REGEXP_LOOP) { |
1118 |
|
return containC2(cnt, n[0]); |
1119 |
192 |
} else if(n.getKind() == kind::REGEXP_UNION) { |
1120 |
18 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1121 |
14 |
if(containC2(cnt, n[i])) { |
1122 |
2 |
return true; |
1123 |
|
} |
1124 |
|
} |
1125 |
|
} |
1126 |
194 |
return false; |
1127 |
|
} |
1128 |
176 |
Node RegExpOpr::convert1(unsigned cnt, Node n) { |
1129 |
176 |
Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl; |
1130 |
352 |
Node r1, r2; |
1131 |
176 |
convert2(cnt, n, r1, r2); |
1132 |
176 |
Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl; |
1133 |
176 |
Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, |
1134 |
176 |
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2); |
1135 |
176 |
ret = Rewriter::rewrite( ret ); |
1136 |
176 |
Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl; |
1137 |
352 |
return ret; |
1138 |
|
} |
1139 |
285 |
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { |
1140 |
285 |
if(n == d_emptyRegexp) { |
1141 |
24 |
r1 = d_emptyRegexp; |
1142 |
24 |
r2 = d_emptyRegexp; |
1143 |
24 |
return; |
1144 |
261 |
} else if(n == d_emptySingleton) { |
1145 |
21 |
r1 = d_emptySingleton; |
1146 |
21 |
r2 = d_emptySingleton; |
1147 |
|
} |
1148 |
261 |
Kind nk = n.getKind(); |
1149 |
261 |
if (nk == REGEXP_RV) |
1150 |
|
{ |
1151 |
33 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1152 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::convert2"; |
1153 |
33 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1154 |
33 |
r1 = d_emptySingleton; |
1155 |
33 |
if(cnt == y) { |
1156 |
33 |
r2 = d_emptyRegexp; |
1157 |
|
} else { |
1158 |
|
r2 = n; |
1159 |
|
} |
1160 |
|
} |
1161 |
228 |
else if (nk == REGEXP_CONCAT) |
1162 |
|
{ |
1163 |
148 |
bool flag = true; |
1164 |
296 |
std::vector<Node> vr1, vr2; |
1165 |
417 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1166 |
304 |
if(containC2(cnt, n[i])) { |
1167 |
70 |
Node t1, t2; |
1168 |
35 |
convert2(cnt, n[i], t1, t2); |
1169 |
35 |
vr1.push_back(t1); |
1170 |
35 |
r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] : |
1171 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1); |
1172 |
35 |
vr2.push_back(t2); |
1173 |
35 |
for( unsigned j=i+1; j<n.getNumChildren(); j++ ) { |
1174 |
|
vr2.push_back(n[j]); |
1175 |
|
} |
1176 |
35 |
r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] : |
1177 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2); |
1178 |
35 |
flag = false; |
1179 |
35 |
break; |
1180 |
|
} else { |
1181 |
269 |
vr1.push_back(n[i]); |
1182 |
|
} |
1183 |
|
} |
1184 |
148 |
if(flag) { |
1185 |
113 |
r1 = d_emptySingleton; |
1186 |
113 |
r2 = n; |
1187 |
|
} |
1188 |
|
} |
1189 |
80 |
else if (nk == REGEXP_UNION) |
1190 |
|
{ |
1191 |
66 |
std::vector<Node> vr1, vr2; |
1192 |
107 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1193 |
148 |
Node t1, t2; |
1194 |
74 |
convert2(cnt, n[i], t1, t2); |
1195 |
74 |
vr1.push_back(t1); |
1196 |
74 |
vr2.push_back(t2); |
1197 |
|
} |
1198 |
33 |
r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1); |
1199 |
33 |
r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); |
1200 |
|
} |
1201 |
47 |
else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE |
1202 |
|
|| nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP) |
1203 |
|
{ |
1204 |
|
// this leaves n unchanged |
1205 |
47 |
r1 = d_emptySingleton; |
1206 |
47 |
r2 = n; |
1207 |
|
} |
1208 |
|
else |
1209 |
|
{ |
1210 |
|
//is it possible? |
1211 |
|
Unreachable(); |
1212 |
|
} |
1213 |
|
} |
1214 |
|
|
1215 |
251 |
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) { |
1216 |
|
//Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); |
1217 |
251 |
if(r1 > r2) { |
1218 |
52 |
TNode tmpNode = r1; |
1219 |
26 |
r1 = r2; |
1220 |
26 |
r2 = tmpNode; |
1221 |
|
} |
1222 |
251 |
Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl; |
1223 |
502 |
std::pair < Node, Node > p(r1, r2); |
1224 |
251 |
std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p); |
1225 |
251 |
Node rNode; |
1226 |
251 |
if(itr != d_inter_cache.end()) { |
1227 |
26 |
rNode = itr->second; |
1228 |
|
} else { |
1229 |
225 |
Trace("regexp-int-debug") << " ... not in cache" << std::endl; |
1230 |
225 |
if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) { |
1231 |
|
Trace("regexp-int-debug") << " ... one is empty set" << std::endl; |
1232 |
|
rNode = d_emptyRegexp; |
1233 |
225 |
} else if(r1 == d_emptySingleton || r2 == d_emptySingleton) { |
1234 |
18 |
Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl; |
1235 |
36 |
Node exp; |
1236 |
18 |
int r = delta((r1 == d_emptySingleton ? r2 : r1), exp); |
1237 |
18 |
if(r == 0) { |
1238 |
|
//TODO: variable |
1239 |
|
Unreachable(); |
1240 |
18 |
} else if(r == 1) { |
1241 |
14 |
rNode = d_emptySingleton; |
1242 |
|
} else { |
1243 |
4 |
rNode = d_emptyRegexp; |
1244 |
|
} |
1245 |
207 |
} else if(r1 == r2) { |
1246 |
2 |
Trace("regexp-int-debug") << " ... equal" << std::endl; |
1247 |
2 |
rNode = r1; //convert1(cnt, r1); |
1248 |
|
} else { |
1249 |
205 |
Trace("regexp-int-debug") << " ... normal checking" << std::endl; |
1250 |
205 |
std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p); |
1251 |
205 |
if(itrcache != cache.end()) { |
1252 |
29 |
rNode = itrcache->second; |
1253 |
|
} else { |
1254 |
176 |
Trace("regexp-int-debug") << " ... normal without cache" << std::endl; |
1255 |
352 |
std::vector<unsigned> cset; |
1256 |
352 |
std::set<unsigned> cset1, cset2; |
1257 |
352 |
std::set< Node > vset1, vset2; |
1258 |
176 |
firstChars(r1, cset1, vset1); |
1259 |
176 |
firstChars(r2, cset2, vset2); |
1260 |
176 |
Trace("regexp-int-debug") << " ... got fset" << std::endl; |
1261 |
176 |
std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(), |
1262 |
176 |
std::inserter(cset, cset.begin())); |
1263 |
352 |
std::vector< Node > vec_nodes; |
1264 |
352 |
Node delta_exp; |
1265 |
176 |
Trace("regexp-int-debug") << " ... try delta" << std::endl; |
1266 |
176 |
int flag = delta(r1, delta_exp); |
1267 |
176 |
int flag2 = delta(r2, delta_exp); |
1268 |
176 |
Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl; |
1269 |
176 |
if(flag != 2 && flag2 != 2) { |
1270 |
19 |
if(flag == 1 && flag2 == 1) { |
1271 |
19 |
vec_nodes.push_back(d_emptySingleton); |
1272 |
|
} else { |
1273 |
|
//TODO: variable |
1274 |
|
Unreachable(); |
1275 |
|
} |
1276 |
|
} |
1277 |
176 |
if(Trace.isOn("regexp-int-debug")) { |
1278 |
|
Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; |
1279 |
|
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1280 |
|
it != cset.end(); |
1281 |
|
++it) |
1282 |
|
{ |
1283 |
|
if (it != cset.begin()) |
1284 |
|
{ |
1285 |
|
Trace("regexp-int-debug") << ", "; |
1286 |
|
} |
1287 |
|
Trace("regexp-int-debug") << (*it); |
1288 |
|
} |
1289 |
|
Trace("regexp-int-debug") << std::endl; |
1290 |
|
} |
1291 |
352 |
std::map< PairNodes, Node > cacheX; |
1292 |
374 |
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1293 |
374 |
it != cset.end(); |
1294 |
|
++it) |
1295 |
|
{ |
1296 |
396 |
std::vector<unsigned> cvec; |
1297 |
198 |
cvec.push_back(*it); |
1298 |
396 |
String c(cvec); |
1299 |
198 |
Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; |
1300 |
396 |
Node r1l = derivativeSingle(r1, c); |
1301 |
396 |
Node r2l = derivativeSingle(r2, c); |
1302 |
198 |
Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l) << std::endl; |
1303 |
198 |
Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l) << std::endl; |
1304 |
396 |
Node rt; |
1305 |
|
|
1306 |
198 |
if(r1l > r2l) { |
1307 |
32 |
Node tnode = r1l; |
1308 |
16 |
r1l = r2l; r2l = tnode; |
1309 |
|
} |
1310 |
396 |
PairNodes pp(r1l, r2l); |
1311 |
198 |
std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp); |
1312 |
198 |
if(itr2 != cacheX.end()) { |
1313 |
4 |
rt = itr2->second; |
1314 |
|
} else { |
1315 |
388 |
std::map< PairNodes, Node > cache2(cache); |
1316 |
388 |
cache2[p] = NodeManager::currentNM()->mkNode( |
1317 |
|
kind::REGEXP_RV, |
1318 |
388 |
NodeManager::currentNM()->mkConst(cvc5::Rational(cnt))); |
1319 |
194 |
rt = intersectInternal(r1l, r2l, cache2, cnt+1); |
1320 |
194 |
cacheX[ pp ] = rt; |
1321 |
|
} |
1322 |
|
|
1323 |
594 |
rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, |
1324 |
396 |
NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) ); |
1325 |
|
|
1326 |
198 |
Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl; |
1327 |
198 |
vec_nodes.push_back(rt); |
1328 |
|
} |
1329 |
176 |
rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] : |
1330 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) ); |
1331 |
176 |
rNode = convert1(cnt, rNode); |
1332 |
176 |
rNode = Rewriter::rewrite( rNode ); |
1333 |
|
} |
1334 |
|
} |
1335 |
225 |
Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode) << std::endl; |
1336 |
225 |
if (!expr::hasSubtermKind(REGEXP_RV, rNode)) |
1337 |
|
{ |
1338 |
101 |
d_inter_cache[p] = rNode; |
1339 |
|
} |
1340 |
|
} |
1341 |
251 |
Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl; |
1342 |
502 |
return rNode; |
1343 |
|
} |
1344 |
|
|
1345 |
114 |
Node RegExpOpr::removeIntersection(Node r) { |
1346 |
114 |
Assert(checkConstRegExp(r)); |
1347 |
114 |
NodeManager* nm = NodeManager::currentNM(); |
1348 |
228 |
std::unordered_map<TNode, Node> visited; |
1349 |
114 |
std::unordered_map<TNode, Node>::iterator it; |
1350 |
228 |
std::vector<TNode> visit; |
1351 |
228 |
TNode cur; |
1352 |
114 |
visit.push_back(r); |
1353 |
1302 |
do |
1354 |
|
{ |
1355 |
1416 |
cur = visit.back(); |
1356 |
1416 |
visit.pop_back(); |
1357 |
1416 |
it = visited.find(cur); |
1358 |
|
|
1359 |
1416 |
if (it == visited.end()) |
1360 |
|
{ |
1361 |
694 |
visited[cur] = Node::null(); |
1362 |
694 |
visit.push_back(cur); |
1363 |
1302 |
for (const Node& cn : cur) |
1364 |
|
{ |
1365 |
608 |
visit.push_back(cn); |
1366 |
|
} |
1367 |
|
} |
1368 |
722 |
else if (it->second.isNull()) |
1369 |
|
{ |
1370 |
694 |
Kind ck = cur.getKind(); |
1371 |
1388 |
Node ret; |
1372 |
694 |
bool childChanged = false; |
1373 |
1388 |
std::vector<Node> children; |
1374 |
694 |
if (cur.getMetaKind() == metakind::PARAMETERIZED) |
1375 |
|
{ |
1376 |
|
children.push_back(cur.getOperator()); |
1377 |
|
} |
1378 |
1302 |
for (const Node& cn : cur) |
1379 |
|
{ |
1380 |
608 |
it = visited.find(cn); |
1381 |
608 |
Assert(it != visited.end()); |
1382 |
608 |
Assert(!it->second.isNull()); |
1383 |
608 |
if (ck == REGEXP_INTER) |
1384 |
|
{ |
1385 |
4 |
if (ret.isNull()) |
1386 |
|
{ |
1387 |
2 |
ret = it->second; |
1388 |
|
} |
1389 |
|
else |
1390 |
|
{ |
1391 |
2 |
ret = intersect(ret, it->second); |
1392 |
|
} |
1393 |
|
} |
1394 |
|
else |
1395 |
|
{ |
1396 |
|
// will construct below |
1397 |
604 |
childChanged = childChanged || cn != it->second; |
1398 |
604 |
children.push_back(it->second); |
1399 |
|
} |
1400 |
|
} |
1401 |
694 |
if (ck != REGEXP_INTER) |
1402 |
|
{ |
1403 |
692 |
if (childChanged) |
1404 |
|
{ |
1405 |
2 |
ret = nm->mkNode(cur.getKind(), children); |
1406 |
|
} |
1407 |
|
else |
1408 |
|
{ |
1409 |
690 |
ret = cur; |
1410 |
|
} |
1411 |
|
} |
1412 |
694 |
visited[cur] = ret; |
1413 |
|
} |
1414 |
1416 |
} while (!visit.empty()); |
1415 |
114 |
Assert(visited.find(r) != visited.end()); |
1416 |
114 |
Assert(!visited.find(r)->second.isNull()); |
1417 |
114 |
if (Trace.isOn("regexp-intersect")) |
1418 |
|
{ |
1419 |
|
Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) |
1420 |
|
<< " ) = " << mkString(visited[r]) << std::endl; |
1421 |
|
} |
1422 |
228 |
return visited[r]; |
1423 |
|
} |
1424 |
|
|
1425 |
57 |
Node RegExpOpr::intersect(Node r1, Node r2) |
1426 |
|
{ |
1427 |
57 |
if (!checkConstRegExp(r1) || !checkConstRegExp(r2)) |
1428 |
|
{ |
1429 |
|
return Node::null(); |
1430 |
|
} |
1431 |
114 |
Node rr1 = removeIntersection(r1); |
1432 |
114 |
Node rr2 = removeIntersection(r2); |
1433 |
114 |
std::map<PairNodes, Node> cache; |
1434 |
57 |
Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl; |
1435 |
57 |
Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl; |
1436 |
114 |
Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) |
1437 |
57 |
<< ",\n\t" << mkString(r2) << ")" << std::endl; |
1438 |
114 |
Node retNode = intersectInternal(rr1, rr2, cache, 1); |
1439 |
114 |
Trace("regexp-intersect") |
1440 |
114 |
<< "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2) |
1441 |
57 |
<< ") =\n\t" << mkString(retNode) << std::endl; |
1442 |
57 |
Trace("regexp-intersect-node") << "Intersect finished." << std::endl; |
1443 |
57 |
return retNode; |
1444 |
|
} |
1445 |
|
|
1446 |
|
//printing |
1447 |
11108 |
std::string RegExpOpr::niceChar(Node r) { |
1448 |
11108 |
if(r.isConst()) { |
1449 |
22028 |
std::string s = r.getConst<String>().toString(); |
1450 |
11014 |
return s == "." ? "\\." : s; |
1451 |
|
} else { |
1452 |
188 |
std::string ss = "$" + r.toString(); |
1453 |
94 |
return ss; |
1454 |
|
} |
1455 |
|
} |
1456 |
21230 |
std::string RegExpOpr::mkString( Node r ) { |
1457 |
21230 |
std::string retStr; |
1458 |
21230 |
if(r.isNull()) { |
1459 |
|
retStr = "\\E"; |
1460 |
|
} else { |
1461 |
21230 |
int k = r.getKind(); |
1462 |
21230 |
switch( k ) { |
1463 |
300 |
case kind::REGEXP_EMPTY: { |
1464 |
300 |
retStr += "\\E"; |
1465 |
300 |
break; |
1466 |
|
} |
1467 |
266 |
case kind::REGEXP_SIGMA: { |
1468 |
266 |
retStr += "."; |
1469 |
266 |
break; |
1470 |
|
} |
1471 |
10574 |
case kind::STRING_TO_REGEXP: { |
1472 |
21148 |
std::string tmp( niceChar( r[0] ) ); |
1473 |
10574 |
retStr += tmp.size()==1? tmp : "(" + tmp + ")"; |
1474 |
10574 |
break; |
1475 |
|
} |
1476 |
3651 |
case kind::REGEXP_CONCAT: { |
1477 |
3651 |
retStr += "("; |
1478 |
11630 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1479 |
|
//if(i != 0) retStr += "."; |
1480 |
7979 |
retStr += mkString( r[i] ); |
1481 |
|
} |
1482 |
3651 |
retStr += ")"; |
1483 |
3651 |
break; |
1484 |
|
} |
1485 |
1236 |
case kind::REGEXP_UNION: { |
1486 |
1236 |
retStr += "("; |
1487 |
4100 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1488 |
2864 |
if(i != 0) retStr += "|"; |
1489 |
2864 |
retStr += mkString( r[i] ); |
1490 |
|
} |
1491 |
1236 |
retStr += ")"; |
1492 |
1236 |
break; |
1493 |
|
} |
1494 |
55 |
case kind::REGEXP_INTER: { |
1495 |
55 |
retStr += "("; |
1496 |
165 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1497 |
110 |
if(i != 0) retStr += "&"; |
1498 |
110 |
retStr += mkString( r[i] ); |
1499 |
|
} |
1500 |
55 |
retStr += ")"; |
1501 |
55 |
break; |
1502 |
|
} |
1503 |
4439 |
case kind::REGEXP_STAR: { |
1504 |
4439 |
retStr += mkString( r[0] ); |
1505 |
4439 |
retStr += "*"; |
1506 |
4439 |
break; |
1507 |
|
} |
1508 |
|
case kind::REGEXP_PLUS: { |
1509 |
|
retStr += mkString( r[0] ); |
1510 |
|
retStr += "+"; |
1511 |
|
break; |
1512 |
|
} |
1513 |
|
case kind::REGEXP_OPT: { |
1514 |
|
retStr += mkString( r[0] ); |
1515 |
|
retStr += "?"; |
1516 |
|
break; |
1517 |
|
} |
1518 |
267 |
case kind::REGEXP_RANGE: { |
1519 |
267 |
retStr += "["; |
1520 |
267 |
retStr += niceChar( r[0] ); |
1521 |
267 |
retStr += "-"; |
1522 |
267 |
retStr += niceChar( r[1] ); |
1523 |
267 |
retStr += "]"; |
1524 |
267 |
break; |
1525 |
|
} |
1526 |
|
case kind::REGEXP_LOOP: { |
1527 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
1528 |
|
std::stringstream ss; |
1529 |
|
ss << "(" << mkString(r[0]) << "){" << l << ","; |
1530 |
|
if(r.getNumChildren() == 3) { |
1531 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
1532 |
|
ss << u; |
1533 |
|
} |
1534 |
|
ss << "}"; |
1535 |
|
retStr += ss.str(); |
1536 |
|
break; |
1537 |
|
} |
1538 |
382 |
case kind::REGEXP_RV: { |
1539 |
382 |
retStr += "<"; |
1540 |
382 |
retStr += r[0].getConst<Rational>().getNumerator().toString(); |
1541 |
382 |
retStr += ">"; |
1542 |
382 |
break; |
1543 |
|
} |
1544 |
57 |
case REGEXP_COMPLEMENT: |
1545 |
|
{ |
1546 |
57 |
retStr += "^("; |
1547 |
57 |
retStr += mkString(r[0]); |
1548 |
57 |
retStr += ")"; |
1549 |
57 |
break; |
1550 |
|
} |
1551 |
3 |
default: |
1552 |
|
{ |
1553 |
6 |
std::stringstream ss; |
1554 |
3 |
ss << r; |
1555 |
3 |
retStr = ss.str(); |
1556 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
1557 |
3 |
break; |
1558 |
|
} |
1559 |
|
} |
1560 |
|
} |
1561 |
|
|
1562 |
21230 |
return retStr; |
1563 |
|
} |
1564 |
|
|
1565 |
775 |
bool RegExpOpr::regExpIncludes(Node r1, Node r2) |
1566 |
|
{ |
1567 |
775 |
const auto& it = d_inclusionCache.find(std::make_pair(r1, r2)); |
1568 |
775 |
if (it != d_inclusionCache.end()) |
1569 |
|
{ |
1570 |
418 |
return (*it).second; |
1571 |
|
} |
1572 |
357 |
bool result = RegExpEntail::regExpIncludes(r1, r2); |
1573 |
357 |
d_inclusionCache[std::make_pair(r1, r2)] = result; |
1574 |
357 |
return result; |
1575 |
|
} |
1576 |
|
|
1577 |
|
/** |
1578 |
|
* Associating formulas with their "exists form", or an existentially |
1579 |
|
* quantified formula that is equivalent to it. This is currently used |
1580 |
|
* for regular expression memberships in the method below. |
1581 |
|
*/ |
1582 |
|
struct ExistsFormAttributeId |
1583 |
|
{ |
1584 |
|
}; |
1585 |
|
typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute; |
1586 |
|
|
1587 |
352 |
Node RegExpOpr::getExistsForRegExpConcatMem(Node mem) |
1588 |
|
{ |
1589 |
|
// get or make the exists form of the membership |
1590 |
|
ExistsFormAttribute efa; |
1591 |
352 |
if (mem.hasAttribute(efa)) |
1592 |
|
{ |
1593 |
|
// already computed |
1594 |
96 |
return mem.getAttribute(efa); |
1595 |
|
} |
1596 |
256 |
Assert(mem.getKind() == STRING_IN_REGEXP); |
1597 |
512 |
Node x = mem[0]; |
1598 |
512 |
Node r = mem[1]; |
1599 |
256 |
Assert(r.getKind() == REGEXP_CONCAT); |
1600 |
256 |
NodeManager* nm = NodeManager::currentNM(); |
1601 |
512 |
TypeNode xtn = x.getType(); |
1602 |
512 |
std::vector<Node> vars; |
1603 |
512 |
std::vector<Node> mems; |
1604 |
1016 |
for (const Node& rc : r) |
1605 |
|
{ |
1606 |
1520 |
Node v = nm->mkBoundVar(xtn); |
1607 |
760 |
vars.push_back(v); |
1608 |
760 |
mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc)); |
1609 |
|
} |
1610 |
512 |
Node sconcat = nm->mkNode(STRING_CONCAT, vars); |
1611 |
512 |
Node eq = x.eqNode(sconcat); |
1612 |
256 |
mems.insert(mems.begin(), eq); |
1613 |
512 |
Node bvl = nm->mkNode(BOUND_VAR_LIST, vars); |
1614 |
512 |
Node ebody = nm->mkNode(AND, mems); |
1615 |
512 |
Node eform = nm->mkNode(EXISTS, bvl, ebody); |
1616 |
256 |
mem.setAttribute(efa, eform); |
1617 |
256 |
Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl; |
1618 |
256 |
return eform; |
1619 |
|
} |
1620 |
|
|
1621 |
|
} // namespace strings |
1622 |
|
} // namespace theory |
1623 |
28191 |
} // namespace cvc5 |