1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Tianyi Liang, Andrew Reynolds, Mathias Preiner |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Symbolic Regular Expresion Operations |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_operation.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "expr/node_algorithm.h" |
21 |
|
#include "options/strings_options.h" |
22 |
|
#include "theory/rewriter.h" |
23 |
|
#include "theory/strings/regexp_entail.h" |
24 |
|
#include "theory/strings/theory_strings_utils.h" |
25 |
|
#include "theory/strings/word.h" |
26 |
|
#include "util/regexp.h" |
27 |
|
|
28 |
|
using namespace cvc5::kind; |
29 |
|
|
30 |
|
namespace cvc5 { |
31 |
|
namespace theory { |
32 |
|
namespace strings { |
33 |
|
|
34 |
15272 |
RegExpOpr::RegExpOpr(Env& env, SkolemCache* sc) |
35 |
|
: EnvObj(env), |
36 |
|
d_true(NodeManager::currentNM()->mkConst(true)), |
37 |
|
d_false(NodeManager::currentNM()->mkConst(false)), |
38 |
|
d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY, |
39 |
30544 |
std::vector<Node>{})), |
40 |
30544 |
d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))), |
41 |
30544 |
d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))), |
42 |
|
d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA, |
43 |
30544 |
std::vector<Node>{})), |
44 |
|
d_sigma_star( |
45 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)), |
46 |
137448 |
d_sc(sc) |
47 |
|
{ |
48 |
15272 |
d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType()); |
49 |
|
|
50 |
15272 |
d_emptySingleton = |
51 |
30544 |
NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString); |
52 |
15272 |
d_lastchar = options().strings.stringsAlphaCard - 1; |
53 |
15272 |
} |
54 |
|
|
55 |
15267 |
RegExpOpr::~RegExpOpr() {} |
56 |
|
|
57 |
990 |
bool RegExpOpr::checkConstRegExp( Node r ) { |
58 |
990 |
Assert(r.getType().isRegExp()); |
59 |
1980 |
Trace("strings-regexp-cstre") |
60 |
990 |
<< "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl; |
61 |
990 |
RegExpConstType rct = getRegExpConstType(r); |
62 |
990 |
return rct != RE_C_VARIABLE; |
63 |
|
} |
64 |
|
|
65 |
1823 |
RegExpConstType RegExpOpr::getRegExpConstType(Node r) |
66 |
|
{ |
67 |
1823 |
Assert(r.getType().isRegExp()); |
68 |
1823 |
std::unordered_map<Node, RegExpConstType>::iterator it; |
69 |
3646 |
std::vector<TNode> visit; |
70 |
3646 |
TNode cur; |
71 |
1823 |
visit.push_back(r); |
72 |
2442 |
do |
73 |
|
{ |
74 |
4265 |
cur = visit.back(); |
75 |
4265 |
visit.pop_back(); |
76 |
4265 |
it = d_constCache.find(cur); |
77 |
|
|
78 |
4265 |
Kind ck = cur.getKind(); |
79 |
4265 |
if (it == d_constCache.end()) |
80 |
|
{ |
81 |
1340 |
if (ck == STRING_TO_REGEXP) |
82 |
|
{ |
83 |
836 |
Node tmp = rewrite(cur[0]); |
84 |
418 |
d_constCache[cur] = |
85 |
418 |
tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE; |
86 |
|
} |
87 |
922 |
else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE) |
88 |
|
{ |
89 |
131 |
d_constCache[cur] = RE_C_CONSTANT; |
90 |
|
} |
91 |
791 |
else if (!utils::isRegExpKind(ck)) |
92 |
|
{ |
93 |
|
// non-regular expression applications, e.g. function applications |
94 |
|
// with regular expression return type are treated as variables. |
95 |
4 |
d_constCache[cur] = RE_C_VARIABLE; |
96 |
|
} |
97 |
|
else |
98 |
|
{ |
99 |
787 |
d_constCache[cur] = RE_C_UNKNOWN; |
100 |
787 |
visit.push_back(cur); |
101 |
787 |
visit.insert(visit.end(), cur.begin(), cur.end()); |
102 |
|
} |
103 |
|
} |
104 |
2925 |
else if (it->second == RE_C_UNKNOWN) |
105 |
|
{ |
106 |
787 |
RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT; |
107 |
2442 |
for (const Node& cn : cur) |
108 |
|
{ |
109 |
1655 |
it = d_constCache.find(cn); |
110 |
1655 |
Assert(it != d_constCache.end()); |
111 |
1655 |
if (it->second > ret) |
112 |
|
{ |
113 |
432 |
ret = it->second; |
114 |
|
} |
115 |
|
} |
116 |
787 |
d_constCache[cur] = ret; |
117 |
|
} |
118 |
4265 |
} while (!visit.empty()); |
119 |
1823 |
Assert(d_constCache.find(r) != d_constCache.end()); |
120 |
3646 |
return d_constCache[r]; |
121 |
|
} |
122 |
|
|
123 |
|
// 0-unknown, 1-yes, 2-no |
124 |
1703 |
int RegExpOpr::delta( Node r, Node &exp ) { |
125 |
|
std::map<Node, std::pair<int, Node> >::const_iterator itd = |
126 |
1703 |
d_delta_cache.find(r); |
127 |
1703 |
if (itd != d_delta_cache.end()) |
128 |
|
{ |
129 |
|
// already computed |
130 |
1107 |
exp = itd->second.second; |
131 |
1107 |
return itd->second.first; |
132 |
|
} |
133 |
596 |
Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl; |
134 |
596 |
int ret = 0; |
135 |
596 |
NodeManager* nm = NodeManager::currentNM(); |
136 |
596 |
Kind k = r.getKind(); |
137 |
596 |
switch (k) |
138 |
|
{ |
139 |
|
case REGEXP_EMPTY: |
140 |
|
case REGEXP_SIGMA: |
141 |
|
case REGEXP_RANGE: |
142 |
|
{ |
143 |
|
// does not contain empty string |
144 |
|
ret = 2; |
145 |
|
break; |
146 |
|
} |
147 |
186 |
case STRING_TO_REGEXP: |
148 |
|
{ |
149 |
372 |
Node tmp = rewrite(r[0]); |
150 |
186 |
if (tmp.isConst()) |
151 |
|
{ |
152 |
178 |
if (tmp == d_emptyString) |
153 |
|
{ |
154 |
12 |
ret = 1; |
155 |
|
} else { |
156 |
166 |
ret = 2; |
157 |
|
} |
158 |
|
} |
159 |
|
else |
160 |
|
{ |
161 |
8 |
ret = 0; |
162 |
8 |
if (tmp.getKind() == STRING_CONCAT) |
163 |
|
{ |
164 |
|
for (const Node& tmpc : tmp) |
165 |
|
{ |
166 |
|
if (tmpc.isConst()) |
167 |
|
{ |
168 |
|
ret = 2; |
169 |
|
break; |
170 |
|
} |
171 |
|
} |
172 |
|
} |
173 |
8 |
if (ret == 0) |
174 |
|
{ |
175 |
8 |
exp = r[0].eqNode(d_emptyString); |
176 |
|
} |
177 |
|
} |
178 |
186 |
break; |
179 |
|
} |
180 |
311 |
case REGEXP_CONCAT: |
181 |
|
case REGEXP_UNION: |
182 |
|
case REGEXP_INTER: |
183 |
|
{ |
184 |
|
// has there been an unknown child? |
185 |
311 |
bool hasUnknownChild = false; |
186 |
622 |
std::vector<Node> vec; |
187 |
311 |
int checkTmp = k == REGEXP_UNION ? 1 : 2; |
188 |
311 |
int retTmp = k == REGEXP_UNION ? 2 : 1; |
189 |
447 |
for (const Node& rc : r) |
190 |
|
{ |
191 |
559 |
Node exp2; |
192 |
423 |
int tmp = delta(rc, exp2); |
193 |
423 |
if (tmp == checkTmp) |
194 |
|
{ |
195 |
|
// return is implied by the child's return value |
196 |
287 |
ret = checkTmp; |
197 |
287 |
break; |
198 |
|
} |
199 |
136 |
else if (tmp == 0) |
200 |
|
{ |
201 |
|
// unknown if child contains empty string |
202 |
16 |
Assert(!exp2.isNull()); |
203 |
16 |
vec.push_back(exp2); |
204 |
16 |
hasUnknownChild = true; |
205 |
|
} |
206 |
|
} |
207 |
311 |
if (ret != checkTmp) |
208 |
|
{ |
209 |
24 |
if (!hasUnknownChild) |
210 |
|
{ |
211 |
24 |
ret = retTmp; |
212 |
|
} else { |
213 |
|
Kind kr = k == REGEXP_UNION ? OR : AND; |
214 |
|
exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec); |
215 |
|
} |
216 |
|
} |
217 |
311 |
break; |
218 |
|
} |
219 |
91 |
case REGEXP_STAR: |
220 |
|
case REGEXP_OPT: |
221 |
|
{ |
222 |
|
// contains empty string |
223 |
91 |
ret = 1; |
224 |
91 |
break; |
225 |
|
} |
226 |
|
case REGEXP_PLUS: |
227 |
|
{ |
228 |
|
ret = delta(r[0], exp); |
229 |
|
break; |
230 |
|
} |
231 |
|
case REGEXP_LOOP: |
232 |
|
{ |
233 |
|
uint32_t lo = utils::getLoopMinOccurrences(r); |
234 |
|
if (lo == 0) |
235 |
|
{ |
236 |
|
ret = 1; |
237 |
|
} |
238 |
|
else |
239 |
|
{ |
240 |
|
ret = delta(r[0], exp); |
241 |
|
} |
242 |
|
break; |
243 |
|
} |
244 |
8 |
case REGEXP_COMPLEMENT: |
245 |
|
{ |
246 |
8 |
int tmp = delta(r[0], exp); |
247 |
|
// flip the result if known |
248 |
8 |
ret = tmp == 0 ? 0 : (3 - tmp); |
249 |
8 |
exp = exp.isNull() ? exp : exp.negate(); |
250 |
8 |
break; |
251 |
|
} |
252 |
|
default: |
253 |
|
{ |
254 |
|
Assert(!utils::isRegExpKind(k)); |
255 |
|
break; |
256 |
|
} |
257 |
|
} |
258 |
596 |
if (!exp.isNull()) |
259 |
|
{ |
260 |
8 |
exp = rewrite(exp); |
261 |
|
} |
262 |
1192 |
std::pair<int, Node> p(ret, exp); |
263 |
596 |
d_delta_cache[r] = p; |
264 |
1192 |
Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r |
265 |
596 |
<< ", expr = " << exp << std::endl; |
266 |
596 |
return ret; |
267 |
|
} |
268 |
|
|
269 |
|
// 0-unknown, 1-yes, 2-no |
270 |
24 |
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode) |
271 |
|
{ |
272 |
24 |
Assert(c.size() < 2); |
273 |
24 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
274 |
|
|
275 |
24 |
int ret = 1; |
276 |
24 |
retNode = d_emptyRegexp; |
277 |
24 |
NodeManager* nm = NodeManager::currentNM(); |
278 |
24 |
SkolemManager* sm = nm->getSkolemManager(); |
279 |
|
|
280 |
48 |
PairNodeStr dv = std::make_pair( r, c ); |
281 |
24 |
if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) { |
282 |
4 |
retNode = d_deriv_cache[dv].first; |
283 |
4 |
ret = d_deriv_cache[dv].second; |
284 |
|
} |
285 |
20 |
else if (c.empty()) |
286 |
|
{ |
287 |
|
Node expNode; |
288 |
|
ret = delta( r, expNode ); |
289 |
|
if(ret == 0) { |
290 |
|
retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp); |
291 |
|
} else if(ret == 1) { |
292 |
|
retNode = r; |
293 |
|
} |
294 |
|
std::pair< Node, int > p(retNode, ret); |
295 |
|
d_deriv_cache[dv] = p; |
296 |
|
} else { |
297 |
20 |
switch( r.getKind() ) { |
298 |
|
case kind::REGEXP_EMPTY: { |
299 |
|
ret = 2; |
300 |
|
break; |
301 |
|
} |
302 |
|
case kind::REGEXP_SIGMA: { |
303 |
|
retNode = d_emptySingleton; |
304 |
|
break; |
305 |
|
} |
306 |
|
case kind::REGEXP_RANGE: { |
307 |
|
cvc5::String a = r[0].getConst<String>(); |
308 |
|
cvc5::String b = r[1].getConst<String>(); |
309 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
310 |
|
break; |
311 |
|
} |
312 |
8 |
case kind::STRING_TO_REGEXP: { |
313 |
16 |
Node tmp = rewrite(r[0]); |
314 |
8 |
if(tmp.isConst()) { |
315 |
8 |
if(tmp == d_emptyString) { |
316 |
4 |
ret = 2; |
317 |
|
} else { |
318 |
4 |
if (tmp.getConst<String>().front() == c.front()) |
319 |
|
{ |
320 |
|
retNode = |
321 |
|
nm->mkNode(STRING_TO_REGEXP, |
322 |
|
Word::getLength(tmp) == 1 ? d_emptyString |
323 |
|
: Word::substr(tmp, 1)); |
324 |
|
} else { |
325 |
4 |
ret = 2; |
326 |
|
} |
327 |
|
} |
328 |
|
} else { |
329 |
|
ret = 0; |
330 |
|
Node rest; |
331 |
|
if(tmp.getKind() == kind::STRING_CONCAT) { |
332 |
|
Node t2 = tmp[0]; |
333 |
|
if(t2.isConst()) { |
334 |
|
if (t2.getConst<String>().front() == c.front()) |
335 |
|
{ |
336 |
|
Node n = nm->mkNode(STRING_TO_REGEXP, |
337 |
|
Word::getLength(tmp) == 1 |
338 |
|
? d_emptyString |
339 |
|
: Word::substr(tmp, 1)); |
340 |
|
std::vector< Node > vec_nodes; |
341 |
|
vec_nodes.push_back(n); |
342 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
343 |
|
vec_nodes.push_back(tmp[i]); |
344 |
|
} |
345 |
|
retNode = nm->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
346 |
|
ret = 1; |
347 |
|
} else { |
348 |
|
ret = 2; |
349 |
|
} |
350 |
|
} else { |
351 |
|
tmp = tmp[0]; |
352 |
|
std::vector< Node > vec_nodes; |
353 |
|
for(unsigned i=1; i<tmp.getNumChildren(); i++) { |
354 |
|
vec_nodes.push_back(tmp[i]); |
355 |
|
} |
356 |
|
rest = nm->mkNode(kind::REGEXP_CONCAT, vec_nodes); |
357 |
|
} |
358 |
|
} |
359 |
|
if(ret == 0) { |
360 |
|
Node sk = |
361 |
|
sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp"); |
362 |
|
retNode = nm->mkNode(kind::STRING_TO_REGEXP, sk); |
363 |
|
if(!rest.isNull()) { |
364 |
|
retNode = rewrite(nm->mkNode(kind::REGEXP_CONCAT, retNode, rest)); |
365 |
|
} |
366 |
|
Node exp = |
367 |
|
tmp.eqNode(nm->mkNode(kind::STRING_CONCAT, nm->mkConst(c), sk)); |
368 |
|
retNode = |
369 |
|
rewrite(nm->mkNode(kind::ITE, exp, retNode, d_emptyRegexp)); |
370 |
|
} |
371 |
|
} |
372 |
8 |
break; |
373 |
|
} |
374 |
4 |
case kind::REGEXP_CONCAT: { |
375 |
8 |
std::vector< Node > vec_nodes; |
376 |
8 |
std::vector< Node > delta_nodes; |
377 |
8 |
Node dnode = d_true; |
378 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
379 |
16 |
Node dc; |
380 |
16 |
Node exp2; |
381 |
8 |
int rt = derivativeS(r[i], c, dc); |
382 |
8 |
if(rt != 2) { |
383 |
|
if(rt == 0) { |
384 |
|
ret = 0; |
385 |
|
} |
386 |
|
std::vector< Node > vec_nodes2; |
387 |
|
if(dc != d_emptySingleton) { |
388 |
|
vec_nodes2.push_back( dc ); |
389 |
|
} |
390 |
|
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
391 |
|
if(r[j] != d_emptySingleton) { |
392 |
|
vec_nodes2.push_back( r[j] ); |
393 |
|
} |
394 |
|
} |
395 |
|
Node tmp = vec_nodes2.size()==0 ? d_emptySingleton : |
396 |
|
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
397 |
|
if(dnode != d_true) { |
398 |
|
tmp = rewrite(nm->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp)); |
399 |
|
ret = 0; |
400 |
|
} |
401 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
402 |
|
vec_nodes.push_back( tmp ); |
403 |
|
} |
404 |
|
} |
405 |
16 |
Node exp3; |
406 |
8 |
int rt2 = delta( r[i], exp3 ); |
407 |
8 |
if( rt2 == 0 ) { |
408 |
|
dnode = rewrite(nm->mkNode(kind::AND, dnode, exp3)); |
409 |
8 |
} else if( rt2 == 2 ) { |
410 |
|
break; |
411 |
|
} |
412 |
|
} |
413 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
414 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
415 |
4 |
if(retNode == d_emptyRegexp) { |
416 |
4 |
ret = 2; |
417 |
|
} |
418 |
4 |
break; |
419 |
|
} |
420 |
4 |
case kind::REGEXP_UNION: { |
421 |
8 |
std::vector< Node > vec_nodes; |
422 |
12 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
423 |
16 |
Node dc; |
424 |
8 |
int rt = derivativeS(r[i], c, dc); |
425 |
8 |
if(rt == 0) { |
426 |
|
ret = 0; |
427 |
|
} |
428 |
8 |
if(rt != 2) { |
429 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
430 |
|
vec_nodes.push_back( dc ); |
431 |
|
} |
432 |
|
} |
433 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl; |
434 |
|
} |
435 |
4 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
436 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
437 |
4 |
if(retNode == d_emptyRegexp) { |
438 |
4 |
ret = 2; |
439 |
|
} |
440 |
4 |
break; |
441 |
|
} |
442 |
|
case kind::REGEXP_INTER: { |
443 |
|
bool flag = true; |
444 |
|
bool flag_sg = false; |
445 |
|
std::vector< Node > vec_nodes; |
446 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
447 |
|
Node dc; |
448 |
|
int rt = derivativeS(r[i], c, dc); |
449 |
|
if(rt == 0) { |
450 |
|
ret = 0; |
451 |
|
} else if(rt == 2) { |
452 |
|
flag = false; |
453 |
|
break; |
454 |
|
} |
455 |
|
if(dc == d_sigma_star) { |
456 |
|
flag_sg = true; |
457 |
|
} else { |
458 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
459 |
|
vec_nodes.push_back( dc ); |
460 |
|
} |
461 |
|
} |
462 |
|
} |
463 |
|
if(flag) { |
464 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
465 |
|
retNode = d_sigma_star; |
466 |
|
} else { |
467 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
468 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
469 |
|
if(retNode == d_emptyRegexp) { |
470 |
|
ret = 2; |
471 |
|
} |
472 |
|
} |
473 |
|
} else { |
474 |
|
retNode = d_emptyRegexp; |
475 |
|
ret = 2; |
476 |
|
} |
477 |
|
break; |
478 |
|
} |
479 |
4 |
case kind::REGEXP_STAR: { |
480 |
8 |
Node dc; |
481 |
4 |
ret = derivativeS(r[0], c, dc); |
482 |
4 |
retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r )); |
483 |
4 |
break; |
484 |
|
} |
485 |
|
case kind::REGEXP_LOOP: { |
486 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
487 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
488 |
|
if (l == u && l == 0) |
489 |
|
{ |
490 |
|
ret = 2; |
491 |
|
//retNode = d_emptyRegexp; |
492 |
|
} else { |
493 |
|
Node dc; |
494 |
|
ret = derivativeS(r[0], c, dc); |
495 |
|
if(dc==d_emptyRegexp) { |
496 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
497 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
498 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
499 |
|
} else { |
500 |
|
retNode = d_emptyRegexp; |
501 |
|
} |
502 |
|
} |
503 |
|
break; |
504 |
|
} |
505 |
|
case kind::REGEXP_COMPLEMENT: |
506 |
|
{ |
507 |
|
// don't know result |
508 |
|
return 0; |
509 |
|
break; |
510 |
|
} |
511 |
|
default: { |
512 |
|
Assert(!utils::isRegExpKind(r.getKind())); |
513 |
|
return 0; |
514 |
|
break; |
515 |
|
} |
516 |
|
} |
517 |
20 |
if(retNode != d_emptyRegexp) { |
518 |
|
retNode = rewrite(retNode); |
519 |
|
} |
520 |
40 |
std::pair< Node, int > p(retNode, ret); |
521 |
20 |
d_deriv_cache[dv] = p; |
522 |
|
} |
523 |
|
|
524 |
24 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
525 |
24 |
return ret; |
526 |
|
} |
527 |
|
|
528 |
1410 |
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c) |
529 |
|
{ |
530 |
1410 |
Assert(c.size() < 2); |
531 |
1410 |
Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl; |
532 |
1410 |
Node retNode = d_emptyRegexp; |
533 |
2820 |
PairNodeStr dv = std::make_pair( r, c ); |
534 |
1410 |
NodeManager* nm = NodeManager::currentNM(); |
535 |
1410 |
if( d_dv_cache.find( dv ) != d_dv_cache.end() ) { |
536 |
526 |
retNode = d_dv_cache[dv]; |
537 |
|
} |
538 |
884 |
else if (c.empty()) |
539 |
|
{ |
540 |
|
Node exp; |
541 |
|
int tmp = delta( r, exp ); |
542 |
|
if(tmp == 0) { |
543 |
|
// TODO variable |
544 |
|
retNode = d_emptyRegexp; |
545 |
|
} else if(tmp == 1) { |
546 |
|
retNode = r; |
547 |
|
} else { |
548 |
|
retNode = d_emptyRegexp; |
549 |
|
} |
550 |
|
} else { |
551 |
884 |
Kind k = r.getKind(); |
552 |
884 |
switch( k ) { |
553 |
|
case kind::REGEXP_EMPTY: { |
554 |
|
retNode = d_emptyRegexp; |
555 |
|
break; |
556 |
|
} |
557 |
|
case kind::REGEXP_SIGMA: { |
558 |
|
retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
559 |
|
break; |
560 |
|
} |
561 |
|
case kind::REGEXP_RANGE: { |
562 |
|
cvc5::String a = r[0].getConst<String>(); |
563 |
|
cvc5::String b = r[1].getConst<String>(); |
564 |
|
retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp; |
565 |
|
break; |
566 |
|
} |
567 |
270 |
case kind::STRING_TO_REGEXP: { |
568 |
270 |
if(r[0].isConst()) { |
569 |
270 |
if(r[0] == d_emptyString) { |
570 |
14 |
retNode = d_emptyRegexp; |
571 |
|
} else { |
572 |
256 |
if (r[0].getConst<String>().front() == c.front()) |
573 |
|
{ |
574 |
192 |
retNode = nm->mkNode(STRING_TO_REGEXP, |
575 |
384 |
Word::getLength(r[0]) == 1 |
576 |
384 |
? d_emptyString |
577 |
|
: Word::substr(r[0], 1)); |
578 |
|
} else { |
579 |
64 |
retNode = d_emptyRegexp; |
580 |
|
} |
581 |
|
} |
582 |
|
} else { |
583 |
|
// TODO variable |
584 |
|
retNode = d_emptyRegexp; |
585 |
|
} |
586 |
270 |
break; |
587 |
|
} |
588 |
355 |
case kind::REGEXP_CONCAT: { |
589 |
710 |
Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString ); |
590 |
710 |
std::vector< Node > vec_nodes; |
591 |
531 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
592 |
685 |
Node dc = derivativeSingle(r[i], c); |
593 |
509 |
if(dc != d_emptyRegexp) { |
594 |
714 |
std::vector< Node > vec_nodes2; |
595 |
357 |
if(dc != rees) { |
596 |
236 |
vec_nodes2.push_back( dc ); |
597 |
|
} |
598 |
768 |
for(unsigned j=i+1; j<r.getNumChildren(); ++j) { |
599 |
411 |
if(r[j] != rees) { |
600 |
411 |
vec_nodes2.push_back( r[j] ); |
601 |
|
} |
602 |
|
} |
603 |
357 |
Node tmp = vec_nodes2.size()==0 ? rees : |
604 |
714 |
vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 ); |
605 |
357 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) { |
606 |
355 |
vec_nodes.push_back( tmp ); |
607 |
|
} |
608 |
|
} |
609 |
685 |
Node exp; |
610 |
509 |
if( delta( r[i], exp ) != 1 ) { |
611 |
333 |
break; |
612 |
|
} |
613 |
|
} |
614 |
999 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
615 |
644 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
616 |
355 |
break; |
617 |
|
} |
618 |
108 |
case kind::REGEXP_UNION: { |
619 |
216 |
std::vector< Node > vec_nodes; |
620 |
346 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
621 |
476 |
Node dc = derivativeSingle(r[i], c); |
622 |
238 |
if(dc != d_emptyRegexp) { |
623 |
100 |
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
624 |
98 |
vec_nodes.push_back( dc ); |
625 |
|
} |
626 |
|
} |
627 |
|
//Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl; |
628 |
|
} |
629 |
286 |
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
630 |
178 |
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) ); |
631 |
108 |
break; |
632 |
|
} |
633 |
|
case kind::REGEXP_INTER: { |
634 |
|
bool flag = true; |
635 |
|
bool flag_sg = false; |
636 |
|
std::vector< Node > vec_nodes; |
637 |
|
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
638 |
|
Node dc = derivativeSingle(r[i], c); |
639 |
|
if(dc != d_emptyRegexp) { |
640 |
|
if(dc == d_sigma_star) { |
641 |
|
flag_sg = true; |
642 |
|
} else { |
643 |
|
if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) { |
644 |
|
vec_nodes.push_back( dc ); |
645 |
|
} |
646 |
|
} |
647 |
|
} else { |
648 |
|
flag = false; |
649 |
|
break; |
650 |
|
} |
651 |
|
} |
652 |
|
if(flag) { |
653 |
|
if(vec_nodes.size() == 0 && flag_sg) { |
654 |
|
retNode = d_sigma_star; |
655 |
|
} else { |
656 |
|
retNode = vec_nodes.size() == 0 ? d_emptyRegexp : |
657 |
|
( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) ); |
658 |
|
} |
659 |
|
} else { |
660 |
|
retNode = d_emptyRegexp; |
661 |
|
} |
662 |
|
break; |
663 |
|
} |
664 |
151 |
case kind::REGEXP_STAR: { |
665 |
302 |
Node dc = derivativeSingle(r[0], c); |
666 |
151 |
if(dc != d_emptyRegexp) { |
667 |
115 |
retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ); |
668 |
|
} else { |
669 |
36 |
retNode = d_emptyRegexp; |
670 |
|
} |
671 |
151 |
break; |
672 |
|
} |
673 |
|
case kind::REGEXP_LOOP: { |
674 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
675 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
676 |
|
if (l == u || l == 0) |
677 |
|
{ |
678 |
|
retNode = d_emptyRegexp; |
679 |
|
} else { |
680 |
|
Node dc = derivativeSingle(r[0], c); |
681 |
|
if(dc != d_emptyRegexp) { |
682 |
|
Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1)); |
683 |
|
Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]); |
684 |
|
retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 ); |
685 |
|
} else { |
686 |
|
retNode = d_emptyRegexp; |
687 |
|
} |
688 |
|
} |
689 |
|
//Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl; |
690 |
|
break; |
691 |
|
} |
692 |
|
case kind::REGEXP_COMPLEMENT: |
693 |
|
default: { |
694 |
|
Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl; |
695 |
|
Unreachable(); |
696 |
|
break; |
697 |
|
} |
698 |
|
} |
699 |
884 |
if(retNode != d_emptyRegexp) { |
700 |
732 |
retNode = rewrite(retNode); |
701 |
|
} |
702 |
884 |
d_dv_cache[dv] = retNode; |
703 |
|
} |
704 |
1410 |
Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl; |
705 |
2820 |
return retNode; |
706 |
|
} |
707 |
|
|
708 |
898 |
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset) |
709 |
|
{ |
710 |
898 |
Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl; |
711 |
|
std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr = |
712 |
898 |
d_fset_cache.find(r); |
713 |
898 |
if(itr != d_fset_cache.end()) { |
714 |
312 |
pcset.insert((itr->second).first.begin(), (itr->second).first.end()); |
715 |
312 |
pvset.insert((itr->second).second.begin(), (itr->second).second.end()); |
716 |
|
} else { |
717 |
|
// cset is code points |
718 |
1172 |
std::set<unsigned> cset; |
719 |
1172 |
SetNodes vset; |
720 |
586 |
Kind k = r.getKind(); |
721 |
586 |
switch( k ) { |
722 |
|
case kind::REGEXP_EMPTY: { |
723 |
|
break; |
724 |
|
} |
725 |
|
case kind::REGEXP_RANGE: { |
726 |
|
unsigned a = r[0].getConst<String>().front(); |
727 |
|
unsigned b = r[1].getConst<String>().front(); |
728 |
|
Assert(a < b); |
729 |
|
Assert(b < std::numeric_limits<unsigned>::max()); |
730 |
|
for (unsigned c = a; c <= b; c++) |
731 |
|
{ |
732 |
|
cset.insert(c); |
733 |
|
} |
734 |
|
break; |
735 |
|
} |
736 |
204 |
case kind::STRING_TO_REGEXP: { |
737 |
408 |
Node st = rewrite(r[0]); |
738 |
204 |
if(st.isConst()) { |
739 |
408 |
String s = st.getConst<String>(); |
740 |
204 |
if(s.size() != 0) { |
741 |
196 |
unsigned sc = s.front(); |
742 |
196 |
cset.insert(sc); |
743 |
|
} |
744 |
|
} |
745 |
|
else if (st.getKind() == kind::STRING_CONCAT) |
746 |
|
{ |
747 |
|
if(st[0].isConst()) { |
748 |
|
String s = st[0].getConst<String>(); |
749 |
|
unsigned sc = s.front(); |
750 |
|
cset.insert(sc); |
751 |
|
} else { |
752 |
|
vset.insert( st[0] ); |
753 |
|
} |
754 |
|
} |
755 |
|
else |
756 |
|
{ |
757 |
|
vset.insert(st); |
758 |
|
} |
759 |
204 |
break; |
760 |
|
} |
761 |
253 |
case kind::REGEXP_CONCAT: { |
762 |
329 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
763 |
319 |
firstChars(r[i], cset, vset); |
764 |
395 |
Node n = r[i]; |
765 |
395 |
Node exp; |
766 |
319 |
if (delta(n, exp) != 1) |
767 |
|
{ |
768 |
243 |
break; |
769 |
|
} |
770 |
|
} |
771 |
253 |
break; |
772 |
|
} |
773 |
46 |
case kind::REGEXP_UNION: { |
774 |
146 |
for(unsigned i=0; i<r.getNumChildren(); i++) { |
775 |
100 |
firstChars(r[i], cset, vset); |
776 |
|
} |
777 |
46 |
break; |
778 |
|
} |
779 |
|
case kind::REGEXP_INTER: { |
780 |
|
//TODO: Overapproximation for now |
781 |
|
//for(unsigned i=0; i<r.getNumChildren(); i++) { |
782 |
|
// firstChars(r[i], cset, vset); |
783 |
|
//} |
784 |
|
firstChars(r[0], cset, vset); |
785 |
|
break; |
786 |
|
} |
787 |
83 |
case kind::REGEXP_STAR: { |
788 |
83 |
firstChars(r[0], cset, vset); |
789 |
83 |
break; |
790 |
|
} |
791 |
|
case kind::REGEXP_LOOP: { |
792 |
|
firstChars(r[0], cset, vset); |
793 |
|
break; |
794 |
|
} |
795 |
|
case kind::REGEXP_SIGMA: |
796 |
|
case kind::REGEXP_COMPLEMENT: |
797 |
|
default: { |
798 |
|
// we do not expect to call this function on regular expressions that |
799 |
|
// aren't a standard regular expression kind. However, if we do, then |
800 |
|
// the following code is conservative and says that the current |
801 |
|
// regular expression can begin with any character. |
802 |
|
Assert(utils::isRegExpKind(k)); |
803 |
|
// can start with any character |
804 |
|
Assert(d_lastchar < std::numeric_limits<unsigned>::max()); |
805 |
|
for (unsigned i = 0; i <= d_lastchar; i++) |
806 |
|
{ |
807 |
|
cset.insert(i); |
808 |
|
} |
809 |
|
break; |
810 |
|
} |
811 |
|
} |
812 |
586 |
pcset.insert(cset.begin(), cset.end()); |
813 |
586 |
pvset.insert(vset.begin(), vset.end()); |
814 |
1172 |
std::pair<std::set<unsigned>, SetNodes> p(cset, vset); |
815 |
586 |
d_fset_cache[r] = p; |
816 |
|
} |
817 |
|
|
818 |
898 |
if(Trace.isOn("regexp-fset")) { |
819 |
|
Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {"; |
820 |
|
for (std::set<unsigned>::const_iterator it = pcset.begin(); |
821 |
|
it != pcset.end(); |
822 |
|
++it) |
823 |
|
{ |
824 |
|
if (it != pcset.begin()) |
825 |
|
{ |
826 |
|
Trace("regexp-fset") << ","; |
827 |
|
} |
828 |
|
Trace("regexp-fset") << (*it); |
829 |
|
} |
830 |
|
Trace("regexp-fset") << "}" << std::endl; |
831 |
|
} |
832 |
898 |
} |
833 |
|
|
834 |
560 |
Node RegExpOpr::simplify(Node t, bool polarity) |
835 |
|
{ |
836 |
1120 |
Trace("strings-regexp-simpl") |
837 |
560 |
<< "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl; |
838 |
560 |
Assert(t.getKind() == kind::STRING_IN_REGEXP); |
839 |
1120 |
Node tlit = polarity ? t : t.notNode(); |
840 |
1120 |
Node conc; |
841 |
560 |
std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit); |
842 |
560 |
if (itr != d_simpCache.end()) |
843 |
|
{ |
844 |
8 |
return itr->second; |
845 |
|
} |
846 |
552 |
if (polarity) |
847 |
|
{ |
848 |
816 |
std::vector<Node> newSkolems; |
849 |
408 |
conc = reduceRegExpPos(tlit, d_sc, newSkolems); |
850 |
|
} |
851 |
|
else |
852 |
|
{ |
853 |
|
// see if we can use an optimized version of the reduction for re.++. |
854 |
288 |
Node r = t[1]; |
855 |
144 |
if (r.getKind() == REGEXP_CONCAT) |
856 |
|
{ |
857 |
|
// the index we are removing from the RE concatenation |
858 |
122 |
size_t index = 0; |
859 |
|
// As an optimization to the reduction, if we can determine that |
860 |
|
// all strings in the language of R1 have the same length, say n, |
861 |
|
// then the conclusion of the reduction is quantifier-free: |
862 |
|
// ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2) |
863 |
244 |
Node reLen = getRegExpConcatFixed(r, index); |
864 |
122 |
if (!reLen.isNull()) |
865 |
|
{ |
866 |
102 |
conc = reduceRegExpNegConcatFixed(tlit, reLen, index); |
867 |
|
} |
868 |
|
} |
869 |
144 |
if (conc.isNull()) |
870 |
|
{ |
871 |
42 |
conc = reduceRegExpNeg(tlit); |
872 |
|
} |
873 |
|
} |
874 |
552 |
d_simpCache[tlit] = conc; |
875 |
1104 |
Trace("strings-regexp-simpl") |
876 |
552 |
<< "RegExpOpr::simplify: returns " << conc << std::endl; |
877 |
552 |
return conc; |
878 |
|
} |
879 |
|
|
880 |
150 |
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index) |
881 |
|
{ |
882 |
150 |
Assert(r.getKind() == REGEXP_CONCAT); |
883 |
150 |
index = 0; |
884 |
300 |
Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]); |
885 |
150 |
if (!reLen.isNull()) |
886 |
|
{ |
887 |
96 |
return reLen; |
888 |
|
} |
889 |
|
// try from the opposite end |
890 |
54 |
size_t indexE = r.getNumChildren() - 1; |
891 |
54 |
reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]); |
892 |
54 |
if (!reLen.isNull()) |
893 |
|
{ |
894 |
33 |
index = indexE; |
895 |
33 |
return reLen; |
896 |
|
} |
897 |
21 |
return Node::null(); |
898 |
|
} |
899 |
|
|
900 |
44 |
Node RegExpOpr::reduceRegExpNeg(Node mem) |
901 |
|
{ |
902 |
44 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
903 |
88 |
Node s = mem[0][0]; |
904 |
88 |
Node r = mem[0][1]; |
905 |
44 |
NodeManager* nm = NodeManager::currentNM(); |
906 |
44 |
Kind k = r.getKind(); |
907 |
88 |
Node zero = nm->mkConst(Rational(0)); |
908 |
44 |
Node conc; |
909 |
44 |
if (k == REGEXP_CONCAT) |
910 |
|
{ |
911 |
|
// do not use length entailment, call regular expression concat |
912 |
44 |
Node reLen; |
913 |
22 |
size_t i = 0; |
914 |
22 |
conc = reduceRegExpNegConcatFixed(mem, reLen, i); |
915 |
|
} |
916 |
22 |
else if (k == REGEXP_STAR) |
917 |
|
{ |
918 |
40 |
Node emp = Word::mkEmptyWord(s.getType()); |
919 |
40 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
920 |
40 |
Node sne = s.eqNode(emp).negate(); |
921 |
40 |
Node b1 = nm->mkBoundVar(nm->integerType()); |
922 |
40 |
Node b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
923 |
|
Node g1 = |
924 |
40 |
nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1)); |
925 |
|
// internal |
926 |
40 |
Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
927 |
40 |
Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
928 |
40 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate(); |
929 |
40 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate(); |
930 |
|
|
931 |
20 |
conc = nm->mkNode(OR, s1r1, s2r2); |
932 |
20 |
conc = nm->mkNode(IMPLIES, g1, conc); |
933 |
|
// must mark as an internal quantifier |
934 |
20 |
conc = utils::mkForallInternal(b1v, conc); |
935 |
20 |
conc = nm->mkNode(AND, sne, conc); |
936 |
|
} |
937 |
|
else |
938 |
|
{ |
939 |
2 |
Assert(!utils::isRegExpKind(k)); |
940 |
|
} |
941 |
88 |
return conc; |
942 |
|
} |
943 |
|
|
944 |
142 |
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index) |
945 |
|
{ |
946 |
142 |
Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP); |
947 |
284 |
Node s = mem[0][0]; |
948 |
284 |
Node r = mem[0][1]; |
949 |
142 |
NodeManager* nm = NodeManager::currentNM(); |
950 |
142 |
Assert(r.getKind() == REGEXP_CONCAT); |
951 |
284 |
Node zero = nm->mkConst(Rational(0)); |
952 |
|
// The following simplification states that |
953 |
|
// ~( s in R1 ++ R2 ++... ++ Rn ) |
954 |
|
// is equivalent to |
955 |
|
// forall x. |
956 |
|
// 0 <= x <= len(s) => |
957 |
|
// ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn) |
958 |
|
// Index is the child index of r that we are stripping off, which is either |
959 |
|
// from the beginning or the end. |
960 |
142 |
Assert(index == 0 || index == r.getNumChildren() - 1); |
961 |
284 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
962 |
284 |
Node b1; |
963 |
284 |
Node b1v; |
964 |
284 |
Node guard; |
965 |
142 |
if (reLen.isNull()) |
966 |
|
{ |
967 |
22 |
b1 = SkolemCache::mkIndexVar(mem); |
968 |
22 |
b1v = nm->mkNode(BOUND_VAR_LIST, b1); |
969 |
66 |
guard = nm->mkNode(AND, |
970 |
44 |
nm->mkNode(GEQ, b1, zero), |
971 |
44 |
nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1)); |
972 |
|
} |
973 |
|
else |
974 |
|
{ |
975 |
120 |
b1 = reLen; |
976 |
|
} |
977 |
284 |
Node s1; |
978 |
284 |
Node s2; |
979 |
142 |
if (index == 0) |
980 |
|
{ |
981 |
112 |
s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1); |
982 |
112 |
s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1)); |
983 |
|
} |
984 |
|
else |
985 |
|
{ |
986 |
30 |
s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1); |
987 |
30 |
s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1)); |
988 |
|
} |
989 |
284 |
Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate(); |
990 |
284 |
std::vector<Node> nvec; |
991 |
578 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++) |
992 |
|
{ |
993 |
436 |
if (i != index) |
994 |
|
{ |
995 |
294 |
nvec.push_back(r[i]); |
996 |
|
} |
997 |
|
} |
998 |
284 |
Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec); |
999 |
284 |
Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate(); |
1000 |
142 |
Node conc = nm->mkNode(OR, s1r1, s2r2); |
1001 |
142 |
if (!b1v.isNull()) |
1002 |
|
{ |
1003 |
22 |
conc = nm->mkNode(OR, guard.negate(), conc); |
1004 |
|
// must mark as an internal quantifier |
1005 |
22 |
conc = utils::mkForallInternal(b1v, conc); |
1006 |
|
} |
1007 |
284 |
return conc; |
1008 |
|
} |
1009 |
|
|
1010 |
701 |
Node RegExpOpr::reduceRegExpPos(Node mem, |
1011 |
|
SkolemCache* sc, |
1012 |
|
std::vector<Node>& newSkolems) |
1013 |
|
{ |
1014 |
701 |
Assert(mem.getKind() == STRING_IN_REGEXP); |
1015 |
1402 |
Node s = mem[0]; |
1016 |
1402 |
Node r = mem[1]; |
1017 |
701 |
NodeManager* nm = NodeManager::currentNM(); |
1018 |
701 |
Kind k = r.getKind(); |
1019 |
701 |
Node conc; |
1020 |
701 |
if (k == REGEXP_CONCAT) |
1021 |
|
{ |
1022 |
970 |
std::vector<Node> nvec; |
1023 |
970 |
std::vector<Node> cc; |
1024 |
485 |
SkolemManager* sm = nm->getSkolemManager(); |
1025 |
|
// Look up skolems for each of the components. If sc has optimizations |
1026 |
|
// enabled, this will return arguments of str.to_re. |
1027 |
1976 |
for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i) |
1028 |
|
{ |
1029 |
1491 |
if (r[i].getKind() == STRING_TO_REGEXP) |
1030 |
|
{ |
1031 |
|
// optimization, just take the body |
1032 |
470 |
newSkolems.push_back(r[i][0]); |
1033 |
|
} |
1034 |
|
else |
1035 |
|
{ |
1036 |
2042 |
Node ivalue = nm->mkConst(Rational(i)); |
1037 |
|
Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT, |
1038 |
2042 |
s.getType(), |
1039 |
4084 |
{mem[0], mem[1], ivalue}); |
1040 |
1021 |
newSkolems.push_back(sk); |
1041 |
1021 |
nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i])); |
1042 |
|
} |
1043 |
|
} |
1044 |
|
// (str.in_re x (re.++ R0 .... Rn)) => |
1045 |
|
// (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) ) |
1046 |
970 |
Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems)); |
1047 |
485 |
nvec.insert(nvec.begin(), lem); |
1048 |
485 |
conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec); |
1049 |
|
} |
1050 |
216 |
else if (k == REGEXP_STAR) |
1051 |
|
{ |
1052 |
424 |
Node emp = Word::mkEmptyWord(s.getType()); |
1053 |
424 |
Node se = s.eqNode(emp); |
1054 |
424 |
Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]); |
1055 |
424 |
Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]); |
1056 |
424 |
Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand); |
1057 |
|
// We unfold `x in R*` by considering three cases: `x` is empty, `x` |
1058 |
|
// is matched by `R`, or `x` is matched by two or more `R`s. For the |
1059 |
|
// last case, `x` will break into three pieces, making the beginning |
1060 |
|
// and the end each match `R` and the middle match `R*`. Matching the |
1061 |
|
// beginning and the end with `R` allows us to reason about the |
1062 |
|
// beginning and the end of `x` simultaneously. |
1063 |
|
// |
1064 |
|
// x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R)) |
1065 |
|
|
1066 |
|
// We also immediately unfold the last disjunct for re.*. The advantage |
1067 |
|
// of doing this is that we use the same scheme for skolems above. |
1068 |
424 |
std::vector<Node> newSkolemsC; |
1069 |
212 |
sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC); |
1070 |
212 |
Assert(newSkolemsC.size() == 3); |
1071 |
|
// make the return lemma |
1072 |
|
// can also assume the component match the first and last R are non-empty. |
1073 |
|
// This means that the overall conclusion is: |
1074 |
|
// (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^ |
1075 |
|
// k1 in R ^ k2 in (re.* R) ^ k3 in R ^ |
1076 |
|
// k1 != "" ^ k3 != "") |
1077 |
212 |
conc = nm->mkNode(OR, |
1078 |
|
se, |
1079 |
|
sinr, |
1080 |
848 |
nm->mkNode(AND, |
1081 |
|
sinRExp, |
1082 |
424 |
newSkolemsC[0].eqNode(emp).negate(), |
1083 |
424 |
newSkolemsC[2].eqNode(emp).negate())); |
1084 |
|
} |
1085 |
|
else |
1086 |
|
{ |
1087 |
4 |
Assert(!utils::isRegExpKind(k)); |
1088 |
|
} |
1089 |
1402 |
return conc; |
1090 |
|
} |
1091 |
|
|
1092 |
|
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) { |
1093 |
|
for(std::set< PairNodes >::const_iterator itr = s.begin(); |
1094 |
|
itr != s.end(); ++itr) { |
1095 |
|
if((itr->first == n1 && itr->second == n2) || |
1096 |
|
(itr->first == n2 && itr->second == n1)) { |
1097 |
|
return true; |
1098 |
|
} |
1099 |
|
} |
1100 |
|
return false; |
1101 |
|
} |
1102 |
|
|
1103 |
654 |
bool RegExpOpr::containC2(unsigned cnt, Node n) { |
1104 |
654 |
if(n.getKind() == kind::REGEXP_RV) { |
1105 |
202 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1106 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::containC2"; |
1107 |
202 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1108 |
202 |
return cnt == y; |
1109 |
452 |
} else if(n.getKind() == kind::REGEXP_CONCAT) { |
1110 |
102 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1111 |
80 |
if(containC2(cnt, n[i])) { |
1112 |
12 |
return true; |
1113 |
|
} |
1114 |
|
} |
1115 |
418 |
} else if(n.getKind() == kind::REGEXP_STAR) { |
1116 |
34 |
return containC2(cnt, n[0]); |
1117 |
384 |
} else if(n.getKind() == kind::REGEXP_LOOP) { |
1118 |
|
return containC2(cnt, n[0]); |
1119 |
384 |
} else if(n.getKind() == kind::REGEXP_UNION) { |
1120 |
118 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1121 |
88 |
if(containC2(cnt, n[i])) { |
1122 |
12 |
return true; |
1123 |
|
} |
1124 |
|
} |
1125 |
|
} |
1126 |
394 |
return false; |
1127 |
|
} |
1128 |
198 |
Node RegExpOpr::convert1(unsigned cnt, Node n) { |
1129 |
198 |
Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl; |
1130 |
396 |
Node r1, r2; |
1131 |
198 |
convert2(cnt, n, r1, r2); |
1132 |
198 |
Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl; |
1133 |
198 |
Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, |
1134 |
198 |
NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2); |
1135 |
198 |
ret = rewrite(ret); |
1136 |
198 |
Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl; |
1137 |
396 |
return ret; |
1138 |
|
} |
1139 |
421 |
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) { |
1140 |
421 |
if(n == d_emptyRegexp) { |
1141 |
24 |
r1 = d_emptyRegexp; |
1142 |
24 |
r2 = d_emptyRegexp; |
1143 |
24 |
return; |
1144 |
397 |
} else if(n == d_emptySingleton) { |
1145 |
25 |
r1 = d_emptySingleton; |
1146 |
25 |
r2 = d_emptySingleton; |
1147 |
|
} |
1148 |
397 |
Kind nk = n.getKind(); |
1149 |
397 |
if (nk == REGEXP_RV) |
1150 |
|
{ |
1151 |
61 |
Assert(n[0].getConst<Rational>() <= Rational(String::maxSize())) |
1152 |
|
<< "Exceeded UINT32_MAX in RegExpOpr::convert2"; |
1153 |
61 |
unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt(); |
1154 |
61 |
r1 = d_emptySingleton; |
1155 |
61 |
if(cnt == y) { |
1156 |
61 |
r2 = d_emptyRegexp; |
1157 |
|
} else { |
1158 |
|
r2 = n; |
1159 |
|
} |
1160 |
|
} |
1161 |
336 |
else if (nk == REGEXP_CONCAT) |
1162 |
|
{ |
1163 |
212 |
bool flag = true; |
1164 |
424 |
std::vector<Node> vr1, vr2; |
1165 |
591 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1166 |
452 |
if(containC2(cnt, n[i])) { |
1167 |
146 |
Node t1, t2; |
1168 |
73 |
convert2(cnt, n[i], t1, t2); |
1169 |
73 |
vr1.push_back(t1); |
1170 |
73 |
r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] : |
1171 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1); |
1172 |
73 |
vr2.push_back(t2); |
1173 |
73 |
for( unsigned j=i+1; j<n.getNumChildren(); j++ ) { |
1174 |
|
vr2.push_back(n[j]); |
1175 |
|
} |
1176 |
73 |
r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] : |
1177 |
|
NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2); |
1178 |
73 |
flag = false; |
1179 |
73 |
break; |
1180 |
|
} else { |
1181 |
379 |
vr1.push_back(n[i]); |
1182 |
|
} |
1183 |
|
} |
1184 |
212 |
if(flag) { |
1185 |
139 |
r1 = d_emptySingleton; |
1186 |
139 |
r2 = n; |
1187 |
|
} |
1188 |
|
} |
1189 |
124 |
else if (nk == REGEXP_UNION) |
1190 |
|
{ |
1191 |
126 |
std::vector<Node> vr1, vr2; |
1192 |
213 |
for( unsigned i=0; i<n.getNumChildren(); i++ ) { |
1193 |
300 |
Node t1, t2; |
1194 |
150 |
convert2(cnt, n[i], t1, t2); |
1195 |
150 |
vr1.push_back(t1); |
1196 |
150 |
vr2.push_back(t2); |
1197 |
|
} |
1198 |
63 |
r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1); |
1199 |
63 |
r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2); |
1200 |
|
} |
1201 |
61 |
else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE |
1202 |
|
|| nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP) |
1203 |
|
{ |
1204 |
|
// this leaves n unchanged |
1205 |
61 |
r1 = d_emptySingleton; |
1206 |
61 |
r2 = n; |
1207 |
|
} |
1208 |
|
else |
1209 |
|
{ |
1210 |
|
//is it possible? |
1211 |
|
Unreachable(); |
1212 |
|
} |
1213 |
|
} |
1214 |
|
|
1215 |
317 |
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) { |
1216 |
|
//Assert(checkConstRegExp(r1) && checkConstRegExp(r2)); |
1217 |
317 |
if(r1 > r2) { |
1218 |
60 |
TNode tmpNode = r1; |
1219 |
30 |
r1 = r2; |
1220 |
30 |
r2 = tmpNode; |
1221 |
|
} |
1222 |
317 |
NodeManager* nm = NodeManager::currentNM(); |
1223 |
317 |
Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n "<< mkString(r1) << ",\n " << mkString(r2) << std::endl; |
1224 |
634 |
std::pair < Node, Node > p(r1, r2); |
1225 |
317 |
std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p); |
1226 |
317 |
Node rNode; |
1227 |
317 |
if(itr != d_inter_cache.end()) { |
1228 |
38 |
rNode = itr->second; |
1229 |
|
} else { |
1230 |
279 |
Trace("regexp-int-debug") << " ... not in cache" << std::endl; |
1231 |
279 |
if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) { |
1232 |
|
Trace("regexp-int-debug") << " ... one is empty set" << std::endl; |
1233 |
|
rNode = d_emptyRegexp; |
1234 |
279 |
} else if(r1 == d_emptySingleton || r2 == d_emptySingleton) { |
1235 |
24 |
Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl; |
1236 |
48 |
Node exp; |
1237 |
24 |
int r = delta((r1 == d_emptySingleton ? r2 : r1), exp); |
1238 |
24 |
if(r == 0) { |
1239 |
|
//TODO: variable |
1240 |
|
Unreachable(); |
1241 |
24 |
} else if(r == 1) { |
1242 |
18 |
rNode = d_emptySingleton; |
1243 |
|
} else { |
1244 |
6 |
rNode = d_emptyRegexp; |
1245 |
|
} |
1246 |
255 |
} else if(r1 == r2) { |
1247 |
|
Trace("regexp-int-debug") << " ... equal" << std::endl; |
1248 |
|
rNode = r1; //convert1(cnt, r1); |
1249 |
|
} else { |
1250 |
255 |
Trace("regexp-int-debug") << " ... normal checking" << std::endl; |
1251 |
255 |
std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p); |
1252 |
255 |
if(itrcache != cache.end()) { |
1253 |
57 |
rNode = itrcache->second; |
1254 |
|
} else { |
1255 |
198 |
Trace("regexp-int-debug") << " ... normal without cache" << std::endl; |
1256 |
396 |
std::vector<unsigned> cset; |
1257 |
396 |
std::set<unsigned> cset1, cset2; |
1258 |
396 |
std::set< Node > vset1, vset2; |
1259 |
198 |
firstChars(r1, cset1, vset1); |
1260 |
198 |
firstChars(r2, cset2, vset2); |
1261 |
198 |
Trace("regexp-int-debug") << " ... got fset" << std::endl; |
1262 |
198 |
std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(), |
1263 |
198 |
std::inserter(cset, cset.begin())); |
1264 |
396 |
std::vector< Node > vec_nodes; |
1265 |
396 |
Node delta_exp; |
1266 |
198 |
Trace("regexp-int-debug") << " ... try delta" << std::endl; |
1267 |
198 |
int flag = delta(r1, delta_exp); |
1268 |
198 |
int flag2 = delta(r2, delta_exp); |
1269 |
198 |
Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl; |
1270 |
198 |
if(flag != 2 && flag2 != 2) { |
1271 |
21 |
if(flag == 1 && flag2 == 1) { |
1272 |
21 |
vec_nodes.push_back(d_emptySingleton); |
1273 |
|
} else { |
1274 |
|
//TODO: variable |
1275 |
|
Unreachable(); |
1276 |
|
} |
1277 |
|
} |
1278 |
198 |
if(Trace.isOn("regexp-int-debug")) { |
1279 |
|
Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {"; |
1280 |
|
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1281 |
|
it != cset.end(); |
1282 |
|
++it) |
1283 |
|
{ |
1284 |
|
if (it != cset.begin()) |
1285 |
|
{ |
1286 |
|
Trace("regexp-int-debug") << ", "; |
1287 |
|
} |
1288 |
|
Trace("regexp-int-debug") << (*it); |
1289 |
|
} |
1290 |
|
Trace("regexp-int-debug") << std::endl; |
1291 |
|
} |
1292 |
396 |
std::map< PairNodes, Node > cacheX; |
1293 |
454 |
for (std::vector<unsigned>::const_iterator it = cset.begin(); |
1294 |
454 |
it != cset.end(); |
1295 |
|
++it) |
1296 |
|
{ |
1297 |
512 |
std::vector<unsigned> cvec; |
1298 |
256 |
cvec.push_back(*it); |
1299 |
512 |
String c(cvec); |
1300 |
256 |
Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl; |
1301 |
512 |
Node r1l = derivativeSingle(r1, c); |
1302 |
512 |
Node r2l = derivativeSingle(r2, c); |
1303 |
256 |
Trace("regexp-int-debug") << " ... got partial(r1,c) = " << mkString(r1l) << std::endl; |
1304 |
256 |
Trace("regexp-int-debug") << " ... got partial(r2,c) = " << mkString(r2l) << std::endl; |
1305 |
512 |
Node rt; |
1306 |
|
|
1307 |
256 |
if(r1l > r2l) { |
1308 |
68 |
Node tnode = r1l; |
1309 |
34 |
r1l = r2l; r2l = tnode; |
1310 |
|
} |
1311 |
512 |
PairNodes pp(r1l, r2l); |
1312 |
256 |
std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp); |
1313 |
256 |
if(itr2 != cacheX.end()) { |
1314 |
8 |
rt = itr2->second; |
1315 |
|
} else { |
1316 |
496 |
std::map< PairNodes, Node > cache2(cache); |
1317 |
496 |
cache2[p] = NodeManager::currentNM()->mkNode( |
1318 |
|
kind::REGEXP_RV, |
1319 |
496 |
NodeManager::currentNM()->mkConst(cvc5::Rational(cnt))); |
1320 |
248 |
rt = intersectInternal(r1l, r2l, cache2, cnt+1); |
1321 |
248 |
cacheX[ pp ] = rt; |
1322 |
|
} |
1323 |
|
|
1324 |
512 |
rt = rewrite( |
1325 |
1024 |
nm->mkNode(kind::REGEXP_CONCAT, |
1326 |
512 |
nm->mkNode(kind::STRING_TO_REGEXP, nm->mkConst(c)), |
1327 |
|
rt)); |
1328 |
|
|
1329 |
256 |
Trace("regexp-int-debug") << " ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl; |
1330 |
256 |
vec_nodes.push_back(rt); |
1331 |
|
} |
1332 |
396 |
rNode = rewrite(vec_nodes.size() == 0 |
1333 |
725 |
? d_emptyRegexp |
1334 |
194 |
: vec_nodes.size() == 1 |
1335 |
135 |
? vec_nodes[0] |
1336 |
|
: nm->mkNode(kind::REGEXP_UNION, vec_nodes)); |
1337 |
198 |
rNode = convert1(cnt, rNode); |
1338 |
198 |
rNode = rewrite(rNode); |
1339 |
|
} |
1340 |
|
} |
1341 |
279 |
Trace("regexp-int-debug") << " ... try testing no RV of " << mkString(rNode) << std::endl; |
1342 |
279 |
if (!expr::hasSubtermKind(REGEXP_RV, rNode)) |
1343 |
|
{ |
1344 |
115 |
d_inter_cache[p] = rNode; |
1345 |
|
} |
1346 |
|
} |
1347 |
317 |
Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl; |
1348 |
634 |
return rNode; |
1349 |
|
} |
1350 |
|
|
1351 |
138 |
Node RegExpOpr::removeIntersection(Node r) { |
1352 |
138 |
Assert(checkConstRegExp(r)); |
1353 |
138 |
NodeManager* nm = NodeManager::currentNM(); |
1354 |
276 |
std::unordered_map<TNode, Node> visited; |
1355 |
138 |
std::unordered_map<TNode, Node>::iterator it; |
1356 |
276 |
std::vector<TNode> visit; |
1357 |
276 |
TNode cur; |
1358 |
138 |
visit.push_back(r); |
1359 |
1948 |
do |
1360 |
|
{ |
1361 |
2086 |
cur = visit.back(); |
1362 |
2086 |
visit.pop_back(); |
1363 |
2086 |
it = visited.find(cur); |
1364 |
|
|
1365 |
2086 |
if (it == visited.end()) |
1366 |
|
{ |
1367 |
986 |
visited[cur] = Node::null(); |
1368 |
986 |
visit.push_back(cur); |
1369 |
1948 |
for (const Node& cn : cur) |
1370 |
|
{ |
1371 |
962 |
visit.push_back(cn); |
1372 |
|
} |
1373 |
|
} |
1374 |
1100 |
else if (it->second.isNull()) |
1375 |
|
{ |
1376 |
986 |
Kind ck = cur.getKind(); |
1377 |
1972 |
Node ret; |
1378 |
986 |
bool childChanged = false; |
1379 |
1972 |
std::vector<Node> children; |
1380 |
986 |
if (cur.getMetaKind() == metakind::PARAMETERIZED) |
1381 |
|
{ |
1382 |
|
children.push_back(cur.getOperator()); |
1383 |
|
} |
1384 |
1948 |
for (const Node& cn : cur) |
1385 |
|
{ |
1386 |
962 |
it = visited.find(cn); |
1387 |
962 |
Assert(it != visited.end()); |
1388 |
962 |
Assert(!it->second.isNull()); |
1389 |
962 |
if (ck == REGEXP_INTER) |
1390 |
|
{ |
1391 |
4 |
if (ret.isNull()) |
1392 |
|
{ |
1393 |
2 |
ret = it->second; |
1394 |
|
} |
1395 |
|
else |
1396 |
|
{ |
1397 |
2 |
ret = intersect(ret, it->second); |
1398 |
|
} |
1399 |
|
} |
1400 |
|
else |
1401 |
|
{ |
1402 |
|
// will construct below |
1403 |
958 |
childChanged = childChanged || cn != it->second; |
1404 |
958 |
children.push_back(it->second); |
1405 |
|
} |
1406 |
|
} |
1407 |
986 |
if (ck != REGEXP_INTER) |
1408 |
|
{ |
1409 |
984 |
if (childChanged) |
1410 |
|
{ |
1411 |
2 |
ret = nm->mkNode(cur.getKind(), children); |
1412 |
|
} |
1413 |
|
else |
1414 |
|
{ |
1415 |
982 |
ret = cur; |
1416 |
|
} |
1417 |
|
} |
1418 |
986 |
visited[cur] = ret; |
1419 |
|
} |
1420 |
2086 |
} while (!visit.empty()); |
1421 |
138 |
Assert(visited.find(r) != visited.end()); |
1422 |
138 |
Assert(!visited.find(r)->second.isNull()); |
1423 |
138 |
if (Trace.isOn("regexp-intersect")) |
1424 |
|
{ |
1425 |
|
Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r) |
1426 |
|
<< " ) = " << mkString(visited[r]) << std::endl; |
1427 |
|
} |
1428 |
276 |
return visited[r]; |
1429 |
|
} |
1430 |
|
|
1431 |
69 |
Node RegExpOpr::intersect(Node r1, Node r2) |
1432 |
|
{ |
1433 |
69 |
if (!checkConstRegExp(r1) || !checkConstRegExp(r2)) |
1434 |
|
{ |
1435 |
|
return Node::null(); |
1436 |
|
} |
1437 |
138 |
Node rr1 = removeIntersection(r1); |
1438 |
138 |
Node rr2 = removeIntersection(r2); |
1439 |
138 |
std::map<PairNodes, Node> cache; |
1440 |
69 |
Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl; |
1441 |
69 |
Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl; |
1442 |
138 |
Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1) |
1443 |
69 |
<< ",\n\t" << mkString(r2) << ")" << std::endl; |
1444 |
138 |
Node retNode = intersectInternal(rr1, rr2, cache, 1); |
1445 |
138 |
Trace("regexp-intersect") |
1446 |
138 |
<< "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2) |
1447 |
69 |
<< ") =\n\t" << mkString(retNode) << std::endl; |
1448 |
69 |
Trace("regexp-intersect-node") << "Intersect finished." << std::endl; |
1449 |
69 |
return retNode; |
1450 |
|
} |
1451 |
|
|
1452 |
|
//printing |
1453 |
19625 |
std::string RegExpOpr::niceChar(Node r) { |
1454 |
19625 |
if(r.isConst()) { |
1455 |
38918 |
std::string s = r.getConst<String>().toString(); |
1456 |
19459 |
return s == "." ? "\\." : s; |
1457 |
|
} else { |
1458 |
332 |
std::string ss = "$" + r.toString(); |
1459 |
166 |
return ss; |
1460 |
|
} |
1461 |
|
} |
1462 |
37204 |
std::string RegExpOpr::mkString( Node r ) { |
1463 |
37204 |
std::string retStr; |
1464 |
37204 |
if(r.isNull()) { |
1465 |
|
retStr = "\\E"; |
1466 |
|
} else { |
1467 |
37204 |
int k = r.getKind(); |
1468 |
37204 |
switch( k ) { |
1469 |
472 |
case kind::REGEXP_EMPTY: { |
1470 |
472 |
retStr += "\\E"; |
1471 |
472 |
break; |
1472 |
|
} |
1473 |
436 |
case kind::REGEXP_SIGMA: { |
1474 |
436 |
retStr += "."; |
1475 |
436 |
break; |
1476 |
|
} |
1477 |
18515 |
case kind::STRING_TO_REGEXP: { |
1478 |
37030 |
std::string tmp( niceChar( r[0] ) ); |
1479 |
18515 |
retStr += tmp.size()==1? tmp : "(" + tmp + ")"; |
1480 |
18515 |
break; |
1481 |
|
} |
1482 |
5508 |
case kind::REGEXP_CONCAT: { |
1483 |
5508 |
retStr += "("; |
1484 |
19107 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1485 |
|
//if(i != 0) retStr += "."; |
1486 |
13599 |
retStr += mkString( r[i] ); |
1487 |
|
} |
1488 |
5508 |
retStr += ")"; |
1489 |
5508 |
break; |
1490 |
|
} |
1491 |
3902 |
case kind::REGEXP_UNION: { |
1492 |
3902 |
retStr += "("; |
1493 |
12428 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1494 |
8526 |
if(i != 0) retStr += "|"; |
1495 |
8526 |
retStr += mkString( r[i] ); |
1496 |
|
} |
1497 |
3902 |
retStr += ")"; |
1498 |
3902 |
break; |
1499 |
|
} |
1500 |
68 |
case kind::REGEXP_INTER: { |
1501 |
68 |
retStr += "("; |
1502 |
204 |
for(unsigned i=0; i<r.getNumChildren(); ++i) { |
1503 |
136 |
if(i != 0) retStr += "&"; |
1504 |
136 |
retStr += mkString( r[i] ); |
1505 |
|
} |
1506 |
68 |
retStr += ")"; |
1507 |
68 |
break; |
1508 |
|
} |
1509 |
7112 |
case kind::REGEXP_STAR: { |
1510 |
7112 |
retStr += mkString( r[0] ); |
1511 |
7112 |
retStr += "*"; |
1512 |
7112 |
break; |
1513 |
|
} |
1514 |
|
case kind::REGEXP_PLUS: { |
1515 |
|
retStr += mkString( r[0] ); |
1516 |
|
retStr += "+"; |
1517 |
|
break; |
1518 |
|
} |
1519 |
|
case kind::REGEXP_OPT: { |
1520 |
|
retStr += mkString( r[0] ); |
1521 |
|
retStr += "?"; |
1522 |
|
break; |
1523 |
|
} |
1524 |
555 |
case kind::REGEXP_RANGE: { |
1525 |
555 |
retStr += "["; |
1526 |
555 |
retStr += niceChar( r[0] ); |
1527 |
555 |
retStr += "-"; |
1528 |
555 |
retStr += niceChar( r[1] ); |
1529 |
555 |
retStr += "]"; |
1530 |
555 |
break; |
1531 |
|
} |
1532 |
|
case kind::REGEXP_LOOP: { |
1533 |
|
uint32_t l = utils::getLoopMinOccurrences(r); |
1534 |
|
std::stringstream ss; |
1535 |
|
ss << "(" << mkString(r[0]) << "){" << l << ","; |
1536 |
|
if(r.getNumChildren() == 3) { |
1537 |
|
uint32_t u = utils::getLoopMaxOccurrences(r); |
1538 |
|
ss << u; |
1539 |
|
} |
1540 |
|
ss << "}"; |
1541 |
|
retStr += ss.str(); |
1542 |
|
break; |
1543 |
|
} |
1544 |
532 |
case kind::REGEXP_RV: { |
1545 |
532 |
retStr += "<"; |
1546 |
532 |
retStr += r[0].getConst<Rational>().getNumerator().toString(); |
1547 |
532 |
retStr += ">"; |
1548 |
532 |
break; |
1549 |
|
} |
1550 |
98 |
case REGEXP_COMPLEMENT: |
1551 |
|
{ |
1552 |
98 |
retStr += "^("; |
1553 |
98 |
retStr += mkString(r[0]); |
1554 |
98 |
retStr += ")"; |
1555 |
98 |
break; |
1556 |
|
} |
1557 |
6 |
default: |
1558 |
|
{ |
1559 |
12 |
std::stringstream ss; |
1560 |
6 |
ss << r; |
1561 |
6 |
retStr = ss.str(); |
1562 |
6 |
Assert(!utils::isRegExpKind(r.getKind())); |
1563 |
6 |
break; |
1564 |
|
} |
1565 |
|
} |
1566 |
|
} |
1567 |
|
|
1568 |
37204 |
return retStr; |
1569 |
|
} |
1570 |
|
|
1571 |
1310 |
bool RegExpOpr::regExpIncludes(Node r1, Node r2) |
1572 |
|
{ |
1573 |
1310 |
const auto& it = d_inclusionCache.find(std::make_pair(r1, r2)); |
1574 |
1310 |
if (it != d_inclusionCache.end()) |
1575 |
|
{ |
1576 |
728 |
return (*it).second; |
1577 |
|
} |
1578 |
582 |
bool result = RegExpEntail::regExpIncludes(r1, r2); |
1579 |
582 |
d_inclusionCache[std::make_pair(r1, r2)] = result; |
1580 |
582 |
return result; |
1581 |
|
} |
1582 |
|
|
1583 |
|
} // namespace strings |
1584 |
|
} // namespace theory |
1585 |
31137 |
} // namespace cvc5 |