1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of the regular expression solver for the theory of strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_solver.h" |
17 |
|
|
18 |
|
#include <cmath> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "smt/logic_exception.h" |
22 |
|
#include "theory/ext_theory.h" |
23 |
|
#include "theory/strings/theory_strings_utils.h" |
24 |
|
#include "theory/theory_model.h" |
25 |
|
#include "util/statistics_value.h" |
26 |
|
|
27 |
|
using namespace std; |
28 |
|
using namespace cvc5::context; |
29 |
|
using namespace cvc5::kind; |
30 |
|
|
31 |
|
namespace cvc5 { |
32 |
|
namespace theory { |
33 |
|
namespace strings { |
34 |
|
|
35 |
9942 |
RegExpSolver::RegExpSolver(Env& env, |
36 |
|
SolverState& s, |
37 |
|
InferenceManager& im, |
38 |
|
SkolemCache* skc, |
39 |
|
CoreSolver& cs, |
40 |
|
ExtfSolver& es, |
41 |
9942 |
SequencesStatistics& stats) |
42 |
|
: EnvObj(env), |
43 |
|
d_state(s), |
44 |
|
d_im(im), |
45 |
|
d_csolver(cs), |
46 |
|
d_esolver(es), |
47 |
|
d_statistics(stats), |
48 |
9942 |
d_regexp_ucached(userContext()), |
49 |
|
d_regexp_ccached(context()), |
50 |
|
d_processed_memberships(context()), |
51 |
19884 |
d_regexp_opr(skc) |
52 |
|
{ |
53 |
9942 |
d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String("")); |
54 |
9942 |
d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY); |
55 |
9942 |
d_true = NodeManager::currentNM()->mkConst(true); |
56 |
9942 |
d_false = NodeManager::currentNM()->mkConst(false); |
57 |
9942 |
} |
58 |
|
|
59 |
|
Node RegExpSolver::mkAnd(Node c1, Node c2) |
60 |
|
{ |
61 |
|
return NodeManager::currentNM()->mkNode(AND, c1, c2); |
62 |
|
} |
63 |
|
|
64 |
8149 |
void RegExpSolver::checkMemberships() |
65 |
|
{ |
66 |
|
// add the memberships |
67 |
16298 |
std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP); |
68 |
|
// maps representatives to regular expression memberships in that class |
69 |
16298 |
std::map<Node, std::vector<Node> > assertedMems; |
70 |
8149 |
const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo(); |
71 |
8149 |
std::map<Node, ExtfInfoTmp>::const_iterator it; |
72 |
9623 |
for (unsigned i = 0; i < mems.size(); i++) |
73 |
|
{ |
74 |
2948 |
Node n = mems[i]; |
75 |
1474 |
Assert(n.getKind() == STRING_IN_REGEXP); |
76 |
1474 |
it = einfo.find(n); |
77 |
1474 |
Assert(it != einfo.end()); |
78 |
1474 |
if (!it->second.d_const.isNull()) |
79 |
|
{ |
80 |
1442 |
bool pol = it->second.d_const.getConst<bool>(); |
81 |
2884 |
Trace("strings-process-debug") |
82 |
1442 |
<< " add membership : " << n << ", pol = " << pol << std::endl; |
83 |
2884 |
Node r = d_state.getRepresentative(n[0]); |
84 |
1442 |
assertedMems[r].push_back(pol ? n : n.negate()); |
85 |
|
} |
86 |
|
else |
87 |
|
{ |
88 |
64 |
Trace("strings-process-debug") |
89 |
32 |
<< " irrelevant (non-asserted) membership : " << n << std::endl; |
90 |
|
} |
91 |
|
} |
92 |
8149 |
check(assertedMems); |
93 |
8149 |
} |
94 |
|
|
95 |
8149 |
void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) |
96 |
|
{ |
97 |
8149 |
bool addedLemma = false; |
98 |
8149 |
bool changed = false; |
99 |
16276 |
std::vector<Node> processed; |
100 |
|
|
101 |
8149 |
Trace("regexp-process") << "Checking Memberships ... " << std::endl; |
102 |
9129 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
103 |
|
{ |
104 |
1982 |
std::vector<Node> mems2 = mr.second; |
105 |
2004 |
Trace("regexp-process") |
106 |
1002 |
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl; |
107 |
1002 |
if (!checkEqcInclusion(mems2)) |
108 |
|
{ |
109 |
|
// conflict discovered, return |
110 |
10 |
return; |
111 |
|
} |
112 |
992 |
if (!checkEqcIntersect(mems2)) |
113 |
|
{ |
114 |
|
// conflict discovered, return |
115 |
12 |
return; |
116 |
|
} |
117 |
|
} |
118 |
|
|
119 |
16254 |
Trace("regexp-debug") |
120 |
8127 |
<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma |
121 |
8127 |
<< std::endl; |
122 |
8127 |
if (!addedLemma) |
123 |
|
{ |
124 |
|
// get all memberships |
125 |
16254 |
std::map<Node, Node> allMems; |
126 |
9107 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
127 |
|
{ |
128 |
2363 |
for (const Node& m : mr.second) |
129 |
|
{ |
130 |
1383 |
allMems[m] = mr.first; |
131 |
|
} |
132 |
|
} |
133 |
|
|
134 |
8127 |
NodeManager* nm = NodeManager::currentNM(); |
135 |
|
// representatives of strings that are the LHS of positive memberships that |
136 |
|
// we unfolded |
137 |
16254 |
std::unordered_set<Node> repUnfold; |
138 |
|
// check positive (e=0), then negative (e=1) memberships |
139 |
24381 |
for (unsigned e = 0; e < 2; e++) |
140 |
|
{ |
141 |
18966 |
for (const std::pair<const Node, Node>& mp : allMems) |
142 |
|
{ |
143 |
3299 |
Node assertion = mp.first; |
144 |
3299 |
Node rep = mp.second; |
145 |
|
// check regular expression membership |
146 |
5528 |
Trace("regexp-debug") |
147 |
2764 |
<< "Check : " << assertion << " " |
148 |
5528 |
<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) |
149 |
2764 |
<< " " |
150 |
5528 |
<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) |
151 |
2764 |
<< std::endl; |
152 |
5528 |
if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() |
153 |
2764 |
|| d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) |
154 |
|
{ |
155 |
1458 |
continue; |
156 |
|
} |
157 |
2612 |
Trace("strings-regexp") |
158 |
1306 |
<< "We have regular expression assertion : " << assertion |
159 |
1306 |
<< std::endl; |
160 |
1841 |
Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; |
161 |
1306 |
Assert(atom == Rewriter::rewrite(atom)); |
162 |
1306 |
bool polarity = assertion.getKind() != NOT; |
163 |
1306 |
if (polarity != (e == 0)) |
164 |
|
{ |
165 |
645 |
continue; |
166 |
|
} |
167 |
661 |
bool flag = true; |
168 |
1196 |
Node x = atom[0]; |
169 |
1196 |
Node r = atom[1]; |
170 |
661 |
Assert(rep == d_state.getRepresentative(x)); |
171 |
|
// The following code takes normal forms into account for the purposes |
172 |
|
// of simplifying a regular expression membership x in R. For example, |
173 |
|
// if x = "A" in the current context, then we may be interested in |
174 |
|
// reasoning about ( x in R ) * { x -> "A" }. Say we update the |
175 |
|
// membership to nx in R', then: |
176 |
|
// - nfexp => ( x in R ) <=> nx in R' |
177 |
|
// - rnfexp => R = R' |
178 |
|
// We use these explanations below as assumptions on inferences when |
179 |
|
// appropriate. Notice that for inferring conflicts and tautologies, |
180 |
|
// we use the normal form of x always. This is because we always want to |
181 |
|
// discover conflicts/tautologies whenever possible. |
182 |
|
// For inferences based on regular expression unfolding, we do not use |
183 |
|
// the normal form of x. The reason is that it is better to unfold |
184 |
|
// regular expression memberships in a context-indepedent manner, |
185 |
|
// that is, not taking into account the current normal form of x, since |
186 |
|
// this ensures these lemmas are still relevant after backtracking. |
187 |
1196 |
std::vector<Node> nfexp; |
188 |
1196 |
std::vector<Node> rnfexp; |
189 |
|
// The normal form of x is stored in nx, while x is left unchanged. |
190 |
1196 |
Node nx = x; |
191 |
661 |
if (!x.isConst()) |
192 |
|
{ |
193 |
642 |
nx = d_csolver.getNormalString(x, nfexp); |
194 |
|
} |
195 |
|
// If r is not a constant regular expression, we update it based on |
196 |
|
// normal forms, which may concretize its variables. |
197 |
661 |
if (!d_regexp_opr.checkConstRegExp(r)) |
198 |
|
{ |
199 |
111 |
r = getNormalSymRegExp(r, rnfexp); |
200 |
111 |
nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); |
201 |
111 |
changed = true; |
202 |
|
} |
203 |
1322 |
Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " |
204 |
661 |
<< nx << " IN " << r << std::endl; |
205 |
661 |
if (nx != x || changed) |
206 |
|
{ |
207 |
|
// We rewrite the membership nx IN r. |
208 |
668 |
Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); |
209 |
366 |
Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; |
210 |
366 |
if (tmp.isConst()) |
211 |
|
{ |
212 |
76 |
if (tmp.getConst<bool>() == polarity) |
213 |
|
{ |
214 |
|
// it is satisfied in this SAT context |
215 |
12 |
d_regexp_ccached.insert(assertion); |
216 |
12 |
continue; |
217 |
|
} |
218 |
|
else |
219 |
|
{ |
220 |
|
// we have a conflict |
221 |
104 |
std::vector<Node> iexp = nfexp; |
222 |
104 |
std::vector<Node> noExplain; |
223 |
52 |
iexp.push_back(assertion); |
224 |
52 |
noExplain.push_back(assertion); |
225 |
104 |
Node conc = Node::null(); |
226 |
52 |
d_im.sendInference( |
227 |
|
iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); |
228 |
52 |
addedLemma = true; |
229 |
52 |
break; |
230 |
|
} |
231 |
|
} |
232 |
|
} |
233 |
597 |
if (e == 1 && repUnfold.find(rep) != repUnfold.end()) |
234 |
|
{ |
235 |
|
// do not unfold negative memberships of strings that have new |
236 |
|
// positive unfoldings. For example: |
237 |
|
// x in ("A")* ^ NOT x in ("B")* |
238 |
|
// We unfold x = "A" ++ x' only. The intution here is that positive |
239 |
|
// unfoldings lead to stronger constraints (equalities are stronger |
240 |
|
// than disequalities), and are easier to check. |
241 |
62 |
continue; |
242 |
|
} |
243 |
535 |
if (polarity) |
244 |
|
{ |
245 |
400 |
flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); |
246 |
|
} |
247 |
|
else |
248 |
|
{ |
249 |
135 |
if (!options::stringExp()) |
250 |
|
{ |
251 |
|
throw LogicException( |
252 |
|
"Strings Incomplete (due to Negative Membership) by default, " |
253 |
|
"try --strings-exp option."); |
254 |
|
} |
255 |
|
} |
256 |
535 |
if (flag) |
257 |
|
{ |
258 |
|
// check if the term is atomic |
259 |
1054 |
Trace("strings-regexp") |
260 |
527 |
<< "Unroll/simplify membership of atomic term " << rep |
261 |
527 |
<< std::endl; |
262 |
|
// if so, do simple unrolling |
263 |
527 |
Trace("strings-regexp") << "Simplify on " << atom << std::endl; |
264 |
1054 |
Node conc = d_regexp_opr.simplify(atom, polarity); |
265 |
527 |
Trace("strings-regexp") << "...finished, got " << conc << std::endl; |
266 |
|
// if simplifying successfully generated a lemma |
267 |
527 |
if (!conc.isNull()) |
268 |
|
{ |
269 |
1048 |
std::vector<Node> iexp; |
270 |
1048 |
std::vector<Node> noExplain; |
271 |
524 |
iexp.push_back(assertion); |
272 |
524 |
noExplain.push_back(assertion); |
273 |
524 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
274 |
524 |
if (polarity) |
275 |
|
{ |
276 |
390 |
d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); |
277 |
|
} |
278 |
|
else |
279 |
|
{ |
280 |
134 |
d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); |
281 |
|
} |
282 |
524 |
InferenceId inf = |
283 |
524 |
polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; |
284 |
|
// in very rare cases, we may find out that the unfolding lemma |
285 |
|
// for a membership is equivalent to true, in spite of the RE |
286 |
|
// not being rewritten to true. |
287 |
524 |
if (d_im.sendInference(iexp, noExplain, conc, inf)) |
288 |
|
{ |
289 |
518 |
addedLemma = true; |
290 |
518 |
if (e == 0) |
291 |
|
{ |
292 |
|
// Remember that we have unfolded a membership for x |
293 |
|
// notice that we only do this here, after we have definitely |
294 |
|
// added a lemma. |
295 |
388 |
repUnfold.insert(rep); |
296 |
|
} |
297 |
|
} |
298 |
524 |
processed.push_back(assertion); |
299 |
|
} |
300 |
|
else |
301 |
|
{ |
302 |
|
// otherwise we are incomplete |
303 |
3 |
d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); |
304 |
|
} |
305 |
|
} |
306 |
535 |
if (d_state.isInConflict()) |
307 |
|
{ |
308 |
|
break; |
309 |
|
} |
310 |
|
} |
311 |
|
} |
312 |
|
} |
313 |
8127 |
if (addedLemma) |
314 |
|
{ |
315 |
392 |
if (!d_state.isInConflict()) |
316 |
|
{ |
317 |
912 |
for (unsigned i = 0; i < processed.size(); i++) |
318 |
|
{ |
319 |
1040 |
Trace("strings-regexp") |
320 |
520 |
<< "...add " << processed[i] << " to u-cache." << std::endl; |
321 |
520 |
d_regexp_ucached.insert(processed[i]); |
322 |
|
} |
323 |
|
} |
324 |
|
} |
325 |
|
} |
326 |
|
|
327 |
1002 |
bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) |
328 |
|
{ |
329 |
2004 |
std::unordered_set<Node> remove; |
330 |
|
|
331 |
2416 |
for (const Node& m1 : mems) |
332 |
|
{ |
333 |
1424 |
bool m1Neg = m1.getKind() == NOT; |
334 |
2800 |
Node m1Lit = m1Neg ? m1[0] : m1; |
335 |
|
|
336 |
1424 |
if (remove.find(m1) != remove.end()) |
337 |
|
{ |
338 |
|
// Skip memberships marked for removal |
339 |
38 |
continue; |
340 |
|
} |
341 |
|
|
342 |
3866 |
for (const Node& m2 : mems) |
343 |
|
{ |
344 |
2544 |
if (m1 == m2) |
345 |
|
{ |
346 |
1358 |
continue; |
347 |
|
} |
348 |
|
|
349 |
1186 |
bool m2Neg = m2.getKind() == NOT; |
350 |
2308 |
Node m2Lit = m2Neg ? m2[0] : m2; |
351 |
|
|
352 |
|
// Both regular expression memberships have the same polarity |
353 |
1186 |
if (m1Neg == m2Neg) |
354 |
|
{ |
355 |
756 |
if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) |
356 |
|
{ |
357 |
94 |
if (m1Neg) |
358 |
|
{ |
359 |
|
// ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> |
360 |
|
// mark ~str.in.re(x, R2) as reduced |
361 |
40 |
d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); |
362 |
40 |
remove.insert(m2); |
363 |
|
} |
364 |
|
else |
365 |
|
{ |
366 |
|
// str.in.re(x, R1) includes str.in.re(x, R2) ---> |
367 |
|
// mark str.in.re(x, R1) as reduced |
368 |
54 |
d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); |
369 |
54 |
remove.insert(m1); |
370 |
|
|
371 |
|
// We don't need to process m1 anymore |
372 |
54 |
break; |
373 |
|
} |
374 |
|
} |
375 |
|
} |
376 |
|
else |
377 |
|
{ |
378 |
850 |
Node pos = m1Neg ? m2Lit : m1Lit; |
379 |
850 |
Node neg = m1Neg ? m1Lit : m2Lit; |
380 |
430 |
if (d_regexp_opr.regExpIncludes(neg[1], pos[1])) |
381 |
|
{ |
382 |
|
// We have a conflict because we have a case where str.in.re(x, R1) |
383 |
|
// and ~str.in.re(x, R2) but R2 includes R1, so there is no |
384 |
|
// possible value for x that satisfies both memberships. |
385 |
20 |
std::vector<Node> vec_nodes; |
386 |
10 |
vec_nodes.push_back(pos); |
387 |
10 |
vec_nodes.push_back(neg.negate()); |
388 |
|
|
389 |
10 |
if (pos[0] != neg[0]) |
390 |
|
{ |
391 |
|
vec_nodes.push_back(pos[0].eqNode(neg[0])); |
392 |
|
} |
393 |
|
|
394 |
20 |
Node conc; |
395 |
10 |
d_im.sendInference( |
396 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true); |
397 |
10 |
return false; |
398 |
|
} |
399 |
|
} |
400 |
|
} |
401 |
|
} |
402 |
|
|
403 |
2976 |
mems.erase(std::remove_if( |
404 |
|
mems.begin(), |
405 |
|
mems.end(), |
406 |
2402 |
[&remove](Node& n) { return remove.find(n) != remove.end(); }), |
407 |
3968 |
mems.end()); |
408 |
|
|
409 |
992 |
return true; |
410 |
|
} |
411 |
|
|
412 |
992 |
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) |
413 |
|
{ |
414 |
|
// do not compute intersections if the re intersection mode is none |
415 |
992 |
if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE) |
416 |
|
{ |
417 |
|
return true; |
418 |
|
} |
419 |
992 |
if (mems.empty()) |
420 |
|
{ |
421 |
|
// nothing to do |
422 |
22 |
return true; |
423 |
|
} |
424 |
|
// the initial regular expression membership and its constant type |
425 |
1940 |
Node mi; |
426 |
970 |
RegExpConstType rcti = RE_C_UNKNOWN; |
427 |
970 |
NodeManager* nm = NodeManager::currentNM(); |
428 |
2178 |
for (const Node& m : mems) |
429 |
|
{ |
430 |
1765 |
if (m.getKind() != STRING_IN_REGEXP) |
431 |
|
{ |
432 |
|
// do not do negative |
433 |
497 |
Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); |
434 |
1698 |
continue; |
435 |
|
} |
436 |
771 |
RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); |
437 |
771 |
if (rct == RE_C_VARIABLE |
438 |
1311 |
|| (options::stringRegExpInterMode() |
439 |
|
== options::RegExpInterMode::CONSTANT |
440 |
623 |
&& rct != RE_C_CONRETE_CONSTANT)) |
441 |
|
{ |
442 |
|
// cannot do intersection on RE with variables, or with re.allchar based |
443 |
|
// on option. |
444 |
540 |
continue; |
445 |
|
} |
446 |
462 |
if (options::stringRegExpInterMode() |
447 |
231 |
== options::RegExpInterMode::ONE_CONSTANT) |
448 |
|
{ |
449 |
|
if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) |
450 |
|
{ |
451 |
|
// if both have re.allchar, do not do intersection if the |
452 |
|
// options::RegExpInterMode::ONE_CONSTANT option is set. |
453 |
|
continue; |
454 |
|
} |
455 |
|
} |
456 |
395 |
if (mi.isNull()) |
457 |
|
{ |
458 |
|
// first regular expression seen |
459 |
164 |
mi = m; |
460 |
164 |
rcti = rct; |
461 |
164 |
continue; |
462 |
|
} |
463 |
74 |
Node resR = d_regexp_opr.intersect(mi[1], m[1]); |
464 |
|
// intersection should be computable |
465 |
67 |
Assert(!resR.isNull()); |
466 |
67 |
if (resR == d_emptyRegexp) |
467 |
|
{ |
468 |
|
// conflict, explain |
469 |
24 |
std::vector<Node> vec_nodes; |
470 |
12 |
vec_nodes.push_back(mi); |
471 |
12 |
vec_nodes.push_back(m); |
472 |
12 |
if (mi[0] != m[0]) |
473 |
|
{ |
474 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
475 |
|
} |
476 |
24 |
Node conc; |
477 |
12 |
d_im.sendInference( |
478 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true); |
479 |
|
// conflict, return |
480 |
12 |
return false; |
481 |
|
} |
482 |
|
// rewrite to ensure the equality checks below are precise |
483 |
62 |
Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); |
484 |
62 |
Node mresr = Rewriter::rewrite(mres); |
485 |
55 |
if (mresr == mi) |
486 |
|
{ |
487 |
|
// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent |
488 |
|
// to x in R1, hence x in R2 can be marked redundant. |
489 |
|
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
490 |
|
} |
491 |
55 |
else if (mresr == m) |
492 |
|
{ |
493 |
|
// same as above, opposite direction |
494 |
7 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
495 |
|
} |
496 |
|
else |
497 |
|
{ |
498 |
|
// new conclusion |
499 |
|
// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2)) |
500 |
96 |
std::vector<Node> vec_nodes; |
501 |
48 |
vec_nodes.push_back(mi); |
502 |
48 |
vec_nodes.push_back(m); |
503 |
48 |
if (mi[0] != m[0]) |
504 |
|
{ |
505 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
506 |
|
} |
507 |
48 |
d_im.sendInference( |
508 |
|
vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true); |
509 |
|
// both are reduced |
510 |
48 |
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER); |
511 |
48 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER); |
512 |
|
// do not send more than one lemma for this class |
513 |
48 |
return true; |
514 |
|
} |
515 |
|
} |
516 |
910 |
return true; |
517 |
|
} |
518 |
|
|
519 |
400 |
bool RegExpSolver::checkPDerivative( |
520 |
|
Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp) |
521 |
|
{ |
522 |
400 |
if (d_state.areEqual(x, d_emptyString)) |
523 |
|
{ |
524 |
4 |
Node exp; |
525 |
4 |
switch (d_regexp_opr.delta(r, exp)) |
526 |
|
{ |
527 |
|
case 0: |
528 |
|
{ |
529 |
|
std::vector<Node> noExplain; |
530 |
|
noExplain.push_back(atom); |
531 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
532 |
|
std::vector<Node> iexp = nf_exp; |
533 |
|
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
534 |
|
d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA); |
535 |
|
addedLemma = true; |
536 |
|
d_regexp_ccached.insert(atom); |
537 |
|
return false; |
538 |
|
} |
539 |
|
case 1: |
540 |
|
{ |
541 |
|
d_regexp_ccached.insert(atom); |
542 |
|
break; |
543 |
|
} |
544 |
4 |
case 2: |
545 |
|
{ |
546 |
8 |
std::vector<Node> noExplain; |
547 |
4 |
noExplain.push_back(atom); |
548 |
4 |
noExplain.push_back(x.eqNode(d_emptyString)); |
549 |
8 |
std::vector<Node> iexp = nf_exp; |
550 |
4 |
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
551 |
4 |
d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF); |
552 |
4 |
addedLemma = true; |
553 |
4 |
d_regexp_ccached.insert(atom); |
554 |
4 |
return false; |
555 |
|
} |
556 |
|
default: |
557 |
|
// Impossible |
558 |
|
break; |
559 |
|
} |
560 |
|
} |
561 |
|
else |
562 |
|
{ |
563 |
396 |
if (deriveRegExp(x, r, atom, nf_exp)) |
564 |
|
{ |
565 |
4 |
addedLemma = true; |
566 |
4 |
d_regexp_ccached.insert(atom); |
567 |
4 |
return false; |
568 |
|
} |
569 |
|
} |
570 |
392 |
return true; |
571 |
|
} |
572 |
|
|
573 |
396 |
cvc5::String RegExpSolver::getHeadConst(Node x) |
574 |
|
{ |
575 |
396 |
if (x.isConst()) |
576 |
|
{ |
577 |
2 |
return x.getConst<String>(); |
578 |
|
} |
579 |
394 |
else if (x.getKind() == STRING_CONCAT) |
580 |
|
{ |
581 |
50 |
if (x[0].isConst()) |
582 |
|
{ |
583 |
6 |
return x[0].getConst<String>(); |
584 |
|
} |
585 |
|
} |
586 |
388 |
return d_emptyString.getConst<String>(); |
587 |
|
} |
588 |
|
|
589 |
396 |
bool RegExpSolver::deriveRegExp(Node x, |
590 |
|
Node r, |
591 |
|
Node atom, |
592 |
|
std::vector<Node>& ant) |
593 |
|
{ |
594 |
396 |
Assert(x != d_emptyString); |
595 |
792 |
Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x |
596 |
396 |
<< ", r= " << r << std::endl; |
597 |
792 |
cvc5::String s = getHeadConst(x); |
598 |
|
// only allow RE_DERIVE for concrete constant regular expressions |
599 |
396 |
if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) |
600 |
|
{ |
601 |
8 |
Node conc = Node::null(); |
602 |
8 |
Node dc = r; |
603 |
4 |
bool flag = true; |
604 |
4 |
for (unsigned i = 0; i < s.size(); ++i) |
605 |
|
{ |
606 |
4 |
cvc5::String c = s.substr(i, 1); |
607 |
4 |
Node dc2; |
608 |
4 |
int rt = d_regexp_opr.derivativeS(dc, c, dc2); |
609 |
4 |
dc = dc2; |
610 |
4 |
if (rt == 2) |
611 |
|
{ |
612 |
|
// CONFLICT |
613 |
4 |
flag = false; |
614 |
4 |
break; |
615 |
|
} |
616 |
|
} |
617 |
|
// send lemma |
618 |
4 |
if (flag) |
619 |
|
{ |
620 |
|
if (x.isConst()) |
621 |
|
{ |
622 |
|
Assert(false) |
623 |
|
<< "Impossible: RegExpSolver::deriveRegExp: const string in const " |
624 |
|
"regular expression."; |
625 |
|
return false; |
626 |
|
} |
627 |
|
else |
628 |
|
{ |
629 |
|
Assert(x.getKind() == STRING_CONCAT); |
630 |
|
std::vector<Node> vec_nodes; |
631 |
|
for (unsigned int i = 1; i < x.getNumChildren(); ++i) |
632 |
|
{ |
633 |
|
vec_nodes.push_back(x[i]); |
634 |
|
} |
635 |
|
Node left = utils::mkConcat(vec_nodes, x.getType()); |
636 |
|
left = Rewriter::rewrite(left); |
637 |
|
conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); |
638 |
|
} |
639 |
|
} |
640 |
8 |
std::vector<Node> iexp = ant; |
641 |
8 |
std::vector<Node> noExplain; |
642 |
4 |
noExplain.push_back(atom); |
643 |
4 |
iexp.push_back(atom); |
644 |
4 |
d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE); |
645 |
4 |
return true; |
646 |
|
} |
647 |
392 |
return false; |
648 |
|
} |
649 |
|
|
650 |
577 |
Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) |
651 |
|
{ |
652 |
577 |
Node ret = r; |
653 |
577 |
switch (r.getKind()) |
654 |
|
{ |
655 |
72 |
case REGEXP_EMPTY: |
656 |
|
case REGEXP_SIGMA: |
657 |
72 |
case REGEXP_RANGE: break; |
658 |
220 |
case STRING_TO_REGEXP: |
659 |
|
{ |
660 |
220 |
if (!r[0].isConst()) |
661 |
|
{ |
662 |
228 |
Node tmp = d_csolver.getNormalString(r[0], nf_exp); |
663 |
114 |
if (tmp != r[0]) |
664 |
|
{ |
665 |
100 |
ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); |
666 |
|
} |
667 |
|
} |
668 |
220 |
break; |
669 |
|
} |
670 |
282 |
case REGEXP_CONCAT: |
671 |
|
case REGEXP_UNION: |
672 |
|
case REGEXP_INTER: |
673 |
|
case REGEXP_STAR: |
674 |
|
case REGEXP_COMPLEMENT: |
675 |
|
{ |
676 |
564 |
std::vector<Node> vec_nodes; |
677 |
748 |
for (const Node& cr : r) |
678 |
|
{ |
679 |
466 |
vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); |
680 |
|
} |
681 |
282 |
ret = Rewriter::rewrite( |
682 |
564 |
NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); |
683 |
282 |
break; |
684 |
|
} |
685 |
3 |
default: |
686 |
|
{ |
687 |
6 |
Trace("strings-error") << "Unsupported term: " << r |
688 |
3 |
<< " in normalization SymRegExp." << std::endl; |
689 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
690 |
|
} |
691 |
|
} |
692 |
577 |
return ret; |
693 |
|
} |
694 |
|
|
695 |
|
} // namespace strings |
696 |
|
} // namespace theory |
697 |
29577 |
} // namespace cvc5 |