1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of the regular expression solver for the theory of strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_solver.h" |
17 |
|
|
18 |
|
#include <cmath> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "smt/logic_exception.h" |
22 |
|
#include "theory/ext_theory.h" |
23 |
|
#include "theory/strings/theory_strings_utils.h" |
24 |
|
#include "theory/theory_model.h" |
25 |
|
#include "util/statistics_value.h" |
26 |
|
|
27 |
|
using namespace std; |
28 |
|
using namespace cvc5::context; |
29 |
|
using namespace cvc5::kind; |
30 |
|
|
31 |
|
namespace cvc5 { |
32 |
|
namespace theory { |
33 |
|
namespace strings { |
34 |
|
|
35 |
9853 |
RegExpSolver::RegExpSolver(SolverState& s, |
36 |
|
InferenceManager& im, |
37 |
|
SkolemCache* skc, |
38 |
|
CoreSolver& cs, |
39 |
|
ExtfSolver& es, |
40 |
9853 |
SequencesStatistics& stats) |
41 |
|
: d_state(s), |
42 |
|
d_im(im), |
43 |
|
d_csolver(cs), |
44 |
|
d_esolver(es), |
45 |
|
d_statistics(stats), |
46 |
9853 |
d_regexp_ucached(s.getUserContext()), |
47 |
|
d_regexp_ccached(s.getSatContext()), |
48 |
|
d_processed_memberships(s.getSatContext()), |
49 |
19706 |
d_regexp_opr(skc) |
50 |
|
{ |
51 |
9853 |
d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String("")); |
52 |
9853 |
d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY); |
53 |
9853 |
d_true = NodeManager::currentNM()->mkConst(true); |
54 |
9853 |
d_false = NodeManager::currentNM()->mkConst(false); |
55 |
9853 |
} |
56 |
|
|
57 |
|
Node RegExpSolver::mkAnd(Node c1, Node c2) |
58 |
|
{ |
59 |
|
return NodeManager::currentNM()->mkNode(AND, c1, c2); |
60 |
|
} |
61 |
|
|
62 |
8348 |
void RegExpSolver::checkMemberships() |
63 |
|
{ |
64 |
|
// add the memberships |
65 |
16696 |
std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP); |
66 |
|
// maps representatives to regular expression memberships in that class |
67 |
16696 |
std::map<Node, std::vector<Node> > assertedMems; |
68 |
8348 |
const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo(); |
69 |
8348 |
std::map<Node, ExtfInfoTmp>::const_iterator it; |
70 |
9635 |
for (unsigned i = 0; i < mems.size(); i++) |
71 |
|
{ |
72 |
2574 |
Node n = mems[i]; |
73 |
1287 |
Assert(n.getKind() == STRING_IN_REGEXP); |
74 |
1287 |
it = einfo.find(n); |
75 |
1287 |
Assert(it != einfo.end()); |
76 |
1287 |
if (!it->second.d_const.isNull()) |
77 |
|
{ |
78 |
1275 |
bool pol = it->second.d_const.getConst<bool>(); |
79 |
2550 |
Trace("strings-process-debug") |
80 |
1275 |
<< " add membership : " << n << ", pol = " << pol << std::endl; |
81 |
2550 |
Node r = d_state.getRepresentative(n[0]); |
82 |
1275 |
assertedMems[r].push_back(pol ? n : n.negate()); |
83 |
|
} |
84 |
|
else |
85 |
|
{ |
86 |
24 |
Trace("strings-process-debug") |
87 |
12 |
<< " irrelevant (non-asserted) membership : " << n << std::endl; |
88 |
|
} |
89 |
|
} |
90 |
8348 |
check(assertedMems); |
91 |
8348 |
} |
92 |
|
|
93 |
8348 |
void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) |
94 |
|
{ |
95 |
8348 |
bool addedLemma = false; |
96 |
8348 |
bool changed = false; |
97 |
16674 |
std::vector<Node> processed; |
98 |
|
|
99 |
8348 |
Trace("regexp-process") << "Checking Memberships ... " << std::endl; |
100 |
9187 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
101 |
|
{ |
102 |
1700 |
std::vector<Node> mems2 = mr.second; |
103 |
1722 |
Trace("regexp-process") |
104 |
861 |
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl; |
105 |
861 |
if (!checkEqcInclusion(mems2)) |
106 |
|
{ |
107 |
|
// conflict discovered, return |
108 |
10 |
return; |
109 |
|
} |
110 |
851 |
if (!checkEqcIntersect(mems2)) |
111 |
|
{ |
112 |
|
// conflict discovered, return |
113 |
12 |
return; |
114 |
|
} |
115 |
|
} |
116 |
|
|
117 |
16652 |
Trace("regexp-debug") |
118 |
8326 |
<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma |
119 |
8326 |
<< std::endl; |
120 |
8326 |
if (!addedLemma) |
121 |
|
{ |
122 |
|
// get all memberships |
123 |
16652 |
std::map<Node, Node> allMems; |
124 |
9165 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
125 |
|
{ |
126 |
2055 |
for (const Node& m : mr.second) |
127 |
|
{ |
128 |
1216 |
allMems[m] = mr.first; |
129 |
|
} |
130 |
|
} |
131 |
|
|
132 |
8326 |
NodeManager* nm = NodeManager::currentNM(); |
133 |
|
// representatives of strings that are the LHS of positive memberships that |
134 |
|
// we unfolded |
135 |
16652 |
std::unordered_set<Node> repUnfold; |
136 |
|
// check positive (e=0), then negative (e=1) memberships |
137 |
24978 |
for (unsigned e = 0; e < 2; e++) |
138 |
|
{ |
139 |
19008 |
for (const std::pair<const Node, Node>& mp : allMems) |
140 |
|
{ |
141 |
2908 |
Node assertion = mp.first; |
142 |
2908 |
Node rep = mp.second; |
143 |
|
// check regular expression membership |
144 |
4860 |
Trace("regexp-debug") |
145 |
2430 |
<< "Check : " << assertion << " " |
146 |
4860 |
<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) |
147 |
2430 |
<< " " |
148 |
4860 |
<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) |
149 |
2430 |
<< std::endl; |
150 |
4860 |
if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() |
151 |
2430 |
|| d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) |
152 |
|
{ |
153 |
1234 |
continue; |
154 |
|
} |
155 |
2392 |
Trace("strings-regexp") |
156 |
1196 |
<< "We have regular expression assertion : " << assertion |
157 |
1196 |
<< std::endl; |
158 |
1674 |
Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; |
159 |
1196 |
Assert(atom == Rewriter::rewrite(atom)); |
160 |
1196 |
bool polarity = assertion.getKind() != NOT; |
161 |
1196 |
if (polarity != (e == 0)) |
162 |
|
{ |
163 |
590 |
continue; |
164 |
|
} |
165 |
606 |
bool flag = true; |
166 |
1084 |
Node x = atom[0]; |
167 |
1084 |
Node r = atom[1]; |
168 |
606 |
Assert(rep == d_state.getRepresentative(x)); |
169 |
|
// The following code takes normal forms into account for the purposes |
170 |
|
// of simplifying a regular expression membership x in R. For example, |
171 |
|
// if x = "A" in the current context, then we may be interested in |
172 |
|
// reasoning about ( x in R ) * { x -> "A" }. Say we update the |
173 |
|
// membership to nx in R', then: |
174 |
|
// - nfexp => ( x in R ) <=> nx in R' |
175 |
|
// - rnfexp => R = R' |
176 |
|
// We use these explanations below as assumptions on inferences when |
177 |
|
// appropriate. Notice that for inferring conflicts and tautologies, |
178 |
|
// we use the normal form of x always. This is because we always want to |
179 |
|
// discover conflicts/tautologies whenever possible. |
180 |
|
// For inferences based on regular expression unfolding, we do not use |
181 |
|
// the normal form of x. The reason is that it is better to unfold |
182 |
|
// regular expression memberships in a context-indepedent manner, |
183 |
|
// that is, not taking into account the current normal form of x, since |
184 |
|
// this ensures these lemmas are still relevant after backtracking. |
185 |
1084 |
std::vector<Node> nfexp; |
186 |
1084 |
std::vector<Node> rnfexp; |
187 |
|
// The normal form of x is stored in nx, while x is left unchanged. |
188 |
1084 |
Node nx = x; |
189 |
606 |
if (!x.isConst()) |
190 |
|
{ |
191 |
587 |
nx = d_csolver.getNormalString(x, nfexp); |
192 |
|
} |
193 |
|
// If r is not a constant regular expression, we update it based on |
194 |
|
// normal forms, which may concretize its variables. |
195 |
606 |
if (!d_regexp_opr.checkConstRegExp(r)) |
196 |
|
{ |
197 |
111 |
r = getNormalSymRegExp(r, rnfexp); |
198 |
111 |
nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); |
199 |
111 |
changed = true; |
200 |
|
} |
201 |
1212 |
Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " |
202 |
606 |
<< nx << " IN " << r << std::endl; |
203 |
606 |
if (nx != x || changed) |
204 |
|
{ |
205 |
|
// We rewrite the membership nx IN r. |
206 |
606 |
Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); |
207 |
349 |
Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; |
208 |
349 |
if (tmp.isConst()) |
209 |
|
{ |
210 |
110 |
if (tmp.getConst<bool>() == polarity) |
211 |
|
{ |
212 |
|
// it is satisfied in this SAT context |
213 |
18 |
d_regexp_ccached.insert(assertion); |
214 |
18 |
continue; |
215 |
|
} |
216 |
|
else |
217 |
|
{ |
218 |
|
// we have a conflict |
219 |
148 |
std::vector<Node> iexp = nfexp; |
220 |
148 |
std::vector<Node> noExplain; |
221 |
74 |
iexp.push_back(assertion); |
222 |
74 |
noExplain.push_back(assertion); |
223 |
148 |
Node conc = Node::null(); |
224 |
74 |
d_im.sendInference( |
225 |
|
iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); |
226 |
74 |
addedLemma = true; |
227 |
74 |
break; |
228 |
|
} |
229 |
|
} |
230 |
|
} |
231 |
514 |
if (e == 1 && repUnfold.find(rep) != repUnfold.end()) |
232 |
|
{ |
233 |
|
// do not unfold negative memberships of strings that have new |
234 |
|
// positive unfoldings. For example: |
235 |
|
// x in ("A")* ^ NOT x in ("B")* |
236 |
|
// We unfold x = "A" ++ x' only. The intution here is that positive |
237 |
|
// unfoldings lead to stronger constraints (equalities are stronger |
238 |
|
// than disequalities), and are easier to check. |
239 |
36 |
continue; |
240 |
|
} |
241 |
478 |
if (polarity) |
242 |
|
{ |
243 |
363 |
flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); |
244 |
|
} |
245 |
|
else |
246 |
|
{ |
247 |
115 |
if (!options::stringExp()) |
248 |
|
{ |
249 |
|
throw LogicException( |
250 |
|
"Strings Incomplete (due to Negative Membership) by default, " |
251 |
|
"try --strings-exp option."); |
252 |
|
} |
253 |
|
} |
254 |
478 |
if (flag) |
255 |
|
{ |
256 |
|
// check if the term is atomic |
257 |
940 |
Trace("strings-regexp") |
258 |
470 |
<< "Unroll/simplify membership of atomic term " << rep |
259 |
470 |
<< std::endl; |
260 |
|
// if so, do simple unrolling |
261 |
470 |
Trace("strings-regexp") << "Simplify on " << atom << std::endl; |
262 |
940 |
Node conc = d_regexp_opr.simplify(atom, polarity); |
263 |
470 |
Trace("strings-regexp") << "...finished, got " << conc << std::endl; |
264 |
|
// if simplifying successfully generated a lemma |
265 |
470 |
if (!conc.isNull()) |
266 |
|
{ |
267 |
934 |
std::vector<Node> iexp; |
268 |
934 |
std::vector<Node> noExplain; |
269 |
467 |
iexp.push_back(assertion); |
270 |
467 |
noExplain.push_back(assertion); |
271 |
467 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
272 |
467 |
if (polarity) |
273 |
|
{ |
274 |
353 |
d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); |
275 |
|
} |
276 |
|
else |
277 |
|
{ |
278 |
114 |
d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); |
279 |
|
} |
280 |
467 |
InferenceId inf = |
281 |
467 |
polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; |
282 |
|
// in very rare cases, we may find out that the unfolding lemma |
283 |
|
// for a membership is equivalent to true, in spite of the RE |
284 |
|
// not being rewritten to true. |
285 |
467 |
if (d_im.sendInference(iexp, noExplain, conc, inf)) |
286 |
|
{ |
287 |
461 |
addedLemma = true; |
288 |
461 |
if (e == 0) |
289 |
|
{ |
290 |
|
// Remember that we have unfolded a membership for x |
291 |
|
// notice that we only do this here, after we have definitely |
292 |
|
// added a lemma. |
293 |
351 |
repUnfold.insert(rep); |
294 |
|
} |
295 |
|
} |
296 |
467 |
processed.push_back(assertion); |
297 |
|
} |
298 |
|
else |
299 |
|
{ |
300 |
|
// otherwise we are incomplete |
301 |
3 |
d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); |
302 |
|
} |
303 |
|
} |
304 |
478 |
if (d_state.isInConflict()) |
305 |
|
{ |
306 |
|
break; |
307 |
|
} |
308 |
|
} |
309 |
|
} |
310 |
|
} |
311 |
8326 |
if (addedLemma) |
312 |
|
{ |
313 |
367 |
if (!d_state.isInConflict()) |
314 |
|
{ |
315 |
830 |
for (unsigned i = 0; i < processed.size(); i++) |
316 |
|
{ |
317 |
926 |
Trace("strings-regexp") |
318 |
463 |
<< "...add " << processed[i] << " to u-cache." << std::endl; |
319 |
463 |
d_regexp_ucached.insert(processed[i]); |
320 |
|
} |
321 |
|
} |
322 |
|
} |
323 |
|
} |
324 |
|
|
325 |
861 |
bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) |
326 |
|
{ |
327 |
1722 |
std::unordered_set<Node> remove; |
328 |
|
|
329 |
2108 |
for (const Node& m1 : mems) |
330 |
|
{ |
331 |
1257 |
bool m1Neg = m1.getKind() == NOT; |
332 |
2452 |
Node m1Lit = m1Neg ? m1[0] : m1; |
333 |
|
|
334 |
1257 |
if (remove.find(m1) != remove.end()) |
335 |
|
{ |
336 |
|
// Skip memberships marked for removal |
337 |
52 |
continue; |
338 |
|
} |
339 |
|
|
340 |
3414 |
for (const Node& m2 : mems) |
341 |
|
{ |
342 |
2273 |
if (m1 == m2) |
343 |
|
{ |
344 |
1177 |
continue; |
345 |
|
} |
346 |
|
|
347 |
1096 |
bool m2Neg = m2.getKind() == NOT; |
348 |
2128 |
Node m2Lit = m2Neg ? m2[0] : m2; |
349 |
|
|
350 |
|
// Both regular expression memberships have the same polarity |
351 |
1096 |
if (m1Neg == m2Neg) |
352 |
|
{ |
353 |
714 |
if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) |
354 |
|
{ |
355 |
108 |
if (m1Neg) |
356 |
|
{ |
357 |
|
// ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> |
358 |
|
// mark ~str.in.re(x, R2) as reduced |
359 |
54 |
d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); |
360 |
54 |
remove.insert(m2); |
361 |
|
} |
362 |
|
else |
363 |
|
{ |
364 |
|
// str.in.re(x, R1) includes str.in.re(x, R2) ---> |
365 |
|
// mark str.in.re(x, R1) as reduced |
366 |
54 |
d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); |
367 |
54 |
remove.insert(m1); |
368 |
|
|
369 |
|
// We don't need to process m1 anymore |
370 |
54 |
break; |
371 |
|
} |
372 |
|
} |
373 |
|
} |
374 |
|
else |
375 |
|
{ |
376 |
754 |
Node pos = m1Neg ? m2Lit : m1Lit; |
377 |
754 |
Node neg = m1Neg ? m1Lit : m2Lit; |
378 |
382 |
if (d_regexp_opr.regExpIncludes(neg[1], pos[1])) |
379 |
|
{ |
380 |
|
// We have a conflict because we have a case where str.in.re(x, R1) |
381 |
|
// and ~str.in.re(x, R2) but R2 includes R1, so there is no |
382 |
|
// possible value for x that satisfies both memberships. |
383 |
20 |
std::vector<Node> vec_nodes; |
384 |
10 |
vec_nodes.push_back(pos); |
385 |
10 |
vec_nodes.push_back(neg.negate()); |
386 |
|
|
387 |
10 |
if (pos[0] != neg[0]) |
388 |
|
{ |
389 |
|
vec_nodes.push_back(pos[0].eqNode(neg[0])); |
390 |
|
} |
391 |
|
|
392 |
20 |
Node conc; |
393 |
10 |
d_im.sendInference( |
394 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true); |
395 |
10 |
return false; |
396 |
|
} |
397 |
|
} |
398 |
|
} |
399 |
|
} |
400 |
|
|
401 |
2553 |
mems.erase(std::remove_if( |
402 |
|
mems.begin(), |
403 |
|
mems.end(), |
404 |
2094 |
[&remove](Node& n) { return remove.find(n) != remove.end(); }), |
405 |
3404 |
mems.end()); |
406 |
|
|
407 |
851 |
return true; |
408 |
|
} |
409 |
|
|
410 |
851 |
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) |
411 |
|
{ |
412 |
|
// do not compute intersections if the re intersection mode is none |
413 |
851 |
if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE) |
414 |
|
{ |
415 |
|
return true; |
416 |
|
} |
417 |
851 |
if (mems.empty()) |
418 |
|
{ |
419 |
|
// nothing to do |
420 |
22 |
return true; |
421 |
|
} |
422 |
|
// the initial regular expression membership and its constant type |
423 |
1658 |
Node mi; |
424 |
829 |
RegExpConstType rcti = RE_C_UNKNOWN; |
425 |
829 |
NodeManager* nm = NodeManager::currentNM(); |
426 |
1852 |
for (const Node& m : mems) |
427 |
|
{ |
428 |
1590 |
if (m.getKind() != STRING_IN_REGEXP) |
429 |
|
{ |
430 |
|
// do not do negative |
431 |
507 |
Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); |
432 |
1523 |
continue; |
433 |
|
} |
434 |
576 |
RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); |
435 |
576 |
if (rct == RE_C_VARIABLE |
436 |
921 |
|| (options::stringRegExpInterMode() |
437 |
|
== options::RegExpInterMode::CONSTANT |
438 |
428 |
&& rct != RE_C_CONRETE_CONSTANT)) |
439 |
|
{ |
440 |
|
// cannot do intersection on RE with variables, or with re.allchar based |
441 |
|
// on option. |
442 |
345 |
continue; |
443 |
|
} |
444 |
462 |
if (options::stringRegExpInterMode() |
445 |
231 |
== options::RegExpInterMode::ONE_CONSTANT) |
446 |
|
{ |
447 |
|
if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) |
448 |
|
{ |
449 |
|
// if both have re.allchar, do not do intersection if the |
450 |
|
// options::RegExpInterMode::ONE_CONSTANT option is set. |
451 |
|
continue; |
452 |
|
} |
453 |
|
} |
454 |
395 |
if (mi.isNull()) |
455 |
|
{ |
456 |
|
// first regular expression seen |
457 |
164 |
mi = m; |
458 |
164 |
rcti = rct; |
459 |
164 |
continue; |
460 |
|
} |
461 |
74 |
Node resR = d_regexp_opr.intersect(mi[1], m[1]); |
462 |
|
// intersection should be computable |
463 |
67 |
Assert(!resR.isNull()); |
464 |
67 |
if (resR == d_emptyRegexp) |
465 |
|
{ |
466 |
|
// conflict, explain |
467 |
24 |
std::vector<Node> vec_nodes; |
468 |
12 |
vec_nodes.push_back(mi); |
469 |
12 |
vec_nodes.push_back(m); |
470 |
12 |
if (mi[0] != m[0]) |
471 |
|
{ |
472 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
473 |
|
} |
474 |
24 |
Node conc; |
475 |
12 |
d_im.sendInference( |
476 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true); |
477 |
|
// conflict, return |
478 |
12 |
return false; |
479 |
|
} |
480 |
|
// rewrite to ensure the equality checks below are precise |
481 |
62 |
Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); |
482 |
62 |
Node mresr = Rewriter::rewrite(mres); |
483 |
55 |
if (mresr == mi) |
484 |
|
{ |
485 |
|
// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent |
486 |
|
// to x in R1, hence x in R2 can be marked redundant. |
487 |
|
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
488 |
|
} |
489 |
55 |
else if (mresr == m) |
490 |
|
{ |
491 |
|
// same as above, opposite direction |
492 |
7 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
493 |
|
} |
494 |
|
else |
495 |
|
{ |
496 |
|
// new conclusion |
497 |
|
// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2)) |
498 |
96 |
std::vector<Node> vec_nodes; |
499 |
48 |
vec_nodes.push_back(mi); |
500 |
48 |
vec_nodes.push_back(m); |
501 |
48 |
if (mi[0] != m[0]) |
502 |
|
{ |
503 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
504 |
|
} |
505 |
48 |
d_im.sendInference( |
506 |
|
vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true); |
507 |
|
// both are reduced |
508 |
48 |
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER); |
509 |
48 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER); |
510 |
|
// do not send more than one lemma for this class |
511 |
48 |
return true; |
512 |
|
} |
513 |
|
} |
514 |
769 |
return true; |
515 |
|
} |
516 |
|
|
517 |
363 |
bool RegExpSolver::checkPDerivative( |
518 |
|
Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp) |
519 |
|
{ |
520 |
363 |
if (d_state.areEqual(x, d_emptyString)) |
521 |
|
{ |
522 |
4 |
Node exp; |
523 |
4 |
switch (d_regexp_opr.delta(r, exp)) |
524 |
|
{ |
525 |
|
case 0: |
526 |
|
{ |
527 |
|
std::vector<Node> noExplain; |
528 |
|
noExplain.push_back(atom); |
529 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
530 |
|
std::vector<Node> iexp = nf_exp; |
531 |
|
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
532 |
|
d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA); |
533 |
|
addedLemma = true; |
534 |
|
d_regexp_ccached.insert(atom); |
535 |
|
return false; |
536 |
|
} |
537 |
|
case 1: |
538 |
|
{ |
539 |
|
d_regexp_ccached.insert(atom); |
540 |
|
break; |
541 |
|
} |
542 |
4 |
case 2: |
543 |
|
{ |
544 |
8 |
std::vector<Node> noExplain; |
545 |
4 |
noExplain.push_back(atom); |
546 |
4 |
noExplain.push_back(x.eqNode(d_emptyString)); |
547 |
8 |
std::vector<Node> iexp = nf_exp; |
548 |
4 |
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
549 |
4 |
d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF); |
550 |
4 |
addedLemma = true; |
551 |
4 |
d_regexp_ccached.insert(atom); |
552 |
4 |
return false; |
553 |
|
} |
554 |
|
default: |
555 |
|
// Impossible |
556 |
|
break; |
557 |
|
} |
558 |
|
} |
559 |
|
else |
560 |
|
{ |
561 |
359 |
if (deriveRegExp(x, r, atom, nf_exp)) |
562 |
|
{ |
563 |
4 |
addedLemma = true; |
564 |
4 |
d_regexp_ccached.insert(atom); |
565 |
4 |
return false; |
566 |
|
} |
567 |
|
} |
568 |
355 |
return true; |
569 |
|
} |
570 |
|
|
571 |
359 |
cvc5::String RegExpSolver::getHeadConst(Node x) |
572 |
|
{ |
573 |
359 |
if (x.isConst()) |
574 |
|
{ |
575 |
2 |
return x.getConst<String>(); |
576 |
|
} |
577 |
357 |
else if (x.getKind() == STRING_CONCAT) |
578 |
|
{ |
579 |
50 |
if (x[0].isConst()) |
580 |
|
{ |
581 |
6 |
return x[0].getConst<String>(); |
582 |
|
} |
583 |
|
} |
584 |
351 |
return d_emptyString.getConst<String>(); |
585 |
|
} |
586 |
|
|
587 |
359 |
bool RegExpSolver::deriveRegExp(Node x, |
588 |
|
Node r, |
589 |
|
Node atom, |
590 |
|
std::vector<Node>& ant) |
591 |
|
{ |
592 |
359 |
Assert(x != d_emptyString); |
593 |
718 |
Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x |
594 |
359 |
<< ", r= " << r << std::endl; |
595 |
718 |
cvc5::String s = getHeadConst(x); |
596 |
|
// only allow RE_DERIVE for concrete constant regular expressions |
597 |
359 |
if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) |
598 |
|
{ |
599 |
8 |
Node conc = Node::null(); |
600 |
8 |
Node dc = r; |
601 |
4 |
bool flag = true; |
602 |
4 |
for (unsigned i = 0; i < s.size(); ++i) |
603 |
|
{ |
604 |
4 |
cvc5::String c = s.substr(i, 1); |
605 |
4 |
Node dc2; |
606 |
4 |
int rt = d_regexp_opr.derivativeS(dc, c, dc2); |
607 |
4 |
dc = dc2; |
608 |
4 |
if (rt == 2) |
609 |
|
{ |
610 |
|
// CONFLICT |
611 |
4 |
flag = false; |
612 |
4 |
break; |
613 |
|
} |
614 |
|
} |
615 |
|
// send lemma |
616 |
4 |
if (flag) |
617 |
|
{ |
618 |
|
if (x.isConst()) |
619 |
|
{ |
620 |
|
Assert(false) |
621 |
|
<< "Impossible: RegExpSolver::deriveRegExp: const string in const " |
622 |
|
"regular expression."; |
623 |
|
return false; |
624 |
|
} |
625 |
|
else |
626 |
|
{ |
627 |
|
Assert(x.getKind() == STRING_CONCAT); |
628 |
|
std::vector<Node> vec_nodes; |
629 |
|
for (unsigned int i = 1; i < x.getNumChildren(); ++i) |
630 |
|
{ |
631 |
|
vec_nodes.push_back(x[i]); |
632 |
|
} |
633 |
|
Node left = utils::mkConcat(vec_nodes, x.getType()); |
634 |
|
left = Rewriter::rewrite(left); |
635 |
|
conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); |
636 |
|
} |
637 |
|
} |
638 |
8 |
std::vector<Node> iexp = ant; |
639 |
8 |
std::vector<Node> noExplain; |
640 |
4 |
noExplain.push_back(atom); |
641 |
4 |
iexp.push_back(atom); |
642 |
4 |
d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE); |
643 |
4 |
return true; |
644 |
|
} |
645 |
355 |
return false; |
646 |
|
} |
647 |
|
|
648 |
577 |
Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) |
649 |
|
{ |
650 |
577 |
Node ret = r; |
651 |
577 |
switch (r.getKind()) |
652 |
|
{ |
653 |
72 |
case REGEXP_EMPTY: |
654 |
|
case REGEXP_SIGMA: |
655 |
72 |
case REGEXP_RANGE: break; |
656 |
220 |
case STRING_TO_REGEXP: |
657 |
|
{ |
658 |
220 |
if (!r[0].isConst()) |
659 |
|
{ |
660 |
228 |
Node tmp = d_csolver.getNormalString(r[0], nf_exp); |
661 |
114 |
if (tmp != r[0]) |
662 |
|
{ |
663 |
100 |
ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); |
664 |
|
} |
665 |
|
} |
666 |
220 |
break; |
667 |
|
} |
668 |
282 |
case REGEXP_CONCAT: |
669 |
|
case REGEXP_UNION: |
670 |
|
case REGEXP_INTER: |
671 |
|
case REGEXP_STAR: |
672 |
|
case REGEXP_COMPLEMENT: |
673 |
|
{ |
674 |
564 |
std::vector<Node> vec_nodes; |
675 |
748 |
for (const Node& cr : r) |
676 |
|
{ |
677 |
466 |
vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); |
678 |
|
} |
679 |
282 |
ret = Rewriter::rewrite( |
680 |
564 |
NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); |
681 |
282 |
break; |
682 |
|
} |
683 |
3 |
default: |
684 |
|
{ |
685 |
6 |
Trace("strings-error") << "Unsupported term: " << r |
686 |
3 |
<< " in normalization SymRegExp." << std::endl; |
687 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
688 |
|
} |
689 |
|
} |
690 |
577 |
return ret; |
691 |
|
} |
692 |
|
|
693 |
|
} // namespace strings |
694 |
|
} // namespace theory |
695 |
29340 |
} // namespace cvc5 |