1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of the regular expression solver for the theory of strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_solver.h" |
17 |
|
|
18 |
|
#include <cmath> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "smt/logic_exception.h" |
22 |
|
#include "theory/ext_theory.h" |
23 |
|
#include "theory/strings/theory_strings_utils.h" |
24 |
|
#include "theory/theory_model.h" |
25 |
|
#include "util/statistics_value.h" |
26 |
|
|
27 |
|
using namespace std; |
28 |
|
using namespace cvc5::context; |
29 |
|
using namespace cvc5::kind; |
30 |
|
|
31 |
|
namespace cvc5 { |
32 |
|
namespace theory { |
33 |
|
namespace strings { |
34 |
|
|
35 |
9459 |
RegExpSolver::RegExpSolver(SolverState& s, |
36 |
|
InferenceManager& im, |
37 |
|
SkolemCache* skc, |
38 |
|
CoreSolver& cs, |
39 |
|
ExtfSolver& es, |
40 |
9459 |
SequencesStatistics& stats) |
41 |
|
: d_state(s), |
42 |
|
d_im(im), |
43 |
|
d_csolver(cs), |
44 |
|
d_esolver(es), |
45 |
|
d_statistics(stats), |
46 |
9459 |
d_regexp_ucached(s.getUserContext()), |
47 |
|
d_regexp_ccached(s.getSatContext()), |
48 |
|
d_processed_memberships(s.getSatContext()), |
49 |
18918 |
d_regexp_opr(skc) |
50 |
|
{ |
51 |
9459 |
d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String("")); |
52 |
9459 |
d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY); |
53 |
9459 |
d_true = NodeManager::currentNM()->mkConst(true); |
54 |
9459 |
d_false = NodeManager::currentNM()->mkConst(false); |
55 |
9459 |
} |
56 |
|
|
57 |
|
Node RegExpSolver::mkAnd(Node c1, Node c2) |
58 |
|
{ |
59 |
|
return NodeManager::currentNM()->mkNode(AND, c1, c2); |
60 |
|
} |
61 |
|
|
62 |
7503 |
void RegExpSolver::checkMemberships() |
63 |
|
{ |
64 |
|
// add the memberships |
65 |
15006 |
std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP); |
66 |
|
// maps representatives to regular expression memberships in that class |
67 |
15006 |
std::map<Node, std::vector<Node> > assertedMems; |
68 |
7503 |
const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo(); |
69 |
7503 |
std::map<Node, ExtfInfoTmp>::const_iterator it; |
70 |
8498 |
for (unsigned i = 0; i < mems.size(); i++) |
71 |
|
{ |
72 |
1990 |
Node n = mems[i]; |
73 |
995 |
Assert(n.getKind() == STRING_IN_REGEXP); |
74 |
995 |
it = einfo.find(n); |
75 |
995 |
Assert(it != einfo.end()); |
76 |
995 |
if (!it->second.d_const.isNull()) |
77 |
|
{ |
78 |
973 |
bool pol = it->second.d_const.getConst<bool>(); |
79 |
1946 |
Trace("strings-process-debug") |
80 |
973 |
<< " add membership : " << n << ", pol = " << pol << std::endl; |
81 |
1946 |
Node r = d_state.getRepresentative(n[0]); |
82 |
973 |
assertedMems[r].push_back(pol ? n : n.negate()); |
83 |
|
} |
84 |
|
else |
85 |
|
{ |
86 |
44 |
Trace("strings-process-debug") |
87 |
22 |
<< " irrelevant (non-asserted) membership : " << n << std::endl; |
88 |
|
} |
89 |
|
} |
90 |
7503 |
check(assertedMems); |
91 |
7503 |
} |
92 |
|
|
93 |
7503 |
void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) |
94 |
|
{ |
95 |
7503 |
bool addedLemma = false; |
96 |
7503 |
bool changed = false; |
97 |
14984 |
std::vector<Node> processed; |
98 |
|
|
99 |
7503 |
Trace("regexp-process") << "Checking Memberships ... " << std::endl; |
100 |
8162 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
101 |
|
{ |
102 |
1340 |
std::vector<Node> mems2 = mr.second; |
103 |
1362 |
Trace("regexp-process") |
104 |
681 |
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl; |
105 |
681 |
if (!checkEqcInclusion(mems2)) |
106 |
|
{ |
107 |
|
// conflict discovered, return |
108 |
10 |
return; |
109 |
|
} |
110 |
671 |
if (!checkEqcIntersect(mems2)) |
111 |
|
{ |
112 |
|
// conflict discovered, return |
113 |
12 |
return; |
114 |
|
} |
115 |
|
} |
116 |
|
|
117 |
14962 |
Trace("regexp-debug") |
118 |
7481 |
<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma |
119 |
7481 |
<< std::endl; |
120 |
7481 |
if (!addedLemma) |
121 |
|
{ |
122 |
|
// get all memberships |
123 |
14962 |
std::map<Node, Node> allMems; |
124 |
8140 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
125 |
|
{ |
126 |
1573 |
for (const Node& m : mr.second) |
127 |
|
{ |
128 |
914 |
bool polarity = m.getKind() != NOT; |
129 |
1268 |
if (polarity || !options::stringIgnNegMembership()) |
130 |
|
{ |
131 |
908 |
allMems[m] = mr.first; |
132 |
|
} |
133 |
|
} |
134 |
|
} |
135 |
|
|
136 |
7481 |
NodeManager* nm = NodeManager::currentNM(); |
137 |
|
// representatives of strings that are the LHS of positive memberships that |
138 |
|
// we unfolded |
139 |
14962 |
std::unordered_set<Node> repUnfold; |
140 |
|
// check positive (e=0), then negative (e=1) memberships |
141 |
22443 |
for (unsigned e = 0; e < 2; e++) |
142 |
|
{ |
143 |
16726 |
for (const std::pair<const Node, Node>& mp : allMems) |
144 |
|
{ |
145 |
2188 |
Node assertion = mp.first; |
146 |
2188 |
Node rep = mp.second; |
147 |
|
// check regular expression membership |
148 |
3628 |
Trace("regexp-debug") |
149 |
1814 |
<< "Check : " << assertion << " " |
150 |
3628 |
<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) |
151 |
1814 |
<< " " |
152 |
3628 |
<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) |
153 |
1814 |
<< std::endl; |
154 |
3628 |
if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() |
155 |
1814 |
|| d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) |
156 |
|
{ |
157 |
906 |
continue; |
158 |
|
} |
159 |
1816 |
Trace("strings-regexp") |
160 |
908 |
<< "We have regular expression assertion : " << assertion |
161 |
908 |
<< std::endl; |
162 |
1282 |
Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; |
163 |
908 |
Assert(atom == Rewriter::rewrite(atom)); |
164 |
908 |
bool polarity = assertion.getKind() != NOT; |
165 |
908 |
if (polarity != (e == 0)) |
166 |
|
{ |
167 |
448 |
continue; |
168 |
|
} |
169 |
460 |
bool flag = true; |
170 |
834 |
Node x = atom[0]; |
171 |
834 |
Node r = atom[1]; |
172 |
460 |
Assert(rep == d_state.getRepresentative(x)); |
173 |
|
// The following code takes normal forms into account for the purposes |
174 |
|
// of simplifying a regular expression membership x in R. For example, |
175 |
|
// if x = "A" in the current context, then we may be interested in |
176 |
|
// reasoning about ( x in R ) * { x -> "A" }. Say we update the |
177 |
|
// membership to nx in R', then: |
178 |
|
// - nfexp => ( x in R ) <=> nx in R' |
179 |
|
// - rnfexp => R = R' |
180 |
|
// We use these explanations below as assumptions on inferences when |
181 |
|
// appropriate. Notice that for inferring conflicts and tautologies, |
182 |
|
// we use the normal form of x always. This is because we always want to |
183 |
|
// discover conflicts/tautologies whenever possible. |
184 |
|
// For inferences based on regular expression unfolding, we do not use |
185 |
|
// the normal form of x. The reason is that it is better to unfold |
186 |
|
// regular expression memberships in a context-indepedent manner, |
187 |
|
// that is, not taking into account the current normal form of x, since |
188 |
|
// this ensures these lemmas are still relevant after backtracking. |
189 |
834 |
std::vector<Node> nfexp; |
190 |
834 |
std::vector<Node> rnfexp; |
191 |
|
// The normal form of x is stored in nx, while x is left unchanged. |
192 |
834 |
Node nx = x; |
193 |
460 |
if (!x.isConst()) |
194 |
|
{ |
195 |
447 |
nx = d_csolver.getNormalString(x, nfexp); |
196 |
|
} |
197 |
|
// If r is not a constant regular expression, we update it based on |
198 |
|
// normal forms, which may concretize its variables. |
199 |
460 |
if (!d_regexp_opr.checkConstRegExp(r)) |
200 |
|
{ |
201 |
93 |
r = getNormalSymRegExp(r, rnfexp); |
202 |
93 |
nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); |
203 |
93 |
changed = true; |
204 |
|
} |
205 |
920 |
Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " |
206 |
460 |
<< nx << " IN " << r << std::endl; |
207 |
460 |
if (nx != x || changed) |
208 |
|
{ |
209 |
|
// We rewrite the membership nx IN r. |
210 |
430 |
Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); |
211 |
243 |
Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; |
212 |
243 |
if (tmp.isConst()) |
213 |
|
{ |
214 |
62 |
if (tmp.getConst<bool>() == polarity) |
215 |
|
{ |
216 |
|
// it is satisfied in this SAT context |
217 |
6 |
d_regexp_ccached.insert(assertion); |
218 |
6 |
continue; |
219 |
|
} |
220 |
|
else |
221 |
|
{ |
222 |
|
// we have a conflict |
223 |
100 |
std::vector<Node> iexp = nfexp; |
224 |
100 |
std::vector<Node> noExplain; |
225 |
50 |
iexp.push_back(assertion); |
226 |
50 |
noExplain.push_back(assertion); |
227 |
100 |
Node conc = Node::null(); |
228 |
50 |
d_im.sendInference( |
229 |
|
iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); |
230 |
50 |
addedLemma = true; |
231 |
50 |
break; |
232 |
|
} |
233 |
|
} |
234 |
|
} |
235 |
404 |
if (e == 1 && repUnfold.find(rep) != repUnfold.end()) |
236 |
|
{ |
237 |
|
// do not unfold negative memberships of strings that have new |
238 |
|
// positive unfoldings. For example: |
239 |
|
// x in ("A")* ^ NOT x in ("B")* |
240 |
|
// We unfold x = "A" ++ x' only. The intution here is that positive |
241 |
|
// unfoldings lead to stronger constraints (equalities are stronger |
242 |
|
// than disequalities), and are easier to check. |
243 |
30 |
continue; |
244 |
|
} |
245 |
374 |
if (polarity) |
246 |
|
{ |
247 |
286 |
flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); |
248 |
|
} |
249 |
|
else |
250 |
|
{ |
251 |
88 |
if (!options::stringExp()) |
252 |
|
{ |
253 |
|
throw LogicException( |
254 |
|
"Strings Incomplete (due to Negative Membership) by default, " |
255 |
|
"try --strings-exp option."); |
256 |
|
} |
257 |
|
} |
258 |
374 |
if (flag) |
259 |
|
{ |
260 |
|
// check if the term is atomic |
261 |
732 |
Trace("strings-regexp") |
262 |
366 |
<< "Unroll/simplify membership of atomic term " << rep |
263 |
366 |
<< std::endl; |
264 |
|
// if so, do simple unrolling |
265 |
366 |
Trace("strings-regexp") << "Simplify on " << atom << std::endl; |
266 |
732 |
Node conc = d_regexp_opr.simplify(atom, polarity); |
267 |
366 |
Trace("strings-regexp") << "...finished, got " << conc << std::endl; |
268 |
|
// if simplifying successfully generated a lemma |
269 |
366 |
if (!conc.isNull()) |
270 |
|
{ |
271 |
726 |
std::vector<Node> iexp; |
272 |
726 |
std::vector<Node> noExplain; |
273 |
363 |
iexp.push_back(assertion); |
274 |
363 |
noExplain.push_back(assertion); |
275 |
363 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
276 |
363 |
if (polarity) |
277 |
|
{ |
278 |
276 |
d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); |
279 |
|
} |
280 |
|
else |
281 |
|
{ |
282 |
87 |
d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); |
283 |
|
} |
284 |
363 |
InferenceId inf = |
285 |
363 |
polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; |
286 |
|
// in very rare cases, we may find out that the unfolding lemma |
287 |
|
// for a membership is equivalent to true, in spite of the RE |
288 |
|
// not being rewritten to true. |
289 |
363 |
if (d_im.sendInference(iexp, noExplain, conc, inf)) |
290 |
|
{ |
291 |
361 |
addedLemma = true; |
292 |
361 |
if (e == 0) |
293 |
|
{ |
294 |
|
// Remember that we have unfolded a membership for x |
295 |
|
// notice that we only do this here, after we have definitely |
296 |
|
// added a lemma. |
297 |
274 |
repUnfold.insert(rep); |
298 |
|
} |
299 |
|
} |
300 |
363 |
processed.push_back(assertion); |
301 |
|
} |
302 |
|
else |
303 |
|
{ |
304 |
|
// otherwise we are incomplete |
305 |
3 |
d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); |
306 |
|
} |
307 |
|
} |
308 |
374 |
if (d_state.isInConflict()) |
309 |
|
{ |
310 |
|
break; |
311 |
|
} |
312 |
|
} |
313 |
|
} |
314 |
|
} |
315 |
7481 |
if (addedLemma) |
316 |
|
{ |
317 |
295 |
if (!d_state.isInConflict()) |
318 |
|
{ |
319 |
658 |
for (unsigned i = 0; i < processed.size(); i++) |
320 |
|
{ |
321 |
726 |
Trace("strings-regexp") |
322 |
363 |
<< "...add " << processed[i] << " to u-cache." << std::endl; |
323 |
363 |
d_regexp_ucached.insert(processed[i]); |
324 |
|
} |
325 |
|
} |
326 |
|
} |
327 |
|
} |
328 |
|
|
329 |
681 |
bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) |
330 |
|
{ |
331 |
1362 |
std::unordered_set<Node> remove; |
332 |
|
|
333 |
1626 |
for (const Node& m1 : mems) |
334 |
|
{ |
335 |
955 |
bool m1Neg = m1.getKind() == NOT; |
336 |
1897 |
Node m1Lit = m1Neg ? m1[0] : m1; |
337 |
|
|
338 |
955 |
if (remove.find(m1) != remove.end()) |
339 |
|
{ |
340 |
|
// Skip memberships marked for removal |
341 |
3 |
continue; |
342 |
|
} |
343 |
|
|
344 |
2599 |
for (const Node& m2 : mems) |
345 |
|
{ |
346 |
1667 |
if (m1 == m2) |
347 |
|
{ |
348 |
946 |
continue; |
349 |
|
} |
350 |
|
|
351 |
721 |
bool m2Neg = m2.getKind() == NOT; |
352 |
1422 |
Node m2Lit = m2Neg ? m2[0] : m2; |
353 |
|
|
354 |
|
// Both regular expression memberships have the same polarity |
355 |
721 |
if (m1Neg == m2Neg) |
356 |
|
{ |
357 |
423 |
if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) |
358 |
|
{ |
359 |
15 |
if (m1Neg) |
360 |
|
{ |
361 |
|
// ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> |
362 |
|
// mark ~str.in.re(x, R2) as reduced |
363 |
5 |
d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); |
364 |
5 |
remove.insert(m2); |
365 |
|
} |
366 |
|
else |
367 |
|
{ |
368 |
|
// str.in.re(x, R1) includes str.in.re(x, R2) ---> |
369 |
|
// mark str.in.re(x, R1) as reduced |
370 |
10 |
d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); |
371 |
10 |
remove.insert(m1); |
372 |
|
|
373 |
|
// We don't need to process m1 anymore |
374 |
10 |
break; |
375 |
|
} |
376 |
|
} |
377 |
|
} |
378 |
|
else |
379 |
|
{ |
380 |
586 |
Node pos = m1Neg ? m2Lit : m1Lit; |
381 |
586 |
Node neg = m1Neg ? m1Lit : m2Lit; |
382 |
298 |
if (d_regexp_opr.regExpIncludes(neg[1], pos[1])) |
383 |
|
{ |
384 |
|
// We have a conflict because we have a case where str.in.re(x, R1) |
385 |
|
// and ~str.in.re(x, R2) but R2 includes R1, so there is no |
386 |
|
// possible value for x that satisfies both memberships. |
387 |
20 |
std::vector<Node> vec_nodes; |
388 |
10 |
vec_nodes.push_back(pos); |
389 |
10 |
vec_nodes.push_back(neg.negate()); |
390 |
|
|
391 |
10 |
if (pos[0] != neg[0]) |
392 |
|
{ |
393 |
|
vec_nodes.push_back(pos[0].eqNode(neg[0])); |
394 |
|
} |
395 |
|
|
396 |
20 |
Node conc; |
397 |
10 |
d_im.sendInference( |
398 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true); |
399 |
10 |
return false; |
400 |
|
} |
401 |
|
} |
402 |
|
} |
403 |
|
} |
404 |
|
|
405 |
2013 |
mems.erase(std::remove_if( |
406 |
|
mems.begin(), |
407 |
|
mems.end(), |
408 |
1612 |
[&remove](Node& n) { return remove.find(n) != remove.end(); }), |
409 |
2684 |
mems.end()); |
410 |
|
|
411 |
671 |
return true; |
412 |
|
} |
413 |
|
|
414 |
671 |
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) |
415 |
|
{ |
416 |
|
// do not compute intersections if the re intersection mode is none |
417 |
1966 |
if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE) |
418 |
|
{ |
419 |
|
return true; |
420 |
|
} |
421 |
671 |
if (mems.empty()) |
422 |
|
{ |
423 |
|
// nothing to do |
424 |
|
return true; |
425 |
|
} |
426 |
|
// the initial regular expression membership and its constant type |
427 |
1342 |
Node mi; |
428 |
671 |
RegExpConstType rcti = RE_C_UNKNOWN; |
429 |
671 |
NodeManager* nm = NodeManager::currentNM(); |
430 |
1531 |
for (const Node& m : mems) |
431 |
|
{ |
432 |
1255 |
if (m.getKind() != STRING_IN_REGEXP) |
433 |
|
{ |
434 |
|
// do not do negative |
435 |
347 |
Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); |
436 |
1200 |
continue; |
437 |
|
} |
438 |
561 |
RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); |
439 |
561 |
if (rct == RE_C_VARIABLE |
440 |
929 |
|| (options::stringRegExpInterMode() |
441 |
|
== options::RegExpInterMode::CONSTANT |
442 |
431 |
&& rct != RE_C_CONRETE_CONSTANT)) |
443 |
|
{ |
444 |
|
// cannot do intersection on RE with variables, or with re.allchar based |
445 |
|
// on option. |
446 |
368 |
continue; |
447 |
|
} |
448 |
386 |
if (options::stringRegExpInterMode() |
449 |
193 |
== options::RegExpInterMode::ONE_CONSTANT) |
450 |
|
{ |
451 |
|
if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) |
452 |
|
{ |
453 |
|
// if both have re.allchar, do not do intersection if the |
454 |
|
// options::RegExpInterMode::ONE_CONSTANT option is set. |
455 |
|
continue; |
456 |
|
} |
457 |
|
} |
458 |
331 |
if (mi.isNull()) |
459 |
|
{ |
460 |
|
// first regular expression seen |
461 |
138 |
mi = m; |
462 |
138 |
rcti = rct; |
463 |
138 |
continue; |
464 |
|
} |
465 |
62 |
Node resR = d_regexp_opr.intersect(mi[1], m[1]); |
466 |
|
// intersection should be computable |
467 |
55 |
Assert(!resR.isNull()); |
468 |
55 |
if (resR == d_emptyRegexp) |
469 |
|
{ |
470 |
|
// conflict, explain |
471 |
24 |
std::vector<Node> vec_nodes; |
472 |
12 |
vec_nodes.push_back(mi); |
473 |
12 |
vec_nodes.push_back(m); |
474 |
12 |
if (mi[0] != m[0]) |
475 |
|
{ |
476 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
477 |
|
} |
478 |
24 |
Node conc; |
479 |
12 |
d_im.sendInference( |
480 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true); |
481 |
|
// conflict, return |
482 |
12 |
return false; |
483 |
|
} |
484 |
|
// rewrite to ensure the equality checks below are precise |
485 |
50 |
Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); |
486 |
50 |
Node mresr = Rewriter::rewrite(mres); |
487 |
43 |
if (mresr == mi) |
488 |
|
{ |
489 |
|
// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent |
490 |
|
// to x in R1, hence x in R2 can be marked redundant. |
491 |
|
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
492 |
|
} |
493 |
43 |
else if (mresr == m) |
494 |
|
{ |
495 |
|
// same as above, opposite direction |
496 |
7 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
497 |
|
} |
498 |
|
else |
499 |
|
{ |
500 |
|
// new conclusion |
501 |
|
// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2)) |
502 |
72 |
std::vector<Node> vec_nodes; |
503 |
36 |
vec_nodes.push_back(mi); |
504 |
36 |
vec_nodes.push_back(m); |
505 |
36 |
if (mi[0] != m[0]) |
506 |
|
{ |
507 |
|
vec_nodes.push_back(mi[0].eqNode(m[0])); |
508 |
|
} |
509 |
36 |
d_im.sendInference( |
510 |
|
vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true); |
511 |
|
// both are reduced |
512 |
36 |
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER); |
513 |
36 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER); |
514 |
|
// do not send more than one lemma for this class |
515 |
36 |
return true; |
516 |
|
} |
517 |
|
} |
518 |
623 |
return true; |
519 |
|
} |
520 |
|
|
521 |
286 |
bool RegExpSolver::checkPDerivative( |
522 |
|
Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp) |
523 |
|
{ |
524 |
286 |
if (d_state.areEqual(x, d_emptyString)) |
525 |
|
{ |
526 |
4 |
Node exp; |
527 |
4 |
switch (d_regexp_opr.delta(r, exp)) |
528 |
|
{ |
529 |
|
case 0: |
530 |
|
{ |
531 |
|
std::vector<Node> noExplain; |
532 |
|
noExplain.push_back(atom); |
533 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
534 |
|
std::vector<Node> iexp = nf_exp; |
535 |
|
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
536 |
|
d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA); |
537 |
|
addedLemma = true; |
538 |
|
d_regexp_ccached.insert(atom); |
539 |
|
return false; |
540 |
|
} |
541 |
|
case 1: |
542 |
|
{ |
543 |
|
d_regexp_ccached.insert(atom); |
544 |
|
break; |
545 |
|
} |
546 |
4 |
case 2: |
547 |
|
{ |
548 |
8 |
std::vector<Node> noExplain; |
549 |
4 |
noExplain.push_back(atom); |
550 |
4 |
noExplain.push_back(x.eqNode(d_emptyString)); |
551 |
8 |
std::vector<Node> iexp = nf_exp; |
552 |
4 |
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
553 |
4 |
d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF); |
554 |
4 |
addedLemma = true; |
555 |
4 |
d_regexp_ccached.insert(atom); |
556 |
4 |
return false; |
557 |
|
} |
558 |
|
default: |
559 |
|
// Impossible |
560 |
|
break; |
561 |
|
} |
562 |
|
} |
563 |
|
else |
564 |
|
{ |
565 |
282 |
if (deriveRegExp(x, r, atom, nf_exp)) |
566 |
|
{ |
567 |
4 |
addedLemma = true; |
568 |
4 |
d_regexp_ccached.insert(atom); |
569 |
4 |
return false; |
570 |
|
} |
571 |
|
} |
572 |
278 |
return true; |
573 |
|
} |
574 |
|
|
575 |
282 |
cvc5::String RegExpSolver::getHeadConst(Node x) |
576 |
|
{ |
577 |
282 |
if (x.isConst()) |
578 |
|
{ |
579 |
2 |
return x.getConst<String>(); |
580 |
|
} |
581 |
280 |
else if (x.getKind() == STRING_CONCAT) |
582 |
|
{ |
583 |
38 |
if (x[0].isConst()) |
584 |
|
{ |
585 |
6 |
return x[0].getConst<String>(); |
586 |
|
} |
587 |
|
} |
588 |
274 |
return d_emptyString.getConst<String>(); |
589 |
|
} |
590 |
|
|
591 |
282 |
bool RegExpSolver::deriveRegExp(Node x, |
592 |
|
Node r, |
593 |
|
Node atom, |
594 |
|
std::vector<Node>& ant) |
595 |
|
{ |
596 |
282 |
Assert(x != d_emptyString); |
597 |
564 |
Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x |
598 |
282 |
<< ", r= " << r << std::endl; |
599 |
564 |
cvc5::String s = getHeadConst(x); |
600 |
|
// only allow RE_DERIVE for concrete constant regular expressions |
601 |
282 |
if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) |
602 |
|
{ |
603 |
8 |
Node conc = Node::null(); |
604 |
8 |
Node dc = r; |
605 |
4 |
bool flag = true; |
606 |
4 |
for (unsigned i = 0; i < s.size(); ++i) |
607 |
|
{ |
608 |
4 |
cvc5::String c = s.substr(i, 1); |
609 |
4 |
Node dc2; |
610 |
4 |
int rt = d_regexp_opr.derivativeS(dc, c, dc2); |
611 |
4 |
dc = dc2; |
612 |
4 |
if (rt == 2) |
613 |
|
{ |
614 |
|
// CONFLICT |
615 |
4 |
flag = false; |
616 |
4 |
break; |
617 |
|
} |
618 |
|
} |
619 |
|
// send lemma |
620 |
4 |
if (flag) |
621 |
|
{ |
622 |
|
if (x.isConst()) |
623 |
|
{ |
624 |
|
Assert(false) |
625 |
|
<< "Impossible: RegExpSolver::deriveRegExp: const string in const " |
626 |
|
"regular expression."; |
627 |
|
return false; |
628 |
|
} |
629 |
|
else |
630 |
|
{ |
631 |
|
Assert(x.getKind() == STRING_CONCAT); |
632 |
|
std::vector<Node> vec_nodes; |
633 |
|
for (unsigned int i = 1; i < x.getNumChildren(); ++i) |
634 |
|
{ |
635 |
|
vec_nodes.push_back(x[i]); |
636 |
|
} |
637 |
|
Node left = utils::mkConcat(vec_nodes, x.getType()); |
638 |
|
left = Rewriter::rewrite(left); |
639 |
|
conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); |
640 |
|
} |
641 |
|
} |
642 |
8 |
std::vector<Node> iexp = ant; |
643 |
8 |
std::vector<Node> noExplain; |
644 |
4 |
noExplain.push_back(atom); |
645 |
4 |
iexp.push_back(atom); |
646 |
4 |
d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE); |
647 |
4 |
return true; |
648 |
|
} |
649 |
278 |
return false; |
650 |
|
} |
651 |
|
|
652 |
499 |
Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) |
653 |
|
{ |
654 |
499 |
Node ret = r; |
655 |
499 |
switch (r.getKind()) |
656 |
|
{ |
657 |
72 |
case REGEXP_EMPTY: |
658 |
|
case REGEXP_SIGMA: |
659 |
72 |
case REGEXP_RANGE: break; |
660 |
182 |
case STRING_TO_REGEXP: |
661 |
|
{ |
662 |
182 |
if (!r[0].isConst()) |
663 |
|
{ |
664 |
188 |
Node tmp = d_csolver.getNormalString(r[0], nf_exp); |
665 |
94 |
if (tmp != r[0]) |
666 |
|
{ |
667 |
84 |
ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); |
668 |
|
} |
669 |
|
} |
670 |
182 |
break; |
671 |
|
} |
672 |
242 |
case REGEXP_CONCAT: |
673 |
|
case REGEXP_UNION: |
674 |
|
case REGEXP_INTER: |
675 |
|
case REGEXP_STAR: |
676 |
|
case REGEXP_COMPLEMENT: |
677 |
|
{ |
678 |
484 |
std::vector<Node> vec_nodes; |
679 |
648 |
for (const Node& cr : r) |
680 |
|
{ |
681 |
406 |
vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); |
682 |
|
} |
683 |
242 |
ret = Rewriter::rewrite( |
684 |
484 |
NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); |
685 |
242 |
break; |
686 |
|
} |
687 |
3 |
default: |
688 |
|
{ |
689 |
6 |
Trace("strings-error") << "Unsupported term: " << r |
690 |
3 |
<< " in normalization SymRegExp." << std::endl; |
691 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
692 |
|
} |
693 |
|
} |
694 |
499 |
return ret; |
695 |
|
} |
696 |
|
|
697 |
|
} // namespace strings |
698 |
|
} // namespace theory |
699 |
29840 |
} // namespace cvc5 |