1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of the regular expression solver for the theory of strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_solver.h" |
17 |
|
|
18 |
|
#include <cmath> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "smt/logic_exception.h" |
22 |
|
#include "theory/ext_theory.h" |
23 |
|
#include "theory/strings/theory_strings_utils.h" |
24 |
|
#include "theory/theory_model.h" |
25 |
|
#include "util/statistics_value.h" |
26 |
|
|
27 |
|
using namespace std; |
28 |
|
using namespace cvc5::context; |
29 |
|
using namespace cvc5::kind; |
30 |
|
|
31 |
|
namespace cvc5 { |
32 |
|
namespace theory { |
33 |
|
namespace strings { |
34 |
|
|
35 |
8954 |
RegExpSolver::RegExpSolver(SolverState& s, |
36 |
|
InferenceManager& im, |
37 |
|
SkolemCache* skc, |
38 |
|
CoreSolver& cs, |
39 |
|
ExtfSolver& es, |
40 |
8954 |
SequencesStatistics& stats) |
41 |
|
: d_state(s), |
42 |
|
d_im(im), |
43 |
|
d_csolver(cs), |
44 |
|
d_esolver(es), |
45 |
|
d_statistics(stats), |
46 |
8954 |
d_regexp_ucached(s.getUserContext()), |
47 |
|
d_regexp_ccached(s.getSatContext()), |
48 |
|
d_processed_memberships(s.getSatContext()), |
49 |
17908 |
d_regexp_opr(skc) |
50 |
|
{ |
51 |
8954 |
d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String("")); |
52 |
17908 |
std::vector<Node> nvec; |
53 |
8954 |
d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY, nvec); |
54 |
8954 |
d_true = NodeManager::currentNM()->mkConst(true); |
55 |
8954 |
d_false = NodeManager::currentNM()->mkConst(false); |
56 |
8954 |
} |
57 |
|
|
58 |
|
Node RegExpSolver::mkAnd(Node c1, Node c2) |
59 |
|
{ |
60 |
|
return NodeManager::currentNM()->mkNode(AND, c1, c2); |
61 |
|
} |
62 |
|
|
63 |
7356 |
void RegExpSolver::checkMemberships() |
64 |
|
{ |
65 |
|
// add the memberships |
66 |
14712 |
std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP); |
67 |
|
// maps representatives to regular expression memberships in that class |
68 |
14712 |
std::map<Node, std::vector<Node> > assertedMems; |
69 |
7356 |
const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo(); |
70 |
7356 |
std::map<Node, ExtfInfoTmp>::const_iterator it; |
71 |
8351 |
for (unsigned i = 0; i < mems.size(); i++) |
72 |
|
{ |
73 |
1990 |
Node n = mems[i]; |
74 |
995 |
Assert(n.getKind() == STRING_IN_REGEXP); |
75 |
995 |
it = einfo.find(n); |
76 |
995 |
Assert(it != einfo.end()); |
77 |
995 |
if (!it->second.d_const.isNull()) |
78 |
|
{ |
79 |
973 |
bool pol = it->second.d_const.getConst<bool>(); |
80 |
1946 |
Trace("strings-process-debug") |
81 |
973 |
<< " add membership : " << n << ", pol = " << pol << std::endl; |
82 |
1946 |
Node r = d_state.getRepresentative(n[0]); |
83 |
973 |
assertedMems[r].push_back(pol ? n : n.negate()); |
84 |
|
} |
85 |
|
else |
86 |
|
{ |
87 |
44 |
Trace("strings-process-debug") |
88 |
22 |
<< " irrelevant (non-asserted) membership : " << n << std::endl; |
89 |
|
} |
90 |
|
} |
91 |
7356 |
check(assertedMems); |
92 |
7356 |
} |
93 |
|
|
94 |
7356 |
void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) |
95 |
|
{ |
96 |
7356 |
bool addedLemma = false; |
97 |
7356 |
bool changed = false; |
98 |
14690 |
std::vector<Node> processed; |
99 |
|
|
100 |
7356 |
Trace("regexp-process") << "Checking Memberships ... " << std::endl; |
101 |
8015 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
102 |
|
{ |
103 |
1340 |
std::vector<Node> mems2 = mr.second; |
104 |
1362 |
Trace("regexp-process") |
105 |
681 |
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl; |
106 |
681 |
if (!checkEqcInclusion(mems2)) |
107 |
|
{ |
108 |
|
// conflict discovered, return |
109 |
10 |
return; |
110 |
|
} |
111 |
671 |
if (!checkEqcIntersect(mems2)) |
112 |
|
{ |
113 |
|
// conflict discovered, return |
114 |
12 |
return; |
115 |
|
} |
116 |
|
} |
117 |
|
|
118 |
14668 |
Trace("regexp-debug") |
119 |
7334 |
<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma |
120 |
7334 |
<< std::endl; |
121 |
7334 |
if (!addedLemma) |
122 |
|
{ |
123 |
|
// get all memberships |
124 |
14668 |
std::map<Node, Node> allMems; |
125 |
7993 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
126 |
|
{ |
127 |
1573 |
for (const Node& m : mr.second) |
128 |
|
{ |
129 |
914 |
bool polarity = m.getKind() != NOT; |
130 |
1268 |
if (polarity || !options::stringIgnNegMembership()) |
131 |
|
{ |
132 |
908 |
allMems[m] = mr.first; |
133 |
|
} |
134 |
|
} |
135 |
|
} |
136 |
|
|
137 |
7334 |
NodeManager* nm = NodeManager::currentNM(); |
138 |
|
// representatives of strings that are the LHS of positive memberships that |
139 |
|
// we unfolded |
140 |
14668 |
std::unordered_set<Node> repUnfold; |
141 |
|
// check positive (e=0), then negative (e=1) memberships |
142 |
22002 |
for (unsigned e = 0; e < 2; e++) |
143 |
|
{ |
144 |
16432 |
for (const std::pair<const Node, Node>& mp : allMems) |
145 |
|
{ |
146 |
2188 |
Node assertion = mp.first; |
147 |
2188 |
Node rep = mp.second; |
148 |
|
// check regular expression membership |
149 |
3628 |
Trace("regexp-debug") |
150 |
1814 |
<< "Check : " << assertion << " " |
151 |
3628 |
<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) |
152 |
1814 |
<< " " |
153 |
3628 |
<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) |
154 |
1814 |
<< std::endl; |
155 |
3628 |
if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() |
156 |
1814 |
|| d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) |
157 |
|
{ |
158 |
906 |
continue; |
159 |
|
} |
160 |
1816 |
Trace("strings-regexp") |
161 |
908 |
<< "We have regular expression assertion : " << assertion |
162 |
908 |
<< std::endl; |
163 |
1282 |
Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; |
164 |
908 |
Assert(atom == Rewriter::rewrite(atom)); |
165 |
908 |
bool polarity = assertion.getKind() != NOT; |
166 |
908 |
if (polarity != (e == 0)) |
167 |
|
{ |
168 |
448 |
continue; |
169 |
|
} |
170 |
460 |
bool flag = true; |
171 |
834 |
Node x = atom[0]; |
172 |
834 |
Node r = atom[1]; |
173 |
460 |
Assert(rep == d_state.getRepresentative(x)); |
174 |
|
// The following code takes normal forms into account for the purposes |
175 |
|
// of simplifying a regular expression membership x in R. For example, |
176 |
|
// if x = "A" in the current context, then we may be interested in |
177 |
|
// reasoning about ( x in R ) * { x -> "A" }. Say we update the |
178 |
|
// membership to nx in R', then: |
179 |
|
// - nfexp => ( x in R ) <=> nx in R' |
180 |
|
// - rnfexp => R = R' |
181 |
|
// We use these explanations below as assumptions on inferences when |
182 |
|
// appropriate. Notice that for inferring conflicts and tautologies, |
183 |
|
// we use the normal form of x always. This is because we always want to |
184 |
|
// discover conflicts/tautologies whenever possible. |
185 |
|
// For inferences based on regular expression unfolding, we do not use |
186 |
|
// the normal form of x. The reason is that it is better to unfold |
187 |
|
// regular expression memberships in a context-indepedent manner, |
188 |
|
// that is, not taking into account the current normal form of x, since |
189 |
|
// this ensures these lemmas are still relevant after backtracking. |
190 |
834 |
std::vector<Node> nfexp; |
191 |
834 |
std::vector<Node> rnfexp; |
192 |
|
// The normal form of x is stored in nx, while x is left unchanged. |
193 |
834 |
Node nx = x; |
194 |
460 |
if (!x.isConst()) |
195 |
|
{ |
196 |
447 |
nx = d_csolver.getNormalString(x, nfexp); |
197 |
|
} |
198 |
|
// If r is not a constant regular expression, we update it based on |
199 |
|
// normal forms, which may concretize its variables. |
200 |
460 |
if (!d_regexp_opr.checkConstRegExp(r)) |
201 |
|
{ |
202 |
93 |
r = getNormalSymRegExp(r, rnfexp); |
203 |
93 |
nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); |
204 |
93 |
changed = true; |
205 |
|
} |
206 |
920 |
Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " |
207 |
460 |
<< nx << " IN " << r << std::endl; |
208 |
460 |
if (nx != x || changed) |
209 |
|
{ |
210 |
|
// We rewrite the membership nx IN r. |
211 |
430 |
Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); |
212 |
243 |
Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; |
213 |
243 |
if (tmp.isConst()) |
214 |
|
{ |
215 |
62 |
if (tmp.getConst<bool>() == polarity) |
216 |
|
{ |
217 |
|
// it is satisfied in this SAT context |
218 |
6 |
d_regexp_ccached.insert(assertion); |
219 |
6 |
continue; |
220 |
|
} |
221 |
|
else |
222 |
|
{ |
223 |
|
// we have a conflict |
224 |
100 |
std::vector<Node> iexp = nfexp; |
225 |
100 |
std::vector<Node> noExplain; |
226 |
50 |
iexp.push_back(assertion); |
227 |
50 |
noExplain.push_back(assertion); |
228 |
100 |
Node conc = Node::null(); |
229 |
50 |
d_im.sendInference( |
230 |
|
iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); |
231 |
50 |
addedLemma = true; |
232 |
50 |
break; |
233 |
|
} |
234 |
|
} |
235 |
|
} |
236 |
404 |
if (e == 1 && repUnfold.find(rep) != repUnfold.end()) |
237 |
|
{ |
238 |
|
// do not unfold negative memberships of strings that have new |
239 |
|
// positive unfoldings. For example: |
240 |
|
// x in ("A")* ^ NOT x in ("B")* |
241 |
|
// We unfold x = "A" ++ x' only. The intution here is that positive |
242 |
|
// unfoldings lead to stronger constraints (equalities are stronger |
243 |
|
// than disequalities), and are easier to check. |
244 |
30 |
continue; |
245 |
|
} |
246 |
374 |
if (polarity) |
247 |
|
{ |
248 |
286 |
flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); |
249 |
|
} |
250 |
|
else |
251 |
|
{ |
252 |
88 |
if (!options::stringExp()) |
253 |
|
{ |
254 |
|
throw LogicException( |
255 |
|
"Strings Incomplete (due to Negative Membership) by default, " |
256 |
|
"try --strings-exp option."); |
257 |
|
} |
258 |
|
} |
259 |
374 |
if (flag) |
260 |
|
{ |
261 |
|
// check if the term is atomic |
262 |
732 |
Trace("strings-regexp") |
263 |
366 |
<< "Unroll/simplify membership of atomic term " << rep |
264 |
366 |
<< std::endl; |
265 |
|
// if so, do simple unrolling |
266 |
366 |
Trace("strings-regexp") << "Simplify on " << atom << std::endl; |
267 |
732 |
Node conc = d_regexp_opr.simplify(atom, polarity); |
268 |
366 |
Trace("strings-regexp") << "...finished, got " << conc << std::endl; |
269 |
|
// if simplifying successfully generated a lemma |
270 |
366 |
if (!conc.isNull()) |
271 |
|
{ |
272 |
726 |
std::vector<Node> iexp; |
273 |
726 |
std::vector<Node> noExplain; |
274 |
363 |
iexp.push_back(assertion); |
275 |
363 |
noExplain.push_back(assertion); |
276 |
363 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
277 |
363 |
if (polarity) |
278 |
|
{ |
279 |
276 |
d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); |
280 |
|
} |
281 |
|
else |
282 |
|
{ |
283 |
87 |
d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); |
284 |
|
} |
285 |
363 |
InferenceId inf = |
286 |
363 |
polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; |
287 |
|
// in very rare cases, we may find out that the unfolding lemma |
288 |
|
// for a membership is equivalent to true, in spite of the RE |
289 |
|
// not being rewritten to true. |
290 |
363 |
if (d_im.sendInference(iexp, noExplain, conc, inf)) |
291 |
|
{ |
292 |
361 |
addedLemma = true; |
293 |
361 |
if (e == 0) |
294 |
|
{ |
295 |
|
// Remember that we have unfolded a membership for x |
296 |
|
// notice that we only do this here, after we have definitely |
297 |
|
// added a lemma. |
298 |
274 |
repUnfold.insert(rep); |
299 |
|
} |
300 |
|
} |
301 |
363 |
processed.push_back(assertion); |
302 |
|
} |
303 |
|
else |
304 |
|
{ |
305 |
|
// otherwise we are incomplete |
306 |
3 |
d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); |
307 |
|
} |
308 |
|
} |
309 |
374 |
if (d_state.isInConflict()) |
310 |
|
{ |
311 |
|
break; |
312 |
|
} |
313 |
|
} |
314 |
|
} |
315 |
|
} |
316 |
7334 |
if (addedLemma) |
317 |
|
{ |
318 |
295 |
if (!d_state.isInConflict()) |
319 |
|
{ |
320 |
658 |
for (unsigned i = 0; i < processed.size(); i++) |
321 |
|
{ |
322 |
726 |
Trace("strings-regexp") |
323 |
363 |
<< "...add " << processed[i] << " to u-cache." << std::endl; |
324 |
363 |
d_regexp_ucached.insert(processed[i]); |
325 |
|
} |
326 |
|
} |
327 |
|
} |
328 |
|
} |
329 |
|
|
330 |
681 |
bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) |
331 |
|
{ |
332 |
1362 |
std::unordered_set<Node> remove; |
333 |
|
|
334 |
1626 |
for (const Node& m1 : mems) |
335 |
|
{ |
336 |
955 |
bool m1Neg = m1.getKind() == NOT; |
337 |
1897 |
Node m1Lit = m1Neg ? m1[0] : m1; |
338 |
|
|
339 |
955 |
if (remove.find(m1) != remove.end()) |
340 |
|
{ |
341 |
|
// Skip memberships marked for removal |
342 |
3 |
continue; |
343 |
|
} |
344 |
|
|
345 |
2599 |
for (const Node& m2 : mems) |
346 |
|
{ |
347 |
1667 |
if (m1 == m2) |
348 |
|
{ |
349 |
946 |
continue; |
350 |
|
} |
351 |
|
|
352 |
721 |
bool m2Neg = m2.getKind() == NOT; |
353 |
1422 |
Node m2Lit = m2Neg ? m2[0] : m2; |
354 |
|
|
355 |
|
// Both regular expression memberships have the same polarity |
356 |
721 |
if (m1Neg == m2Neg) |
357 |
|
{ |
358 |
423 |
if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) |
359 |
|
{ |
360 |
15 |
if (m1Neg) |
361 |
|
{ |
362 |
|
// ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> |
363 |
|
// mark ~str.in.re(x, R2) as reduced |
364 |
5 |
d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); |
365 |
5 |
remove.insert(m2); |
366 |
|
} |
367 |
|
else |
368 |
|
{ |
369 |
|
// str.in.re(x, R1) includes str.in.re(x, R2) ---> |
370 |
|
// mark str.in.re(x, R1) as reduced |
371 |
10 |
d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); |
372 |
10 |
remove.insert(m1); |
373 |
|
|
374 |
|
// We don't need to process m1 anymore |
375 |
10 |
break; |
376 |
|
} |
377 |
|
} |
378 |
|
} |
379 |
|
else |
380 |
|
{ |
381 |
586 |
Node pos = m1Neg ? m2Lit : m1Lit; |
382 |
586 |
Node neg = m1Neg ? m1Lit : m2Lit; |
383 |
298 |
if (d_regexp_opr.regExpIncludes(neg[1], pos[1])) |
384 |
|
{ |
385 |
|
// We have a conflict because we have a case where str.in.re(x, R1) |
386 |
|
// and ~str.in.re(x, R2) but R2 includes R1, so there is no |
387 |
|
// possible value for x that satisfies both memberships. |
388 |
20 |
std::vector<Node> vec_nodes; |
389 |
10 |
vec_nodes.push_back(pos); |
390 |
10 |
vec_nodes.push_back(neg.negate()); |
391 |
|
|
392 |
10 |
if (pos[0] != neg[0]) |
393 |
|
{ |
394 |
|
vec_nodes.push_back(pos[0].eqNode(neg[0])); |
395 |
|
} |
396 |
|
|
397 |
20 |
Node conc; |
398 |
10 |
d_im.sendInference( |
399 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true); |
400 |
10 |
return false; |
401 |
|
} |
402 |
|
} |
403 |
|
} |
404 |
|
} |
405 |
|
|
406 |
2013 |
mems.erase(std::remove_if( |
407 |
|
mems.begin(), |
408 |
|
mems.end(), |
409 |
1612 |
[&remove](Node& n) { return remove.find(n) != remove.end(); }), |
410 |
2684 |
mems.end()); |
411 |
|
|
412 |
671 |
return true; |
413 |
|
} |
414 |
|
|
415 |
671 |
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) |
416 |
|
{ |
417 |
|
// do not compute intersections if the re intersection mode is none |
418 |
1966 |
if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE) |
419 |
|
{ |
420 |
|
return true; |
421 |
|
} |
422 |
671 |
if (mems.empty()) |
423 |
|
{ |
424 |
|
// nothing to do |
425 |
|
return true; |
426 |
|
} |
427 |
|
// the initial regular expression membership and its constant type |
428 |
1342 |
Node mi; |
429 |
671 |
RegExpConstType rcti = RE_C_UNKNOWN; |
430 |
671 |
NodeManager* nm = NodeManager::currentNM(); |
431 |
1531 |
for (const Node& m : mems) |
432 |
|
{ |
433 |
1255 |
if (m.getKind() != STRING_IN_REGEXP) |
434 |
|
{ |
435 |
|
// do not do negative |
436 |
347 |
Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); |
437 |
1200 |
continue; |
438 |
|
} |
439 |
561 |
RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); |
440 |
561 |
if (rct == RE_C_VARIABLE |
441 |
929 |
|| (options::stringRegExpInterMode() |
442 |
|
== options::RegExpInterMode::CONSTANT |
443 |
431 |
&& rct != RE_C_CONRETE_CONSTANT)) |
444 |
|
{ |
445 |
|
// cannot do intersection on RE with variables, or with re.allchar based |
446 |
|
// on option. |
447 |
368 |
continue; |
448 |
|
} |
449 |
386 |
if (options::stringRegExpInterMode() |
450 |
193 |
== options::RegExpInterMode::ONE_CONSTANT) |
451 |
|
{ |
452 |
|
if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) |
453 |
|
{ |
454 |
|
// if both have re.allchar, do not do intersection if the |
455 |
|
// options::RegExpInterMode::ONE_CONSTANT option is set. |
456 |
|
continue; |
457 |
|
} |
458 |
|
} |
459 |
331 |
if (mi.isNull()) |
460 |
|
{ |
461 |
|
// first regular expression seen |
462 |
138 |
mi = m; |
463 |
138 |
rcti = rct; |
464 |
138 |
continue; |
465 |
|
} |
466 |
62 |
Node resR = d_regexp_opr.intersect(mi[1], m[1]); |
467 |
|
// intersection should be computable |
468 |
55 |
Assert(!resR.isNull()); |
469 |
55 |
if (resR == d_emptyRegexp) |
470 |
|
{ |
471 |
|
// conflict, explain |
472 |
24 |
std::vector<Node> vec_nodes; |
473 |
12 |
vec_nodes.push_back(mi); |
474 |
12 |
vec_nodes.push_back(m); |
475 |
12 |
if (mi[0] != m[0]) |
476 |
|
{ |
477 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
478 |
|
} |
479 |
24 |
Node conc; |
480 |
12 |
d_im.sendInference( |
481 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true); |
482 |
|
// conflict, return |
483 |
12 |
return false; |
484 |
|
} |
485 |
|
// rewrite to ensure the equality checks below are precise |
486 |
50 |
Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); |
487 |
50 |
Node mresr = Rewriter::rewrite(mres); |
488 |
43 |
if (mresr == mi) |
489 |
|
{ |
490 |
|
// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent |
491 |
|
// to x in R1, hence x in R2 can be marked redundant. |
492 |
|
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
493 |
|
} |
494 |
43 |
else if (mresr == m) |
495 |
|
{ |
496 |
|
// same as above, opposite direction |
497 |
7 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
498 |
|
} |
499 |
|
else |
500 |
|
{ |
501 |
|
// new conclusion |
502 |
|
// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2)) |
503 |
72 |
std::vector<Node> vec_nodes; |
504 |
36 |
vec_nodes.push_back(mi); |
505 |
36 |
vec_nodes.push_back(m); |
506 |
36 |
if (mi[0] != m[0]) |
507 |
|
{ |
508 |
|
vec_nodes.push_back(mi[0].eqNode(m[0])); |
509 |
|
} |
510 |
36 |
d_im.sendInference( |
511 |
|
vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true); |
512 |
|
// both are reduced |
513 |
36 |
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER); |
514 |
36 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER); |
515 |
|
// do not send more than one lemma for this class |
516 |
36 |
return true; |
517 |
|
} |
518 |
|
} |
519 |
623 |
return true; |
520 |
|
} |
521 |
|
|
522 |
286 |
bool RegExpSolver::checkPDerivative( |
523 |
|
Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp) |
524 |
|
{ |
525 |
286 |
if (d_state.areEqual(x, d_emptyString)) |
526 |
|
{ |
527 |
4 |
Node exp; |
528 |
4 |
switch (d_regexp_opr.delta(r, exp)) |
529 |
|
{ |
530 |
|
case 0: |
531 |
|
{ |
532 |
|
std::vector<Node> noExplain; |
533 |
|
noExplain.push_back(atom); |
534 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
535 |
|
std::vector<Node> iexp = nf_exp; |
536 |
|
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
537 |
|
d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA); |
538 |
|
addedLemma = true; |
539 |
|
d_regexp_ccached.insert(atom); |
540 |
|
return false; |
541 |
|
} |
542 |
|
case 1: |
543 |
|
{ |
544 |
|
d_regexp_ccached.insert(atom); |
545 |
|
break; |
546 |
|
} |
547 |
4 |
case 2: |
548 |
|
{ |
549 |
8 |
std::vector<Node> noExplain; |
550 |
4 |
noExplain.push_back(atom); |
551 |
4 |
noExplain.push_back(x.eqNode(d_emptyString)); |
552 |
8 |
std::vector<Node> iexp = nf_exp; |
553 |
4 |
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
554 |
4 |
d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF); |
555 |
4 |
addedLemma = true; |
556 |
4 |
d_regexp_ccached.insert(atom); |
557 |
4 |
return false; |
558 |
|
} |
559 |
|
default: |
560 |
|
// Impossible |
561 |
|
break; |
562 |
|
} |
563 |
|
} |
564 |
|
else |
565 |
|
{ |
566 |
282 |
if (deriveRegExp(x, r, atom, nf_exp)) |
567 |
|
{ |
568 |
4 |
addedLemma = true; |
569 |
4 |
d_regexp_ccached.insert(atom); |
570 |
4 |
return false; |
571 |
|
} |
572 |
|
} |
573 |
278 |
return true; |
574 |
|
} |
575 |
|
|
576 |
282 |
cvc5::String RegExpSolver::getHeadConst(Node x) |
577 |
|
{ |
578 |
282 |
if (x.isConst()) |
579 |
|
{ |
580 |
2 |
return x.getConst<String>(); |
581 |
|
} |
582 |
280 |
else if (x.getKind() == STRING_CONCAT) |
583 |
|
{ |
584 |
38 |
if (x[0].isConst()) |
585 |
|
{ |
586 |
6 |
return x[0].getConst<String>(); |
587 |
|
} |
588 |
|
} |
589 |
274 |
return d_emptyString.getConst<String>(); |
590 |
|
} |
591 |
|
|
592 |
282 |
bool RegExpSolver::deriveRegExp(Node x, |
593 |
|
Node r, |
594 |
|
Node atom, |
595 |
|
std::vector<Node>& ant) |
596 |
|
{ |
597 |
282 |
Assert(x != d_emptyString); |
598 |
564 |
Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x |
599 |
282 |
<< ", r= " << r << std::endl; |
600 |
564 |
cvc5::String s = getHeadConst(x); |
601 |
|
// only allow RE_DERIVE for concrete constant regular expressions |
602 |
282 |
if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) |
603 |
|
{ |
604 |
8 |
Node conc = Node::null(); |
605 |
8 |
Node dc = r; |
606 |
4 |
bool flag = true; |
607 |
4 |
for (unsigned i = 0; i < s.size(); ++i) |
608 |
|
{ |
609 |
4 |
cvc5::String c = s.substr(i, 1); |
610 |
4 |
Node dc2; |
611 |
4 |
int rt = d_regexp_opr.derivativeS(dc, c, dc2); |
612 |
4 |
dc = dc2; |
613 |
4 |
if (rt == 2) |
614 |
|
{ |
615 |
|
// CONFLICT |
616 |
4 |
flag = false; |
617 |
4 |
break; |
618 |
|
} |
619 |
|
} |
620 |
|
// send lemma |
621 |
4 |
if (flag) |
622 |
|
{ |
623 |
|
if (x.isConst()) |
624 |
|
{ |
625 |
|
Assert(false) |
626 |
|
<< "Impossible: RegExpSolver::deriveRegExp: const string in const " |
627 |
|
"regular expression."; |
628 |
|
return false; |
629 |
|
} |
630 |
|
else |
631 |
|
{ |
632 |
|
Assert(x.getKind() == STRING_CONCAT); |
633 |
|
std::vector<Node> vec_nodes; |
634 |
|
for (unsigned int i = 1; i < x.getNumChildren(); ++i) |
635 |
|
{ |
636 |
|
vec_nodes.push_back(x[i]); |
637 |
|
} |
638 |
|
Node left = utils::mkConcat(vec_nodes, x.getType()); |
639 |
|
left = Rewriter::rewrite(left); |
640 |
|
conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); |
641 |
|
} |
642 |
|
} |
643 |
8 |
std::vector<Node> iexp = ant; |
644 |
8 |
std::vector<Node> noExplain; |
645 |
4 |
noExplain.push_back(atom); |
646 |
4 |
iexp.push_back(atom); |
647 |
4 |
d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE); |
648 |
4 |
return true; |
649 |
|
} |
650 |
278 |
return false; |
651 |
|
} |
652 |
|
|
653 |
499 |
Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) |
654 |
|
{ |
655 |
499 |
Node ret = r; |
656 |
499 |
switch (r.getKind()) |
657 |
|
{ |
658 |
72 |
case REGEXP_EMPTY: |
659 |
|
case REGEXP_SIGMA: |
660 |
72 |
case REGEXP_RANGE: break; |
661 |
182 |
case STRING_TO_REGEXP: |
662 |
|
{ |
663 |
182 |
if (!r[0].isConst()) |
664 |
|
{ |
665 |
188 |
Node tmp = d_csolver.getNormalString(r[0], nf_exp); |
666 |
94 |
if (tmp != r[0]) |
667 |
|
{ |
668 |
84 |
ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); |
669 |
|
} |
670 |
|
} |
671 |
182 |
break; |
672 |
|
} |
673 |
242 |
case REGEXP_CONCAT: |
674 |
|
case REGEXP_UNION: |
675 |
|
case REGEXP_INTER: |
676 |
|
case REGEXP_STAR: |
677 |
|
case REGEXP_COMPLEMENT: |
678 |
|
{ |
679 |
484 |
std::vector<Node> vec_nodes; |
680 |
648 |
for (const Node& cr : r) |
681 |
|
{ |
682 |
406 |
vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); |
683 |
|
} |
684 |
242 |
ret = Rewriter::rewrite( |
685 |
484 |
NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); |
686 |
242 |
break; |
687 |
|
} |
688 |
3 |
default: |
689 |
|
{ |
690 |
6 |
Trace("strings-error") << "Unsupported term: " << r |
691 |
3 |
<< " in normalization SymRegExp." << std::endl; |
692 |
3 |
Assert(!utils::isRegExpKind(r.getKind())); |
693 |
|
} |
694 |
|
} |
695 |
499 |
return ret; |
696 |
|
} |
697 |
|
|
698 |
|
} // namespace strings |
699 |
|
} // namespace theory |
700 |
29384 |
} // namespace cvc5 |