1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Tianyi Liang |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of the regular expression solver for the theory of strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_solver.h" |
17 |
|
|
18 |
|
#include <cmath> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "smt/logic_exception.h" |
22 |
|
#include "theory/ext_theory.h" |
23 |
|
#include "theory/strings/term_registry.h" |
24 |
|
#include "theory/strings/theory_strings_utils.h" |
25 |
|
#include "theory/theory_model.h" |
26 |
|
#include "util/statistics_value.h" |
27 |
|
|
28 |
|
using namespace std; |
29 |
|
using namespace cvc5::context; |
30 |
|
using namespace cvc5::kind; |
31 |
|
|
32 |
|
namespace cvc5 { |
33 |
|
namespace theory { |
34 |
|
namespace strings { |
35 |
|
|
36 |
15272 |
RegExpSolver::RegExpSolver(Env& env, |
37 |
|
SolverState& s, |
38 |
|
InferenceManager& im, |
39 |
|
TermRegistry& tr, |
40 |
|
CoreSolver& cs, |
41 |
|
ExtfSolver& es, |
42 |
15272 |
SequencesStatistics& stats) |
43 |
|
: EnvObj(env), |
44 |
|
d_state(s), |
45 |
|
d_im(im), |
46 |
|
d_csolver(cs), |
47 |
|
d_esolver(es), |
48 |
|
d_statistics(stats), |
49 |
15272 |
d_regexp_ucached(userContext()), |
50 |
|
d_regexp_ccached(context()), |
51 |
|
d_processed_memberships(context()), |
52 |
30544 |
d_regexp_opr(env, tr.getSkolemCache()) |
53 |
|
{ |
54 |
15272 |
d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String("")); |
55 |
15272 |
d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY); |
56 |
15272 |
d_true = NodeManager::currentNM()->mkConst(true); |
57 |
15272 |
d_false = NodeManager::currentNM()->mkConst(false); |
58 |
15272 |
} |
59 |
|
|
60 |
|
Node RegExpSolver::mkAnd(Node c1, Node c2) |
61 |
|
{ |
62 |
|
return NodeManager::currentNM()->mkNode(AND, c1, c2); |
63 |
|
} |
64 |
|
|
65 |
10650 |
void RegExpSolver::checkMemberships() |
66 |
|
{ |
67 |
|
// add the memberships |
68 |
21300 |
std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP); |
69 |
|
// maps representatives to regular expression memberships in that class |
70 |
21300 |
std::map<Node, std::vector<Node> > assertedMems; |
71 |
10650 |
const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo(); |
72 |
10650 |
std::map<Node, ExtfInfoTmp>::const_iterator it; |
73 |
12185 |
for (unsigned i = 0; i < mems.size(); i++) |
74 |
|
{ |
75 |
3070 |
Node n = mems[i]; |
76 |
1535 |
Assert(n.getKind() == STRING_IN_REGEXP); |
77 |
1535 |
it = einfo.find(n); |
78 |
1535 |
Assert(it != einfo.end()); |
79 |
1535 |
if (!it->second.d_const.isNull()) |
80 |
|
{ |
81 |
1503 |
bool pol = it->second.d_const.getConst<bool>(); |
82 |
3006 |
Trace("strings-process-debug") |
83 |
1503 |
<< " add membership : " << n << ", pol = " << pol << std::endl; |
84 |
3006 |
Node r = d_state.getRepresentative(n[0]); |
85 |
1503 |
assertedMems[r].push_back(pol ? n : n.negate()); |
86 |
|
} |
87 |
|
else |
88 |
|
{ |
89 |
64 |
Trace("strings-process-debug") |
90 |
32 |
<< " irrelevant (non-asserted) membership : " << n << std::endl; |
91 |
|
} |
92 |
|
} |
93 |
10650 |
check(assertedMems); |
94 |
10650 |
} |
95 |
|
|
96 |
10650 |
void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems) |
97 |
|
{ |
98 |
10650 |
bool addedLemma = false; |
99 |
10650 |
bool changed = false; |
100 |
21278 |
std::vector<Node> processed; |
101 |
|
|
102 |
10650 |
Trace("regexp-process") << "Checking Memberships ... " << std::endl; |
103 |
11653 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
104 |
|
{ |
105 |
2028 |
std::vector<Node> mems2 = mr.second; |
106 |
2050 |
Trace("regexp-process") |
107 |
1025 |
<< "Memberships(" << mr.first << ") = " << mr.second << std::endl; |
108 |
1025 |
if (!checkEqcInclusion(mems2)) |
109 |
|
{ |
110 |
|
// conflict discovered, return |
111 |
10 |
return; |
112 |
|
} |
113 |
1015 |
if (!checkEqcIntersect(mems2)) |
114 |
|
{ |
115 |
|
// conflict discovered, return |
116 |
12 |
return; |
117 |
|
} |
118 |
|
} |
119 |
|
|
120 |
21256 |
Trace("regexp-debug") |
121 |
10628 |
<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma |
122 |
10628 |
<< std::endl; |
123 |
10628 |
if (!addedLemma) |
124 |
|
{ |
125 |
|
// get all memberships |
126 |
21256 |
std::map<Node, Node> allMems; |
127 |
11631 |
for (const std::pair<const Node, std::vector<Node> >& mr : mems) |
128 |
|
{ |
129 |
2447 |
for (const Node& m : mr.second) |
130 |
|
{ |
131 |
1444 |
allMems[m] = mr.first; |
132 |
|
} |
133 |
|
} |
134 |
|
|
135 |
10628 |
NodeManager* nm = NodeManager::currentNM(); |
136 |
|
// representatives of strings that are the LHS of positive memberships that |
137 |
|
// we unfolded |
138 |
21256 |
std::unordered_set<Node> repUnfold; |
139 |
|
// check positive (e=0), then negative (e=1) memberships |
140 |
31884 |
for (unsigned e = 0; e < 2; e++) |
141 |
|
{ |
142 |
24066 |
for (const std::pair<const Node, Node>& mp : allMems) |
143 |
|
{ |
144 |
3450 |
Node assertion = mp.first; |
145 |
3450 |
Node rep = mp.second; |
146 |
|
// check regular expression membership |
147 |
5740 |
Trace("regexp-debug") |
148 |
2870 |
<< "Check : " << assertion << " " |
149 |
5740 |
<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end()) |
150 |
2870 |
<< " " |
151 |
5740 |
<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end()) |
152 |
2870 |
<< std::endl; |
153 |
5740 |
if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end() |
154 |
2870 |
|| d_regexp_ccached.find(assertion) != d_regexp_ccached.end()) |
155 |
|
{ |
156 |
1470 |
continue; |
157 |
|
} |
158 |
2800 |
Trace("strings-regexp") |
159 |
1400 |
<< "We have regular expression assertion : " << assertion |
160 |
1400 |
<< std::endl; |
161 |
1980 |
Node atom = assertion.getKind() == NOT ? assertion[0] : assertion; |
162 |
1400 |
Assert(atom == rewrite(atom)); |
163 |
1400 |
bool polarity = assertion.getKind() != NOT; |
164 |
1400 |
if (polarity != (e == 0)) |
165 |
|
{ |
166 |
686 |
continue; |
167 |
|
} |
168 |
714 |
bool flag = true; |
169 |
1294 |
Node x = atom[0]; |
170 |
1294 |
Node r = atom[1]; |
171 |
714 |
Assert(rep == d_state.getRepresentative(x)); |
172 |
|
// The following code takes normal forms into account for the purposes |
173 |
|
// of simplifying a regular expression membership x in R. For example, |
174 |
|
// if x = "A" in the current context, then we may be interested in |
175 |
|
// reasoning about ( x in R ) * { x -> "A" }. Say we update the |
176 |
|
// membership to nx in R', then: |
177 |
|
// - nfexp => ( x in R ) <=> nx in R' |
178 |
|
// - rnfexp => R = R' |
179 |
|
// We use these explanations below as assumptions on inferences when |
180 |
|
// appropriate. Notice that for inferring conflicts and tautologies, |
181 |
|
// we use the normal form of x always. This is because we always want to |
182 |
|
// discover conflicts/tautologies whenever possible. |
183 |
|
// For inferences based on regular expression unfolding, we do not use |
184 |
|
// the normal form of x. The reason is that it is better to unfold |
185 |
|
// regular expression memberships in a context-indepedent manner, |
186 |
|
// that is, not taking into account the current normal form of x, since |
187 |
|
// this ensures these lemmas are still relevant after backtracking. |
188 |
1294 |
std::vector<Node> nfexp; |
189 |
1294 |
std::vector<Node> rnfexp; |
190 |
|
// The normal form of x is stored in nx, while x is left unchanged. |
191 |
1294 |
Node nx = x; |
192 |
714 |
if (!x.isConst()) |
193 |
|
{ |
194 |
664 |
nx = d_csolver.getNormalString(x, nfexp); |
195 |
|
} |
196 |
|
// If r is not a constant regular expression, we update it based on |
197 |
|
// normal forms, which may concretize its variables. |
198 |
714 |
if (!d_regexp_opr.checkConstRegExp(r)) |
199 |
|
{ |
200 |
142 |
r = getNormalSymRegExp(r, rnfexp); |
201 |
142 |
nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end()); |
202 |
142 |
changed = true; |
203 |
|
} |
204 |
1428 |
Trace("strings-regexp-nf") << "Term " << atom << " is normalized to " |
205 |
714 |
<< nx << " IN " << r << std::endl; |
206 |
714 |
if (nx != x || changed) |
207 |
|
{ |
208 |
|
// We rewrite the membership nx IN r. |
209 |
726 |
Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r)); |
210 |
399 |
Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl; |
211 |
399 |
if (tmp.isConst()) |
212 |
|
{ |
213 |
84 |
if (tmp.getConst<bool>() == polarity) |
214 |
|
{ |
215 |
|
// it is satisfied in this SAT context |
216 |
12 |
d_regexp_ccached.insert(assertion); |
217 |
12 |
continue; |
218 |
|
} |
219 |
|
else |
220 |
|
{ |
221 |
|
// we have a conflict |
222 |
120 |
std::vector<Node> iexp = nfexp; |
223 |
120 |
std::vector<Node> noExplain; |
224 |
60 |
iexp.push_back(assertion); |
225 |
60 |
noExplain.push_back(assertion); |
226 |
120 |
Node conc = Node::null(); |
227 |
60 |
d_im.sendInference( |
228 |
|
iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT); |
229 |
60 |
addedLemma = true; |
230 |
60 |
break; |
231 |
|
} |
232 |
|
} |
233 |
|
} |
234 |
642 |
if (e == 1 && repUnfold.find(rep) != repUnfold.end()) |
235 |
|
{ |
236 |
|
// do not unfold negative memberships of strings that have new |
237 |
|
// positive unfoldings. For example: |
238 |
|
// x in ("A")* ^ NOT x in ("B")* |
239 |
|
// We unfold x = "A" ++ x' only. The intution here is that positive |
240 |
|
// unfoldings lead to stronger constraints (equalities are stronger |
241 |
|
// than disequalities), and are easier to check. |
242 |
62 |
continue; |
243 |
|
} |
244 |
580 |
if (polarity) |
245 |
|
{ |
246 |
436 |
flag = checkPDerivative(x, r, atom, addedLemma, rnfexp); |
247 |
|
} |
248 |
|
else |
249 |
|
{ |
250 |
144 |
if (!options::stringExp()) |
251 |
|
{ |
252 |
|
throw LogicException( |
253 |
|
"Strings Incomplete (due to Negative Membership) by default, " |
254 |
|
"try --strings-exp option."); |
255 |
|
} |
256 |
|
} |
257 |
580 |
if (flag) |
258 |
|
{ |
259 |
|
// check if the term is atomic |
260 |
1120 |
Trace("strings-regexp") |
261 |
560 |
<< "Unroll/simplify membership of atomic term " << rep |
262 |
560 |
<< std::endl; |
263 |
|
// if so, do simple unrolling |
264 |
560 |
Trace("strings-regexp") << "Simplify on " << atom << std::endl; |
265 |
1120 |
Node conc = d_regexp_opr.simplify(atom, polarity); |
266 |
560 |
Trace("strings-regexp") << "...finished, got " << conc << std::endl; |
267 |
|
// if simplifying successfully generated a lemma |
268 |
560 |
if (!conc.isNull()) |
269 |
|
{ |
270 |
1108 |
std::vector<Node> iexp; |
271 |
1108 |
std::vector<Node> noExplain; |
272 |
554 |
iexp.push_back(assertion); |
273 |
554 |
noExplain.push_back(assertion); |
274 |
554 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
275 |
554 |
if (polarity) |
276 |
|
{ |
277 |
412 |
d_statistics.d_regexpUnfoldingsPos << atom[1].getKind(); |
278 |
|
} |
279 |
|
else |
280 |
|
{ |
281 |
142 |
d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind(); |
282 |
|
} |
283 |
554 |
InferenceId inf = |
284 |
554 |
polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG; |
285 |
|
// in very rare cases, we may find out that the unfolding lemma |
286 |
|
// for a membership is equivalent to true, in spite of the RE |
287 |
|
// not being rewritten to true. |
288 |
554 |
if (d_im.sendInference(iexp, noExplain, conc, inf)) |
289 |
|
{ |
290 |
548 |
addedLemma = true; |
291 |
548 |
if (e == 0) |
292 |
|
{ |
293 |
|
// Remember that we have unfolded a membership for x |
294 |
|
// notice that we only do this here, after we have definitely |
295 |
|
// added a lemma. |
296 |
410 |
repUnfold.insert(rep); |
297 |
|
} |
298 |
|
} |
299 |
554 |
processed.push_back(assertion); |
300 |
|
} |
301 |
|
else |
302 |
|
{ |
303 |
|
// otherwise we are incomplete |
304 |
6 |
d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY); |
305 |
|
} |
306 |
|
} |
307 |
580 |
if (d_state.isInConflict()) |
308 |
|
{ |
309 |
|
break; |
310 |
|
} |
311 |
|
} |
312 |
|
} |
313 |
|
} |
314 |
10628 |
if (addedLemma) |
315 |
|
{ |
316 |
412 |
if (!d_state.isInConflict()) |
317 |
|
{ |
318 |
962 |
for (unsigned i = 0; i < processed.size(); i++) |
319 |
|
{ |
320 |
1100 |
Trace("strings-regexp") |
321 |
550 |
<< "...add " << processed[i] << " to u-cache." << std::endl; |
322 |
550 |
d_regexp_ucached.insert(processed[i]); |
323 |
|
} |
324 |
|
} |
325 |
|
} |
326 |
|
} |
327 |
|
|
328 |
1025 |
bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems) |
329 |
|
{ |
330 |
2050 |
std::unordered_set<Node> remove; |
331 |
|
|
332 |
2500 |
for (const Node& m1 : mems) |
333 |
|
{ |
334 |
1485 |
bool m1Neg = m1.getKind() == NOT; |
335 |
2922 |
Node m1Lit = m1Neg ? m1[0] : m1; |
336 |
|
|
337 |
1485 |
if (remove.find(m1) != remove.end()) |
338 |
|
{ |
339 |
|
// Skip memberships marked for removal |
340 |
38 |
continue; |
341 |
|
} |
342 |
|
|
343 |
4112 |
for (const Node& m2 : mems) |
344 |
|
{ |
345 |
2729 |
if (m1 == m2) |
346 |
|
{ |
347 |
1419 |
continue; |
348 |
|
} |
349 |
|
|
350 |
1310 |
bool m2Neg = m2.getKind() == NOT; |
351 |
2556 |
Node m2Lit = m2Neg ? m2[0] : m2; |
352 |
|
|
353 |
|
// Both regular expression memberships have the same polarity |
354 |
1310 |
if (m1Neg == m2Neg) |
355 |
|
{ |
356 |
864 |
if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1])) |
357 |
|
{ |
358 |
94 |
if (m1Neg) |
359 |
|
{ |
360 |
|
// ~str.in.re(x, R1) includes ~str.in.re(x, R2) ---> |
361 |
|
// mark ~str.in.re(x, R2) as reduced |
362 |
40 |
d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG); |
363 |
40 |
remove.insert(m2); |
364 |
|
} |
365 |
|
else |
366 |
|
{ |
367 |
|
// str.in.re(x, R1) includes str.in.re(x, R2) ---> |
368 |
|
// mark str.in.re(x, R1) as reduced |
369 |
54 |
d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE); |
370 |
54 |
remove.insert(m1); |
371 |
|
|
372 |
|
// We don't need to process m1 anymore |
373 |
54 |
break; |
374 |
|
} |
375 |
|
} |
376 |
|
} |
377 |
|
else |
378 |
|
{ |
379 |
882 |
Node pos = m1Neg ? m2Lit : m1Lit; |
380 |
882 |
Node neg = m1Neg ? m1Lit : m2Lit; |
381 |
446 |
if (d_regexp_opr.regExpIncludes(neg[1], pos[1])) |
382 |
|
{ |
383 |
|
// We have a conflict because we have a case where str.in.re(x, R1) |
384 |
|
// and ~str.in.re(x, R2) but R2 includes R1, so there is no |
385 |
|
// possible value for x that satisfies both memberships. |
386 |
20 |
std::vector<Node> vec_nodes; |
387 |
10 |
vec_nodes.push_back(pos); |
388 |
10 |
vec_nodes.push_back(neg.negate()); |
389 |
|
|
390 |
10 |
if (pos[0] != neg[0]) |
391 |
|
{ |
392 |
|
vec_nodes.push_back(pos[0].eqNode(neg[0])); |
393 |
|
} |
394 |
|
|
395 |
20 |
Node conc; |
396 |
10 |
d_im.sendInference( |
397 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true); |
398 |
10 |
return false; |
399 |
|
} |
400 |
|
} |
401 |
|
} |
402 |
|
} |
403 |
|
|
404 |
3045 |
mems.erase(std::remove_if( |
405 |
|
mems.begin(), |
406 |
|
mems.end(), |
407 |
2486 |
[&remove](Node& n) { return remove.find(n) != remove.end(); }), |
408 |
4060 |
mems.end()); |
409 |
|
|
410 |
1015 |
return true; |
411 |
|
} |
412 |
|
|
413 |
1015 |
bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems) |
414 |
|
{ |
415 |
|
// do not compute intersections if the re intersection mode is none |
416 |
1015 |
if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE) |
417 |
|
{ |
418 |
|
return true; |
419 |
|
} |
420 |
1015 |
if (mems.empty()) |
421 |
|
{ |
422 |
|
// nothing to do |
423 |
22 |
return true; |
424 |
|
} |
425 |
|
// the initial regular expression membership and its constant type |
426 |
1986 |
Node mi; |
427 |
993 |
RegExpConstType rcti = RE_C_UNKNOWN; |
428 |
993 |
NodeManager* nm = NodeManager::currentNM(); |
429 |
2262 |
for (const Node& m : mems) |
430 |
|
{ |
431 |
1835 |
if (m.getKind() != STRING_IN_REGEXP) |
432 |
|
{ |
433 |
|
// do not do negative |
434 |
506 |
Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP); |
435 |
1768 |
continue; |
436 |
|
} |
437 |
823 |
RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]); |
438 |
823 |
if (rct == RE_C_VARIABLE |
439 |
1415 |
|| (options::stringRegExpInterMode() |
440 |
|
== options::RegExpInterMode::CONSTANT |
441 |
645 |
&& rct != RE_C_CONRETE_CONSTANT)) |
442 |
|
{ |
443 |
|
// cannot do intersection on RE with variables, or with re.allchar based |
444 |
|
// on option. |
445 |
592 |
continue; |
446 |
|
} |
447 |
462 |
if (options::stringRegExpInterMode() |
448 |
231 |
== options::RegExpInterMode::ONE_CONSTANT) |
449 |
|
{ |
450 |
|
if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT) |
451 |
|
{ |
452 |
|
// if both have re.allchar, do not do intersection if the |
453 |
|
// options::RegExpInterMode::ONE_CONSTANT option is set. |
454 |
|
continue; |
455 |
|
} |
456 |
|
} |
457 |
395 |
if (mi.isNull()) |
458 |
|
{ |
459 |
|
// first regular expression seen |
460 |
164 |
mi = m; |
461 |
164 |
rcti = rct; |
462 |
164 |
continue; |
463 |
|
} |
464 |
74 |
Node resR = d_regexp_opr.intersect(mi[1], m[1]); |
465 |
|
// intersection should be computable |
466 |
67 |
Assert(!resR.isNull()); |
467 |
67 |
if (resR == d_emptyRegexp) |
468 |
|
{ |
469 |
|
// conflict, explain |
470 |
24 |
std::vector<Node> vec_nodes; |
471 |
12 |
vec_nodes.push_back(mi); |
472 |
12 |
vec_nodes.push_back(m); |
473 |
12 |
if (mi[0] != m[0]) |
474 |
|
{ |
475 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
476 |
|
} |
477 |
24 |
Node conc; |
478 |
12 |
d_im.sendInference( |
479 |
|
vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true); |
480 |
|
// conflict, return |
481 |
12 |
return false; |
482 |
|
} |
483 |
|
// rewrite to ensure the equality checks below are precise |
484 |
62 |
Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR); |
485 |
62 |
Node mresr = rewrite(mres); |
486 |
55 |
if (mresr == mi) |
487 |
|
{ |
488 |
|
// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent |
489 |
|
// to x in R1, hence x in R2 can be marked redundant. |
490 |
|
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
491 |
|
} |
492 |
55 |
else if (mresr == m) |
493 |
|
{ |
494 |
|
// same as above, opposite direction |
495 |
7 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME); |
496 |
|
} |
497 |
|
else |
498 |
|
{ |
499 |
|
// new conclusion |
500 |
|
// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2)) |
501 |
96 |
std::vector<Node> vec_nodes; |
502 |
48 |
vec_nodes.push_back(mi); |
503 |
48 |
vec_nodes.push_back(m); |
504 |
48 |
if (mi[0] != m[0]) |
505 |
|
{ |
506 |
8 |
vec_nodes.push_back(mi[0].eqNode(m[0])); |
507 |
|
} |
508 |
48 |
d_im.sendInference( |
509 |
|
vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true); |
510 |
|
// both are reduced |
511 |
48 |
d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER); |
512 |
48 |
d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER); |
513 |
|
// do not send more than one lemma for this class |
514 |
48 |
return true; |
515 |
|
} |
516 |
|
} |
517 |
933 |
return true; |
518 |
|
} |
519 |
|
|
520 |
436 |
bool RegExpSolver::checkPDerivative( |
521 |
|
Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp) |
522 |
|
{ |
523 |
436 |
if (d_state.areEqual(x, d_emptyString)) |
524 |
|
{ |
525 |
16 |
Node exp; |
526 |
16 |
switch (d_regexp_opr.delta(r, exp)) |
527 |
|
{ |
528 |
|
case 0: |
529 |
|
{ |
530 |
|
std::vector<Node> noExplain; |
531 |
|
noExplain.push_back(atom); |
532 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
533 |
|
std::vector<Node> iexp = nf_exp; |
534 |
|
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
535 |
|
d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA); |
536 |
|
addedLemma = true; |
537 |
|
d_regexp_ccached.insert(atom); |
538 |
|
return false; |
539 |
|
} |
540 |
|
case 1: |
541 |
|
{ |
542 |
|
d_regexp_ccached.insert(atom); |
543 |
|
break; |
544 |
|
} |
545 |
16 |
case 2: |
546 |
|
{ |
547 |
32 |
std::vector<Node> noExplain; |
548 |
16 |
noExplain.push_back(atom); |
549 |
16 |
if (x != d_emptyString) |
550 |
|
{ |
551 |
|
noExplain.push_back(x.eqNode(d_emptyString)); |
552 |
|
} |
553 |
32 |
std::vector<Node> iexp = nf_exp; |
554 |
16 |
iexp.insert(iexp.end(), noExplain.begin(), noExplain.end()); |
555 |
16 |
d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF); |
556 |
16 |
addedLemma = true; |
557 |
16 |
d_regexp_ccached.insert(atom); |
558 |
16 |
return false; |
559 |
|
} |
560 |
|
default: |
561 |
|
// Impossible |
562 |
|
break; |
563 |
|
} |
564 |
|
} |
565 |
|
else |
566 |
|
{ |
567 |
420 |
if (deriveRegExp(x, r, atom, nf_exp)) |
568 |
|
{ |
569 |
4 |
addedLemma = true; |
570 |
4 |
d_regexp_ccached.insert(atom); |
571 |
4 |
return false; |
572 |
|
} |
573 |
|
} |
574 |
416 |
return true; |
575 |
|
} |
576 |
|
|
577 |
420 |
cvc5::String RegExpSolver::getHeadConst(Node x) |
578 |
|
{ |
579 |
420 |
if (x.isConst()) |
580 |
|
{ |
581 |
4 |
return x.getConst<String>(); |
582 |
|
} |
583 |
416 |
else if (x.getKind() == STRING_CONCAT) |
584 |
|
{ |
585 |
50 |
if (x[0].isConst()) |
586 |
|
{ |
587 |
6 |
return x[0].getConst<String>(); |
588 |
|
} |
589 |
|
} |
590 |
410 |
return d_emptyString.getConst<String>(); |
591 |
|
} |
592 |
|
|
593 |
420 |
bool RegExpSolver::deriveRegExp(Node x, |
594 |
|
Node r, |
595 |
|
Node atom, |
596 |
|
std::vector<Node>& ant) |
597 |
|
{ |
598 |
420 |
Assert(x != d_emptyString); |
599 |
840 |
Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x |
600 |
420 |
<< ", r= " << r << std::endl; |
601 |
840 |
cvc5::String s = getHeadConst(x); |
602 |
|
// only allow RE_DERIVE for concrete constant regular expressions |
603 |
420 |
if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT) |
604 |
|
{ |
605 |
8 |
Node conc = Node::null(); |
606 |
8 |
Node dc = r; |
607 |
4 |
bool flag = true; |
608 |
4 |
for (unsigned i = 0; i < s.size(); ++i) |
609 |
|
{ |
610 |
4 |
cvc5::String c = s.substr(i, 1); |
611 |
4 |
Node dc2; |
612 |
4 |
int rt = d_regexp_opr.derivativeS(dc, c, dc2); |
613 |
4 |
dc = dc2; |
614 |
4 |
if (rt == 2) |
615 |
|
{ |
616 |
|
// CONFLICT |
617 |
4 |
flag = false; |
618 |
4 |
break; |
619 |
|
} |
620 |
|
} |
621 |
|
// send lemma |
622 |
4 |
if (flag) |
623 |
|
{ |
624 |
|
if (x.isConst()) |
625 |
|
{ |
626 |
|
Assert(false) |
627 |
|
<< "Impossible: RegExpSolver::deriveRegExp: const string in const " |
628 |
|
"regular expression."; |
629 |
|
return false; |
630 |
|
} |
631 |
|
else |
632 |
|
{ |
633 |
|
Assert(x.getKind() == STRING_CONCAT); |
634 |
|
std::vector<Node> vec_nodes; |
635 |
|
for (unsigned int i = 1; i < x.getNumChildren(); ++i) |
636 |
|
{ |
637 |
|
vec_nodes.push_back(x[i]); |
638 |
|
} |
639 |
|
Node left = utils::mkConcat(vec_nodes, x.getType()); |
640 |
|
left = rewrite(left); |
641 |
|
conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc); |
642 |
|
} |
643 |
|
} |
644 |
8 |
std::vector<Node> iexp = ant; |
645 |
8 |
std::vector<Node> noExplain; |
646 |
4 |
noExplain.push_back(atom); |
647 |
4 |
iexp.push_back(atom); |
648 |
4 |
d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE); |
649 |
4 |
return true; |
650 |
|
} |
651 |
416 |
return false; |
652 |
|
} |
653 |
|
|
654 |
808 |
Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp) |
655 |
|
{ |
656 |
808 |
Node ret = r; |
657 |
808 |
switch (r.getKind()) |
658 |
|
{ |
659 |
108 |
case REGEXP_EMPTY: |
660 |
|
case REGEXP_SIGMA: |
661 |
108 |
case REGEXP_RANGE: break; |
662 |
272 |
case STRING_TO_REGEXP: |
663 |
|
{ |
664 |
272 |
if (!r[0].isConst()) |
665 |
|
{ |
666 |
332 |
Node tmp = d_csolver.getNormalString(r[0], nf_exp); |
667 |
166 |
if (tmp != r[0]) |
668 |
|
{ |
669 |
120 |
ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp); |
670 |
|
} |
671 |
|
} |
672 |
272 |
break; |
673 |
|
} |
674 |
422 |
case REGEXP_CONCAT: |
675 |
|
case REGEXP_UNION: |
676 |
|
case REGEXP_INTER: |
677 |
|
case REGEXP_STAR: |
678 |
|
case REGEXP_COMPLEMENT: |
679 |
|
{ |
680 |
844 |
std::vector<Node> vec_nodes; |
681 |
1088 |
for (const Node& cr : r) |
682 |
|
{ |
683 |
666 |
vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp)); |
684 |
|
} |
685 |
422 |
ret = rewrite(NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes)); |
686 |
422 |
break; |
687 |
|
} |
688 |
6 |
default: |
689 |
|
{ |
690 |
12 |
Trace("strings-error") << "Unsupported term: " << r |
691 |
6 |
<< " in normalization SymRegExp." << std::endl; |
692 |
6 |
Assert(!utils::isRegExpKind(r.getKind())); |
693 |
|
} |
694 |
|
} |
695 |
808 |
return ret; |
696 |
|
} |
697 |
|
|
698 |
|
} // namespace strings |
699 |
|
} // namespace theory |
700 |
31137 |
} // namespace cvc5 |