1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Mathias Preiner, Andres Noetzli |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of techniques for eliminating regular expressions. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/regexp_elim.h" |
17 |
|
|
18 |
|
#include "expr/proof_node_manager.h" |
19 |
|
#include "options/strings_options.h" |
20 |
|
#include "theory/rewriter.h" |
21 |
|
#include "theory/strings/regexp_entail.h" |
22 |
|
#include "theory/strings/theory_strings_utils.h" |
23 |
|
|
24 |
|
using namespace cvc5::kind; |
25 |
|
|
26 |
|
namespace cvc5 { |
27 |
|
namespace theory { |
28 |
|
namespace strings { |
29 |
|
|
30 |
9459 |
RegExpElimination::RegExpElimination(bool isAgg, |
31 |
|
ProofNodeManager* pnm, |
32 |
9459 |
context::Context* c) |
33 |
|
: d_isAggressive(isAgg), |
34 |
|
d_pnm(pnm), |
35 |
|
d_epg(pnm == nullptr |
36 |
|
? nullptr |
37 |
9459 |
: new EagerProofGenerator(pnm, c, "RegExpElimination::epg")) |
38 |
|
{ |
39 |
9459 |
} |
40 |
|
|
41 |
58 |
Node RegExpElimination::eliminate(Node atom, bool isAgg) |
42 |
|
{ |
43 |
58 |
Assert(atom.getKind() == STRING_IN_REGEXP); |
44 |
58 |
if (atom[1].getKind() == REGEXP_CONCAT) |
45 |
|
{ |
46 |
38 |
return eliminateConcat(atom, isAgg); |
47 |
|
} |
48 |
20 |
else if (atom[1].getKind() == REGEXP_STAR) |
49 |
|
{ |
50 |
20 |
return eliminateStar(atom, isAgg); |
51 |
|
} |
52 |
|
return Node::null(); |
53 |
|
} |
54 |
|
|
55 |
56 |
TrustNode RegExpElimination::eliminateTrusted(Node atom) |
56 |
|
{ |
57 |
112 |
Node eatom = eliminate(atom, d_isAggressive); |
58 |
56 |
if (!eatom.isNull()) |
59 |
|
{ |
60 |
|
// Currently aggressive doesnt work due to fresh bound variables |
61 |
46 |
if (isProofEnabled() && !d_isAggressive) |
62 |
|
{ |
63 |
4 |
Node eq = atom.eqNode(eatom); |
64 |
4 |
Node aggn = NodeManager::currentNM()->mkConst(d_isAggressive); |
65 |
|
std::shared_ptr<ProofNode> pn = |
66 |
4 |
d_pnm->mkNode(PfRule::RE_ELIM, {}, {atom, aggn}, eq); |
67 |
2 |
d_epg->setProofFor(eq, pn); |
68 |
2 |
return TrustNode::mkTrustRewrite(atom, eatom, d_epg.get()); |
69 |
|
} |
70 |
44 |
return TrustNode::mkTrustRewrite(atom, eatom, nullptr); |
71 |
|
} |
72 |
10 |
return TrustNode::null(); |
73 |
|
} |
74 |
|
|
75 |
38 |
Node RegExpElimination::eliminateConcat(Node atom, bool isAgg) |
76 |
|
{ |
77 |
38 |
NodeManager* nm = NodeManager::currentNM(); |
78 |
76 |
Node x = atom[0]; |
79 |
76 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
80 |
76 |
Node re = atom[1]; |
81 |
76 |
Node zero = nm->mkConst(Rational(0)); |
82 |
76 |
std::vector<Node> children; |
83 |
38 |
utils::getConcat(re, children); |
84 |
|
|
85 |
|
// If it can be reduced to memberships in fixed length regular expressions. |
86 |
|
// This includes concatenations where at most one child is of the form |
87 |
|
// (re.* re.allchar), which we abbreviate _* below, and all other children |
88 |
|
// have a fixed length. |
89 |
|
// The intuition why this is a "non-aggressive" rewrite is that membership |
90 |
|
// into fixed length regular expressions are easy to handle. |
91 |
38 |
bool hasFixedLength = true; |
92 |
|
// the index of _* in re |
93 |
38 |
unsigned pivotIndex = 0; |
94 |
38 |
bool hasPivotIndex = false; |
95 |
76 |
std::vector<Node> childLengths; |
96 |
76 |
std::vector<Node> childLengthsPostPivot; |
97 |
228 |
for (unsigned i = 0, size = children.size(); i < size; i++) |
98 |
|
{ |
99 |
394 |
Node c = children[i]; |
100 |
394 |
Node fl = RegExpEntail::getFixedLengthForRegexp(c); |
101 |
204 |
if (fl.isNull()) |
102 |
|
{ |
103 |
80 |
if (!hasPivotIndex && c.getKind() == REGEXP_STAR |
104 |
80 |
&& c[0].getKind() == REGEXP_SIGMA) |
105 |
|
{ |
106 |
16 |
hasPivotIndex = true; |
107 |
16 |
pivotIndex = i; |
108 |
|
// set to zero for the sum below |
109 |
16 |
fl = zero; |
110 |
|
} |
111 |
|
else |
112 |
|
{ |
113 |
14 |
hasFixedLength = false; |
114 |
14 |
break; |
115 |
|
} |
116 |
|
} |
117 |
190 |
childLengths.push_back(fl); |
118 |
190 |
if (hasPivotIndex) |
119 |
|
{ |
120 |
106 |
childLengthsPostPivot.push_back(fl); |
121 |
|
} |
122 |
|
} |
123 |
38 |
if (hasFixedLength) |
124 |
|
{ |
125 |
24 |
Assert(re.getNumChildren() == children.size()); |
126 |
48 |
Node sum = nm->mkNode(PLUS, childLengths); |
127 |
48 |
std::vector<Node> conc; |
128 |
24 |
conc.push_back(nm->mkNode(hasPivotIndex ? GEQ : EQUAL, lenx, sum)); |
129 |
48 |
Node currEnd = zero; |
130 |
184 |
for (unsigned i = 0, size = childLengths.size(); i < size; i++) |
131 |
|
{ |
132 |
160 |
if (hasPivotIndex && i == pivotIndex) |
133 |
|
{ |
134 |
6 |
Node ppSum = childLengthsPostPivot.size() == 1 |
135 |
|
? childLengthsPostPivot[0] |
136 |
12 |
: nm->mkNode(PLUS, childLengthsPostPivot); |
137 |
12 |
currEnd = nm->mkNode(MINUS, lenx, ppSum); |
138 |
|
} |
139 |
|
else |
140 |
|
{ |
141 |
308 |
Node curr = nm->mkNode(STRING_SUBSTR, x, currEnd, childLengths[i]); |
142 |
|
// We do not need to include memberships of the form |
143 |
|
// (str.substr x n 1) in re.allchar |
144 |
|
// since we know that by construction, n < len( x ). |
145 |
154 |
if (re[i].getKind() != REGEXP_SIGMA) |
146 |
|
{ |
147 |
164 |
Node currMem = nm->mkNode(STRING_IN_REGEXP, curr, re[i]); |
148 |
82 |
conc.push_back(currMem); |
149 |
|
} |
150 |
154 |
currEnd = nm->mkNode(PLUS, currEnd, childLengths[i]); |
151 |
154 |
currEnd = Rewriter::rewrite(currEnd); |
152 |
|
} |
153 |
|
} |
154 |
48 |
Node res = nm->mkNode(AND, conc); |
155 |
|
// For example: |
156 |
|
// x in re.++(re.union(re.range("A", "J"), re.range("N", "Z")), "AB") --> |
157 |
|
// len( x ) = 3 ^ |
158 |
|
// substr(x,0,1) in re.union(re.range("A", "J"), re.range("N", "Z")) ^ |
159 |
|
// substr(x,1,2) in "AB" |
160 |
|
// An example with a pivot index: |
161 |
|
// x in re.++( "AB" ++ _* ++ "C" ) --> |
162 |
|
// len( x ) >= 3 ^ |
163 |
|
// substr( x, 0, 2 ) in "AB" ^ |
164 |
|
// substr( x, len( x ) - 1, 1 ) in "C" |
165 |
24 |
return returnElim(atom, res, "concat-fixed-len"); |
166 |
|
} |
167 |
|
|
168 |
|
// memberships of the form x in re.++ * s1 * ... * sn *, where * are |
169 |
|
// any number of repetitions (exact or indefinite) of re.allchar. |
170 |
14 |
Trace("re-elim-debug") << "Try re concat with gaps " << atom << std::endl; |
171 |
14 |
bool onlySigmasAndConsts = true; |
172 |
28 |
std::vector<Node> sep_children; |
173 |
28 |
std::vector<unsigned> gap_minsize; |
174 |
28 |
std::vector<bool> gap_exact; |
175 |
|
// the first gap is initially strict zero |
176 |
14 |
gap_minsize.push_back(0); |
177 |
14 |
gap_exact.push_back(true); |
178 |
84 |
for (const Node& c : children) |
179 |
|
{ |
180 |
74 |
Trace("re-elim-debug") << " " << c << std::endl; |
181 |
74 |
onlySigmasAndConsts = false; |
182 |
74 |
if (c.getKind() == STRING_TO_REGEXP) |
183 |
|
{ |
184 |
40 |
onlySigmasAndConsts = true; |
185 |
40 |
sep_children.push_back(c[0]); |
186 |
|
// the next gap is initially strict zero |
187 |
40 |
gap_minsize.push_back(0); |
188 |
40 |
gap_exact.push_back(true); |
189 |
|
} |
190 |
34 |
else if (c.getKind() == REGEXP_STAR && c[0].getKind() == REGEXP_SIGMA) |
191 |
|
{ |
192 |
|
// found a gap of any size |
193 |
30 |
onlySigmasAndConsts = true; |
194 |
30 |
gap_exact[gap_exact.size() - 1] = false; |
195 |
|
} |
196 |
4 |
else if (c.getKind() == REGEXP_SIGMA) |
197 |
|
{ |
198 |
|
// add one to the minimum size of the gap |
199 |
|
onlySigmasAndConsts = true; |
200 |
|
gap_minsize[gap_minsize.size() - 1]++; |
201 |
|
} |
202 |
74 |
if (!onlySigmasAndConsts) |
203 |
|
{ |
204 |
4 |
Trace("re-elim-debug") << "...cannot handle " << c << std::endl; |
205 |
4 |
break; |
206 |
|
} |
207 |
|
} |
208 |
|
// we should always rewrite concatenations that are purely re.allchar |
209 |
|
// and re.*( re.allchar ). |
210 |
14 |
Assert(!onlySigmasAndConsts || !sep_children.empty()); |
211 |
14 |
if (onlySigmasAndConsts && !sep_children.empty()) |
212 |
|
{ |
213 |
10 |
bool canProcess = true; |
214 |
10 |
std::vector<Node> conj; |
215 |
|
// The following constructs a set of constraints that encodes that a |
216 |
|
// set of string terms are found, in order, in string x. |
217 |
|
// prev_end stores the current (symbolic) index in x that we are |
218 |
|
// searching. |
219 |
10 |
Node prev_end = zero; |
220 |
|
// the symbolic index we start searching, for each child in sep_children. |
221 |
10 |
std::vector<Node> prev_ends; |
222 |
10 |
unsigned gap_minsize_end = gap_minsize.back(); |
223 |
10 |
bool gap_exact_end = gap_exact.back(); |
224 |
10 |
std::vector<Node> non_greedy_find_vars; |
225 |
50 |
for (unsigned i = 0, size = sep_children.size(); i < size; i++) |
226 |
|
{ |
227 |
40 |
if (gap_minsize[i] > 0) |
228 |
|
{ |
229 |
|
// the gap to this child is at least gap_minsize[i] |
230 |
|
prev_end = |
231 |
|
nm->mkNode(PLUS, prev_end, nm->mkConst(Rational(gap_minsize[i]))); |
232 |
|
} |
233 |
40 |
prev_ends.push_back(prev_end); |
234 |
80 |
Node sc = sep_children[i]; |
235 |
80 |
Node lensc = nm->mkNode(STRING_LENGTH, sc); |
236 |
40 |
if (gap_exact[i]) |
237 |
|
{ |
238 |
|
// if the gap is exact, it is a substring constraint |
239 |
20 |
Node curr = prev_end; |
240 |
20 |
Node ss = nm->mkNode(STRING_SUBSTR, x, curr, lensc); |
241 |
10 |
conj.push_back(ss.eqNode(sc)); |
242 |
10 |
prev_end = nm->mkNode(PLUS, curr, lensc); |
243 |
|
} |
244 |
|
else |
245 |
|
{ |
246 |
|
// otherwise, we can use indexof to represent some next occurrence |
247 |
30 |
if (gap_exact[i + 1] && i + 1 != size) |
248 |
|
{ |
249 |
|
if (!isAgg) |
250 |
|
{ |
251 |
|
canProcess = false; |
252 |
|
break; |
253 |
|
} |
254 |
|
// if the gap after this one is strict, we need a non-greedy find |
255 |
|
// thus, we add a symbolic constant |
256 |
|
Node k = nm->mkBoundVar(nm->integerType()); |
257 |
|
non_greedy_find_vars.push_back(k); |
258 |
|
prev_end = nm->mkNode(PLUS, prev_end, k); |
259 |
|
} |
260 |
60 |
Node curr = nm->mkNode(STRING_STRIDOF, x, sc, prev_end); |
261 |
60 |
Node idofFind = curr.eqNode(nm->mkConst(Rational(-1))).negate(); |
262 |
30 |
conj.push_back(idofFind); |
263 |
30 |
prev_end = nm->mkNode(PLUS, curr, lensc); |
264 |
|
} |
265 |
|
} |
266 |
|
|
267 |
10 |
if (canProcess) |
268 |
|
{ |
269 |
|
// since sep_children is non-empty, conj is non-empty |
270 |
10 |
Assert(!conj.empty()); |
271 |
|
// Process the last gap, if necessary. |
272 |
|
// Notice that if the last gap is not exact and its minsize is zero, |
273 |
|
// then the last indexof/substr constraint entails the following |
274 |
|
// constraint, so it is not necessary to add. |
275 |
|
// Below, we may write "A" for (str.to.re "A") and _ for re.allchar: |
276 |
20 |
Node cEnd = nm->mkConst(Rational(gap_minsize_end)); |
277 |
10 |
if (gap_exact_end) |
278 |
|
{ |
279 |
10 |
Assert(!sep_children.empty()); |
280 |
|
// if it is strict, it corresponds to a substr case. |
281 |
|
// For example: |
282 |
|
// x in (re.++ "A" (re.* _) "B" _ _) ---> |
283 |
|
// ... ^ "B" = substr( x, len( x ) - 3, 1 ) ^ ... |
284 |
20 |
Node sc = sep_children.back(); |
285 |
20 |
Node lenSc = nm->mkNode(STRING_LENGTH, sc); |
286 |
20 |
Node loc = nm->mkNode(MINUS, lenx, nm->mkNode(PLUS, lenSc, cEnd)); |
287 |
20 |
Node scc = sc.eqNode(nm->mkNode(STRING_SUBSTR, x, loc, lenSc)); |
288 |
|
// We also must ensure that we fit. This constraint is necessary in |
289 |
|
// addition to the constraint above. Take this example: |
290 |
|
// x in (re.++ "A" _ (re.* _) "B" _) ---> |
291 |
|
// substr( x, 0, 1 ) = "A" ^ // find "A" |
292 |
|
// indexof( x, "B", 2 ) != -1 ^ // find "B" >=1 after "A" |
293 |
|
// substr( x, len(x)-2, 1 ) = "B" ^ // "B" is at end - 2. |
294 |
|
// indexof( x, "B", 2 ) <= len( x ) - 2 |
295 |
|
// The last constaint ensures that the second and third constraints |
296 |
|
// may refer to the same "B". If it were not for the last constraint, it |
297 |
|
// would have been the case than "ABB" would be a model for x, where |
298 |
|
// the second constraint refers to the third position, and the third |
299 |
|
// constraint refers to the second position. |
300 |
|
// |
301 |
|
// With respect to the above example, the following is an optimization. |
302 |
|
// For that example, we instead produce: |
303 |
|
// x in (re.++ "A" _ (re.* _) "B" _) ---> |
304 |
|
// substr( x, 0, 1 ) = "A" ^ // find "A" |
305 |
|
// substr( x, len(x)-2, 1 ) = "B" ^ // "B" is at end - 2 |
306 |
|
// 2 <= len( x ) - 2 |
307 |
|
// The intuition is that above, there are two constraints that insist |
308 |
|
// that "B" is found, whereas we only need one. The last constraint |
309 |
|
// above says that the "B" we find at end-2 can be found >=1 after |
310 |
|
// the "A". |
311 |
10 |
conj.pop_back(); |
312 |
20 |
Node fit = nm->mkNode(gap_exact[sep_children.size() - 1] ? EQUAL : LEQ, |
313 |
10 |
prev_ends.back(), |
314 |
50 |
loc); |
315 |
|
|
316 |
10 |
conj.push_back(scc); |
317 |
10 |
conj.push_back(fit); |
318 |
|
} |
319 |
|
else if (gap_minsize_end > 0) |
320 |
|
{ |
321 |
|
// if it is non-strict, we are in a "greedy find" situtation where |
322 |
|
// we just need to ensure that the next occurrence fits. |
323 |
|
// For example: |
324 |
|
// x in (re.++ "A" (re.* _) "B" _ _ (re.* _)) ---> |
325 |
|
// ... ^ indexof( x, "B", 1 ) + 2 <= len( x ) |
326 |
|
Node fit = nm->mkNode(LEQ, nm->mkNode(PLUS, prev_end, cEnd), lenx); |
327 |
|
conj.push_back(fit); |
328 |
|
} |
329 |
20 |
Node res = conj.size() == 1 ? conj[0] : nm->mkNode(AND, conj); |
330 |
|
// process the non-greedy find variables |
331 |
10 |
if (!non_greedy_find_vars.empty()) |
332 |
|
{ |
333 |
|
std::vector<Node> children2; |
334 |
|
for (const Node& v : non_greedy_find_vars) |
335 |
|
{ |
336 |
|
Node bound = nm->mkNode( |
337 |
|
AND, nm->mkNode(LEQ, zero, v), nm->mkNode(LT, v, lenx)); |
338 |
|
children2.push_back(bound); |
339 |
|
} |
340 |
|
children2.push_back(res); |
341 |
|
Node body = nm->mkNode(AND, children2); |
342 |
|
Node bvl = nm->mkNode(BOUND_VAR_LIST, non_greedy_find_vars); |
343 |
|
res = nm->mkNode(EXISTS, bvl, body); |
344 |
|
} |
345 |
|
// Examples of this elimination: |
346 |
|
// x in (re.++ "A" (re.* _) "B" (re.* _)) ---> |
347 |
|
// substr(x,0,1)="A" ^ indexof(x,"B",1)!=-1 |
348 |
|
// x in (re.++ (re.* _) "A" _ _ _ (re.* _) "B" _ _ (re.* _)) ---> |
349 |
|
// indexof(x,"A",0)!=-1 ^ |
350 |
|
// indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 ) != -1 ^ |
351 |
|
// indexof( x, "B", indexof( x, "A", 0 ) + 1 + 3 )+1+2 <= len(x) |
352 |
|
|
353 |
|
// An example of a non-greedy find: |
354 |
|
// x in re.++( re.*( _ ), "A", _, "B", re.*( _ ) ) ---> |
355 |
|
// exists k. 0 <= k < len( x ) ^ |
356 |
|
// indexof( x, "A", k ) != -1 ^ |
357 |
|
// substr( x, indexof( x, "A", k )+2, 1 ) = "B" |
358 |
10 |
return returnElim(atom, res, "concat-with-gaps"); |
359 |
|
} |
360 |
|
} |
361 |
|
|
362 |
4 |
if (!isAgg) |
363 |
|
{ |
364 |
|
return Node::null(); |
365 |
|
} |
366 |
|
// only aggressive rewrites below here |
367 |
|
|
368 |
|
// if the first or last child is constant string, we can split the membership |
369 |
|
// into a conjunction of two memberships. |
370 |
8 |
Node sStartIndex = zero; |
371 |
8 |
Node sLength = lenx; |
372 |
8 |
std::vector<Node> sConstraints; |
373 |
8 |
std::vector<Node> rexpElimChildren; |
374 |
4 |
unsigned nchildren = children.size(); |
375 |
4 |
Assert(nchildren > 1); |
376 |
12 |
for (unsigned r = 0; r < 2; r++) |
377 |
|
{ |
378 |
8 |
unsigned index = r == 0 ? 0 : nchildren - 1; |
379 |
16 |
Node c = children[index]; |
380 |
8 |
if (c.getKind() == STRING_TO_REGEXP) |
381 |
|
{ |
382 |
|
Assert(children[index + (r == 0 ? 1 : -1)].getKind() != STRING_TO_REGEXP); |
383 |
|
Node s = c[0]; |
384 |
|
Node lens = nm->mkNode(STRING_LENGTH, s); |
385 |
|
Node sss = r == 0 ? zero : nm->mkNode(MINUS, lenx, lens); |
386 |
|
Node ss = nm->mkNode(STRING_SUBSTR, x, sss, lens); |
387 |
|
sConstraints.push_back(ss.eqNode(s)); |
388 |
|
if (r == 0) |
389 |
|
{ |
390 |
|
sStartIndex = lens; |
391 |
|
} |
392 |
|
sLength = nm->mkNode(MINUS, sLength, lens); |
393 |
|
} |
394 |
8 |
if (r == 1 && !sConstraints.empty()) |
395 |
|
{ |
396 |
|
// add the middle children |
397 |
|
for (unsigned i = 1; i < (nchildren - 1); i++) |
398 |
|
{ |
399 |
|
rexpElimChildren.push_back(children[i]); |
400 |
|
} |
401 |
|
} |
402 |
8 |
if (c.getKind() != STRING_TO_REGEXP) |
403 |
|
{ |
404 |
8 |
rexpElimChildren.push_back(c); |
405 |
|
} |
406 |
|
} |
407 |
4 |
if (!sConstraints.empty()) |
408 |
|
{ |
409 |
|
Assert(rexpElimChildren.size() + sConstraints.size() == nchildren); |
410 |
|
Node ss = nm->mkNode(STRING_SUBSTR, x, sStartIndex, sLength); |
411 |
|
Assert(!rexpElimChildren.empty()); |
412 |
|
Node regElim = utils::mkConcat(rexpElimChildren, nm->regExpType()); |
413 |
|
sConstraints.push_back(nm->mkNode(STRING_IN_REGEXP, ss, regElim)); |
414 |
|
Node ret = nm->mkNode(AND, sConstraints); |
415 |
|
// e.g. |
416 |
|
// x in re.++( "A", R ) ---> substr(x,0,1)="A" ^ substr(x,1,len(x)-1) in R |
417 |
|
return returnElim(atom, ret, "concat-splice"); |
418 |
|
} |
419 |
4 |
Assert(nchildren > 1); |
420 |
8 |
for (unsigned i = 0; i < nchildren; i++) |
421 |
|
{ |
422 |
8 |
if (children[i].getKind() == STRING_TO_REGEXP) |
423 |
|
{ |
424 |
8 |
Node s = children[i][0]; |
425 |
8 |
Node lens = nm->mkNode(STRING_LENGTH, s); |
426 |
|
// there exists an index in this string such that the substring is this |
427 |
8 |
Node k; |
428 |
8 |
std::vector<Node> echildren; |
429 |
4 |
if (i == 0) |
430 |
|
{ |
431 |
|
k = zero; |
432 |
|
} |
433 |
4 |
else if (i + 1 == nchildren) |
434 |
|
{ |
435 |
|
k = nm->mkNode(MINUS, lenx, lens); |
436 |
|
} |
437 |
|
else |
438 |
|
{ |
439 |
4 |
k = nm->mkBoundVar(nm->integerType()); |
440 |
|
Node bound = |
441 |
|
nm->mkNode(AND, |
442 |
8 |
nm->mkNode(LEQ, zero, k), |
443 |
16 |
nm->mkNode(LEQ, k, nm->mkNode(MINUS, lenx, lens))); |
444 |
4 |
echildren.push_back(bound); |
445 |
|
} |
446 |
8 |
Node substrEq = nm->mkNode(STRING_SUBSTR, x, k, lens).eqNode(s); |
447 |
4 |
echildren.push_back(substrEq); |
448 |
4 |
if (i > 0) |
449 |
|
{ |
450 |
8 |
std::vector<Node> rprefix; |
451 |
4 |
rprefix.insert(rprefix.end(), children.begin(), children.begin() + i); |
452 |
8 |
Node rpn = utils::mkConcat(rprefix, nm->regExpType()); |
453 |
|
Node substrPrefix = nm->mkNode( |
454 |
8 |
STRING_IN_REGEXP, nm->mkNode(STRING_SUBSTR, x, zero, k), rpn); |
455 |
4 |
echildren.push_back(substrPrefix); |
456 |
|
} |
457 |
4 |
if (i + 1 < nchildren) |
458 |
|
{ |
459 |
8 |
std::vector<Node> rsuffix; |
460 |
4 |
rsuffix.insert(rsuffix.end(), children.begin() + i + 1, children.end()); |
461 |
8 |
Node rps = utils::mkConcat(rsuffix, nm->regExpType()); |
462 |
8 |
Node ks = nm->mkNode(PLUS, k, lens); |
463 |
|
Node substrSuffix = nm->mkNode( |
464 |
|
STRING_IN_REGEXP, |
465 |
8 |
nm->mkNode(STRING_SUBSTR, x, ks, nm->mkNode(MINUS, lenx, ks)), |
466 |
16 |
rps); |
467 |
4 |
echildren.push_back(substrSuffix); |
468 |
|
} |
469 |
8 |
Node body = nm->mkNode(AND, echildren); |
470 |
4 |
if (k.getKind() == BOUND_VARIABLE) |
471 |
|
{ |
472 |
8 |
Node bvl = nm->mkNode(BOUND_VAR_LIST, k); |
473 |
4 |
body = nm->mkNode(EXISTS, bvl, body); |
474 |
|
} |
475 |
|
// e.g. x in re.++( R1, "AB", R2 ) ---> |
476 |
|
// exists k. |
477 |
|
// 0 <= k <= (len(x)-2) ^ |
478 |
|
// substr( x, k, 2 ) = "AB" ^ |
479 |
|
// substr( x, 0, k ) in R1 ^ |
480 |
|
// substr( x, k+2, len(x)-(k+2) ) in R2 |
481 |
4 |
return returnElim(atom, body, "concat-find"); |
482 |
|
} |
483 |
|
} |
484 |
|
return Node::null(); |
485 |
|
} |
486 |
|
|
487 |
20 |
Node RegExpElimination::eliminateStar(Node atom, bool isAgg) |
488 |
|
{ |
489 |
20 |
if (!isAgg) |
490 |
|
{ |
491 |
6 |
return Node::null(); |
492 |
|
} |
493 |
|
// only aggressive rewrites below here |
494 |
|
|
495 |
14 |
NodeManager* nm = NodeManager::currentNM(); |
496 |
28 |
Node x = atom[0]; |
497 |
28 |
Node lenx = nm->mkNode(STRING_LENGTH, x); |
498 |
28 |
Node re = atom[1]; |
499 |
28 |
Node zero = nm->mkConst(Rational(0)); |
500 |
|
// for regular expression star, |
501 |
|
// if the period is a fixed constant, we can turn it into a bounded |
502 |
|
// quantifier |
503 |
28 |
std::vector<Node> disj; |
504 |
14 |
if (re[0].getKind() == REGEXP_UNION) |
505 |
|
{ |
506 |
12 |
for (const Node& r : re[0]) |
507 |
|
{ |
508 |
8 |
disj.push_back(r); |
509 |
|
} |
510 |
|
} |
511 |
|
else |
512 |
|
{ |
513 |
10 |
disj.push_back(re[0]); |
514 |
|
} |
515 |
14 |
bool lenOnePeriod = true; |
516 |
28 |
std::vector<Node> char_constraints; |
517 |
28 |
Node index = nm->mkBoundVar(nm->integerType()); |
518 |
|
Node substr_ch = |
519 |
28 |
nm->mkNode(STRING_SUBSTR, x, index, nm->mkConst(Rational(1))); |
520 |
14 |
substr_ch = Rewriter::rewrite(substr_ch); |
521 |
|
// handle the case where it is purely characters |
522 |
28 |
for (const Node& r : disj) |
523 |
|
{ |
524 |
18 |
Assert(r.getKind() != REGEXP_UNION); |
525 |
18 |
Assert(r.getKind() != REGEXP_SIGMA); |
526 |
18 |
lenOnePeriod = false; |
527 |
|
// lenOnePeriod is true if this regular expression is a single character |
528 |
|
// regular expression |
529 |
18 |
if (r.getKind() == STRING_TO_REGEXP) |
530 |
|
{ |
531 |
22 |
Node s = r[0]; |
532 |
11 |
if (s.isConst() && s.getConst<String>().size() == 1) |
533 |
|
{ |
534 |
11 |
lenOnePeriod = true; |
535 |
|
} |
536 |
|
} |
537 |
7 |
else if (r.getKind() == REGEXP_RANGE) |
538 |
|
{ |
539 |
3 |
lenOnePeriod = true; |
540 |
|
} |
541 |
18 |
if (!lenOnePeriod) |
542 |
|
{ |
543 |
4 |
break; |
544 |
|
} |
545 |
|
else |
546 |
|
{ |
547 |
28 |
Node regexp_ch = nm->mkNode(STRING_IN_REGEXP, substr_ch, r); |
548 |
14 |
regexp_ch = Rewriter::rewrite(regexp_ch); |
549 |
14 |
Assert(regexp_ch.getKind() != STRING_IN_REGEXP); |
550 |
14 |
char_constraints.push_back(regexp_ch); |
551 |
|
} |
552 |
|
} |
553 |
14 |
if (lenOnePeriod) |
554 |
|
{ |
555 |
10 |
Assert(!char_constraints.empty()); |
556 |
|
Node bound = nm->mkNode( |
557 |
20 |
AND, nm->mkNode(LEQ, zero, index), nm->mkNode(LT, index, lenx)); |
558 |
20 |
Node conc = char_constraints.size() == 1 ? char_constraints[0] |
559 |
30 |
: nm->mkNode(OR, char_constraints); |
560 |
20 |
Node body = nm->mkNode(OR, bound.negate(), conc); |
561 |
20 |
Node bvl = nm->mkNode(BOUND_VAR_LIST, index); |
562 |
20 |
Node res = nm->mkNode(FORALL, bvl, body); |
563 |
|
// e.g. |
564 |
|
// x in (re.* (re.union "A" "B" )) ---> |
565 |
|
// forall k. 0<=k<len(x) => (substr(x,k,1) in "A" OR substr(x,k,1) in "B") |
566 |
10 |
return returnElim(atom, res, "star-char"); |
567 |
|
} |
568 |
|
// otherwise, for stars of constant length these are periodic |
569 |
4 |
if (disj.size() == 1) |
570 |
|
{ |
571 |
|
Node r = disj[0]; |
572 |
|
if (r.getKind() == STRING_TO_REGEXP) |
573 |
|
{ |
574 |
|
Node s = r[0]; |
575 |
|
if (s.isConst()) |
576 |
|
{ |
577 |
|
Node lens = nm->mkNode(STRING_LENGTH, s); |
578 |
|
lens = Rewriter::rewrite(lens); |
579 |
|
Assert(lens.isConst()); |
580 |
|
Assert(lens.getConst<Rational>().sgn() > 0); |
581 |
|
std::vector<Node> conj; |
582 |
|
// lens is a positive constant, so it is safe to use total div/mod here. |
583 |
|
Node bound = nm->mkNode( |
584 |
|
AND, |
585 |
|
nm->mkNode(LEQ, zero, index), |
586 |
|
nm->mkNode(LT, index, nm->mkNode(INTS_DIVISION_TOTAL, lenx, lens))); |
587 |
|
Node conc = |
588 |
|
nm->mkNode(STRING_SUBSTR, x, nm->mkNode(MULT, index, lens), lens) |
589 |
|
.eqNode(s); |
590 |
|
Node body = nm->mkNode(OR, bound.negate(), conc); |
591 |
|
Node bvl = nm->mkNode(BOUND_VAR_LIST, index); |
592 |
|
Node res = nm->mkNode(FORALL, bvl, body); |
593 |
|
res = nm->mkNode( |
594 |
|
AND, nm->mkNode(INTS_MODULUS_TOTAL, lenx, lens).eqNode(zero), res); |
595 |
|
// e.g. |
596 |
|
// x in ("abc")* ---> |
597 |
|
// forall k. 0 <= k < (len( x ) div 3) => substr(x,3*k,3) = "abc" ^ |
598 |
|
// len(x) mod 3 = 0 |
599 |
|
return returnElim(atom, res, "star-constant"); |
600 |
|
} |
601 |
|
} |
602 |
|
} |
603 |
4 |
return Node::null(); |
604 |
|
} |
605 |
|
|
606 |
48 |
Node RegExpElimination::returnElim(Node atom, Node atomElim, const char* id) |
607 |
|
{ |
608 |
96 |
Trace("re-elim") << "re-elim: " << atom << " to " << atomElim << " by " << id |
609 |
48 |
<< "." << std::endl; |
610 |
48 |
return atomElim; |
611 |
|
} |
612 |
46 |
bool RegExpElimination::isProofEnabled() const { return d_pnm != nullptr; } |
613 |
|
|
614 |
|
} // namespace strings |
615 |
|
} // namespace theory |
616 |
28191 |
} // namespace cvc5 |