1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Yoni Zohar |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Util functions for theory strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/theory_strings_utils.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "expr/attribute.h" |
21 |
|
#include "expr/skolem_manager.h" |
22 |
|
#include "options/strings_options.h" |
23 |
|
#include "theory/quantifiers/quantifiers_attributes.h" |
24 |
|
#include "theory/rewriter.h" |
25 |
|
#include "theory/strings/arith_entail.h" |
26 |
|
#include "theory/strings/strings_entail.h" |
27 |
|
#include "theory/strings/word.h" |
28 |
|
#include "util/rational.h" |
29 |
|
#include "util/regexp.h" |
30 |
|
#include "util/string.h" |
31 |
|
|
32 |
|
using namespace cvc5::kind; |
33 |
|
|
34 |
|
namespace cvc5 { |
35 |
|
namespace theory { |
36 |
|
namespace strings { |
37 |
|
namespace utils { |
38 |
|
|
39 |
43281 |
uint32_t getAlphabetCardinality() |
40 |
|
{ |
41 |
|
// 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings |
42 |
43281 |
Assert(196608 <= String::num_codes()); |
43 |
43281 |
return 196608; |
44 |
|
} |
45 |
|
|
46 |
361117 |
Node mkAnd(const std::vector<Node>& a) |
47 |
|
{ |
48 |
722234 |
std::vector<Node> au; |
49 |
801539 |
for (const Node& ai : a) |
50 |
|
{ |
51 |
440422 |
if (std::find(au.begin(), au.end(), ai) == au.end()) |
52 |
|
{ |
53 |
433162 |
au.push_back(ai); |
54 |
|
} |
55 |
|
} |
56 |
361117 |
if (au.empty()) |
57 |
|
{ |
58 |
169232 |
return NodeManager::currentNM()->mkConst(true); |
59 |
|
} |
60 |
191885 |
else if (au.size() == 1) |
61 |
|
{ |
62 |
96540 |
return au[0]; |
63 |
|
} |
64 |
95345 |
return NodeManager::currentNM()->mkNode(AND, au); |
65 |
|
} |
66 |
|
|
67 |
230192 |
void flattenOp(Kind k, Node n, std::vector<Node>& conj) |
68 |
|
{ |
69 |
230192 |
if (n.getKind() != k) |
70 |
|
{ |
71 |
|
// easy case, just add to conj if non-duplicate |
72 |
209774 |
if (std::find(conj.begin(), conj.end(), n) == conj.end()) |
73 |
|
{ |
74 |
205097 |
conj.push_back(n); |
75 |
|
} |
76 |
209774 |
return; |
77 |
|
} |
78 |
|
// otherwise, traverse |
79 |
40836 |
std::unordered_set<TNode> visited; |
80 |
20418 |
std::unordered_set<TNode>::iterator it; |
81 |
40836 |
std::vector<TNode> visit; |
82 |
40836 |
TNode cur; |
83 |
20418 |
visit.push_back(n); |
84 |
43124 |
do |
85 |
|
{ |
86 |
63542 |
cur = visit.back(); |
87 |
63542 |
visit.pop_back(); |
88 |
63542 |
it = visited.find(cur); |
89 |
|
|
90 |
63542 |
if (it == visited.end()) |
91 |
|
{ |
92 |
63542 |
visited.insert(cur); |
93 |
63542 |
if (cur.getKind() == k) |
94 |
|
{ |
95 |
|
// Add in reverse order, so that we traverse left to right. |
96 |
|
// This is important so that explantaions aren't reversed when they |
97 |
|
// are flattened, which is important for proofs involving substitutions. |
98 |
40836 |
std::vector<Node> newChildren; |
99 |
20418 |
newChildren.insert(newChildren.end(), cur.begin(), cur.end()); |
100 |
20418 |
visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend()); |
101 |
|
} |
102 |
43124 |
else if (std::find(conj.begin(), conj.end(), cur) == conj.end()) |
103 |
|
{ |
104 |
42319 |
conj.push_back(cur); |
105 |
|
} |
106 |
|
} |
107 |
63542 |
} while (!visit.empty()); |
108 |
|
} |
109 |
|
|
110 |
762639 |
void getConcat(Node n, std::vector<Node>& c) |
111 |
|
{ |
112 |
762639 |
Kind k = n.getKind(); |
113 |
762639 |
if (k == STRING_CONCAT || k == REGEXP_CONCAT) |
114 |
|
{ |
115 |
257696 |
for (const Node& nc : n) |
116 |
|
{ |
117 |
192424 |
c.push_back(nc); |
118 |
65272 |
} |
119 |
|
} |
120 |
|
else |
121 |
|
{ |
122 |
697367 |
c.push_back(n); |
123 |
|
} |
124 |
762639 |
} |
125 |
|
|
126 |
739880 |
Node mkConcat(const std::vector<Node>& c, TypeNode tn) |
127 |
|
{ |
128 |
739880 |
Assert(tn.isStringLike() || tn.isRegExp()); |
129 |
739880 |
if (c.empty()) |
130 |
|
{ |
131 |
54061 |
Assert(tn.isStringLike()); |
132 |
54061 |
return Word::mkEmptyWord(tn); |
133 |
|
} |
134 |
685819 |
else if (c.size() == 1) |
135 |
|
{ |
136 |
269120 |
return c[0]; |
137 |
|
} |
138 |
416699 |
Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT; |
139 |
416699 |
return NodeManager::currentNM()->mkNode(k, c); |
140 |
|
} |
141 |
|
|
142 |
1136 |
Node mkNConcat(Node n1, Node n2) |
143 |
|
{ |
144 |
|
return Rewriter::rewrite( |
145 |
1136 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2)); |
146 |
|
} |
147 |
|
|
148 |
4134 |
Node mkNConcat(Node n1, Node n2, Node n3) |
149 |
|
{ |
150 |
|
return Rewriter::rewrite( |
151 |
4134 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3)); |
152 |
|
} |
153 |
|
|
154 |
593098 |
Node mkNConcat(const std::vector<Node>& c, TypeNode tn) |
155 |
|
{ |
156 |
593098 |
return Rewriter::rewrite(mkConcat(c, tn)); |
157 |
|
} |
158 |
|
|
159 |
1152208 |
Node mkNLength(Node t) |
160 |
|
{ |
161 |
1152208 |
return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t)); |
162 |
|
} |
163 |
|
|
164 |
18534 |
Node mkPrefix(Node t, Node n) |
165 |
|
{ |
166 |
18534 |
NodeManager* nm = NodeManager::currentNM(); |
167 |
18534 |
return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n); |
168 |
|
} |
169 |
|
|
170 |
14817 |
Node mkSuffix(Node t, Node n) |
171 |
|
{ |
172 |
14817 |
NodeManager* nm = NodeManager::currentNM(); |
173 |
|
return nm->mkNode( |
174 |
14817 |
STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n)); |
175 |
|
} |
176 |
|
|
177 |
197749 |
Node getConstantComponent(Node t) |
178 |
|
{ |
179 |
197749 |
if (t.getKind() == STRING_TO_REGEXP) |
180 |
|
{ |
181 |
9133 |
return t[0].isConst() ? t[0] : Node::null(); |
182 |
|
} |
183 |
188616 |
return t.isConst() ? t : Node::null(); |
184 |
|
} |
185 |
|
|
186 |
163685 |
Node getConstantEndpoint(Node e, bool isSuf) |
187 |
|
{ |
188 |
163685 |
Kind ek = e.getKind(); |
189 |
163685 |
if (ek == STRING_IN_REGEXP) |
190 |
|
{ |
191 |
2748 |
e = e[1]; |
192 |
2748 |
ek = e.getKind(); |
193 |
|
} |
194 |
163685 |
if (ek == STRING_CONCAT || ek == REGEXP_CONCAT) |
195 |
|
{ |
196 |
84405 |
return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]); |
197 |
|
} |
198 |
79280 |
return getConstantComponent(e); |
199 |
|
} |
200 |
|
|
201 |
132162 |
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls) |
202 |
|
{ |
203 |
132162 |
Assert(ss.empty()); |
204 |
132162 |
Assert(ls.empty()); |
205 |
257470 |
while (s.getKind() == STRING_SUBSTR) |
206 |
|
{ |
207 |
62654 |
ss.push_back(s[1]); |
208 |
62654 |
ls.push_back(s[2]); |
209 |
62654 |
s = s[0]; |
210 |
|
} |
211 |
132162 |
std::reverse(ss.begin(), ss.end()); |
212 |
132162 |
std::reverse(ls.begin(), ls.end()); |
213 |
132162 |
return s; |
214 |
|
} |
215 |
|
|
216 |
|
Node mkSubstrChain(Node base, |
217 |
|
const std::vector<Node>& ss, |
218 |
|
const std::vector<Node>& ls) |
219 |
|
{ |
220 |
|
NodeManager* nm = NodeManager::currentNM(); |
221 |
|
for (unsigned i = 0, size = ss.size(); i < size; i++) |
222 |
|
{ |
223 |
|
base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); |
224 |
|
} |
225 |
|
return base; |
226 |
|
} |
227 |
|
|
228 |
500 |
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x) |
229 |
|
{ |
230 |
|
// Collect the equalities of the form (= x "") (sorted) |
231 |
1000 |
std::set<TNode> emptyNodes; |
232 |
500 |
bool allEmptyEqs = true; |
233 |
500 |
if (x.getKind() == EQUAL) |
234 |
|
{ |
235 |
394 |
if (Word::isEmpty(x[0])) |
236 |
|
{ |
237 |
170 |
emptyNodes.insert(x[1]); |
238 |
|
} |
239 |
224 |
else if (Word::isEmpty(x[1])) |
240 |
|
{ |
241 |
84 |
emptyNodes.insert(x[0]); |
242 |
|
} |
243 |
|
else |
244 |
|
{ |
245 |
140 |
allEmptyEqs = false; |
246 |
|
} |
247 |
|
} |
248 |
106 |
else if (x.getKind() == AND) |
249 |
|
{ |
250 |
276 |
for (const Node& c : x) |
251 |
|
{ |
252 |
198 |
if (c.getKind() != EQUAL) |
253 |
|
{ |
254 |
2 |
allEmptyEqs = false; |
255 |
2 |
continue; |
256 |
|
} |
257 |
|
|
258 |
194 |
if (Word::isEmpty(c[0])) |
259 |
|
{ |
260 |
156 |
emptyNodes.insert(c[1]); |
261 |
|
} |
262 |
38 |
else if (Word::isEmpty(c[1])) |
263 |
|
{ |
264 |
20 |
emptyNodes.insert(c[0]); |
265 |
|
} |
266 |
|
else |
267 |
|
{ |
268 |
18 |
allEmptyEqs = false; |
269 |
|
} |
270 |
|
} |
271 |
|
} |
272 |
|
|
273 |
500 |
if (emptyNodes.size() == 0) |
274 |
|
{ |
275 |
164 |
allEmptyEqs = false; |
276 |
|
} |
277 |
|
|
278 |
|
return std::make_pair( |
279 |
1000 |
allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end())); |
280 |
|
} |
281 |
|
|
282 |
13365016 |
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } |
283 |
|
|
284 |
1754 |
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start) |
285 |
|
{ |
286 |
1754 |
size_t i = start; |
287 |
2850 |
while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA) |
288 |
|
{ |
289 |
548 |
i++; |
290 |
|
} |
291 |
|
|
292 |
1754 |
if (i >= rs.size()) |
293 |
|
{ |
294 |
12 |
return false; |
295 |
|
} |
296 |
|
|
297 |
1742 |
return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA; |
298 |
|
} |
299 |
|
|
300 |
882 |
bool isSimpleRegExp(Node r) |
301 |
|
{ |
302 |
882 |
Assert(r.getType().isRegExp()); |
303 |
|
|
304 |
1764 |
std::vector<Node> v; |
305 |
882 |
utils::getConcat(r, v); |
306 |
2254 |
for (const Node& n : v) |
307 |
|
{ |
308 |
1900 |
if (n.getKind() == STRING_TO_REGEXP) |
309 |
|
{ |
310 |
650 |
if (!n[0].isConst()) |
311 |
|
{ |
312 |
584 |
return false; |
313 |
|
} |
314 |
|
} |
315 |
2500 |
else if (n.getKind() != REGEXP_SIGMA |
316 |
2500 |
&& (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA)) |
317 |
|
{ |
318 |
472 |
return false; |
319 |
|
} |
320 |
|
} |
321 |
354 |
return true; |
322 |
|
} |
323 |
|
|
324 |
1492 |
void getRegexpComponents(Node r, std::vector<Node>& result) |
325 |
|
{ |
326 |
1492 |
Assert(r.getType().isRegExp()); |
327 |
|
|
328 |
1492 |
NodeManager* nm = NodeManager::currentNM(); |
329 |
1492 |
if (r.getKind() == REGEXP_CONCAT) |
330 |
|
{ |
331 |
1420 |
for (const Node& n : r) |
332 |
|
{ |
333 |
1168 |
getRegexpComponents(n, result); |
334 |
|
} |
335 |
|
} |
336 |
1240 |
else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst()) |
337 |
|
{ |
338 |
488 |
size_t rlen = Word::getLength(r[0]); |
339 |
1738 |
for (size_t i = 0; i < rlen; i++) |
340 |
|
{ |
341 |
1250 |
result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1))); |
342 |
|
} |
343 |
|
} |
344 |
|
else |
345 |
|
{ |
346 |
752 |
result.push_back(r); |
347 |
|
} |
348 |
1492 |
} |
349 |
|
|
350 |
|
void printConcat(std::ostream& out, std::vector<Node>& n) |
351 |
|
{ |
352 |
|
for (unsigned i = 0, nsize = n.size(); i < nsize; i++) |
353 |
|
{ |
354 |
|
if (i > 0) |
355 |
|
{ |
356 |
|
out << " ++ "; |
357 |
|
} |
358 |
|
out << n[i]; |
359 |
|
} |
360 |
|
} |
361 |
|
|
362 |
|
void printConcatTrace(std::vector<Node>& n, const char* c) |
363 |
|
{ |
364 |
|
std::stringstream ss; |
365 |
|
printConcat(ss, n); |
366 |
|
Trace(c) << ss.str(); |
367 |
|
} |
368 |
|
|
369 |
7636 |
bool isStringKind(Kind k) |
370 |
|
{ |
371 |
7512 |
return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER |
372 |
7458 |
|| k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT |
373 |
15056 |
|| k == STRING_FROM_CODE || k == STRING_TO_CODE; |
374 |
|
} |
375 |
|
|
376 |
730 |
bool isRegExpKind(Kind k) |
377 |
|
{ |
378 |
730 |
return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP |
379 |
730 |
|| k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER |
380 |
308 |
|| k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT |
381 |
29 |
|| k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV |
382 |
759 |
|| k == REGEXP_COMPLEMENT; |
383 |
|
} |
384 |
|
|
385 |
36682 |
TypeNode getOwnerStringType(Node n) |
386 |
|
{ |
387 |
36682 |
TypeNode tn; |
388 |
36682 |
Kind k = n.getKind(); |
389 |
36682 |
if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH |
390 |
10261 |
|| k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX |
391 |
7636 |
|| k == STRING_SUFFIX) |
392 |
|
{ |
393 |
|
// owning string type is the type of first argument |
394 |
29046 |
tn = n[0].getType(); |
395 |
|
} |
396 |
7636 |
else if (isStringKind(k)) |
397 |
|
{ |
398 |
2220 |
tn = NodeManager::currentNM()->stringType(); |
399 |
|
} |
400 |
|
else |
401 |
|
{ |
402 |
5416 |
tn = n.getType(); |
403 |
|
} |
404 |
73364 |
AlwaysAssert(tn.isStringLike()) |
405 |
36682 |
<< "Unexpected term in getOwnerStringType : " << n << ", type " << tn; |
406 |
36682 |
return tn; |
407 |
|
} |
408 |
|
|
409 |
4 |
unsigned getRepeatAmount(TNode node) |
410 |
|
{ |
411 |
4 |
Assert(node.getKind() == REGEXP_REPEAT); |
412 |
4 |
return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount; |
413 |
|
} |
414 |
|
|
415 |
32 |
unsigned getLoopMaxOccurrences(TNode node) |
416 |
|
{ |
417 |
32 |
Assert(node.getKind() == REGEXP_LOOP); |
418 |
32 |
return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc; |
419 |
|
} |
420 |
|
|
421 |
32 |
unsigned getLoopMinOccurrences(TNode node) |
422 |
|
{ |
423 |
32 |
Assert(node.getKind() == REGEXP_LOOP); |
424 |
32 |
return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc; |
425 |
|
} |
426 |
|
|
427 |
|
/** |
428 |
|
* Mapping to a dummy node for marking an attribute on internal quantified |
429 |
|
* formulas. This ensures that reductions are deterministic. |
430 |
|
*/ |
431 |
|
struct QInternalVarAttributeId |
432 |
|
{ |
433 |
|
}; |
434 |
|
typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute; |
435 |
|
|
436 |
627 |
Node mkForallInternal(Node bvl, Node body) |
437 |
|
{ |
438 |
627 |
NodeManager* nm = NodeManager::currentNM(); |
439 |
|
QInternalVarAttribute qiva; |
440 |
1254 |
Node qvar; |
441 |
627 |
if (bvl.hasAttribute(qiva)) |
442 |
|
{ |
443 |
104 |
qvar = bvl.getAttribute(qiva); |
444 |
|
} |
445 |
|
else |
446 |
|
{ |
447 |
523 |
SkolemManager* sm = nm->getSkolemManager(); |
448 |
523 |
qvar = sm->mkDummySkolem("qinternal", nm->booleanType()); |
449 |
|
// this dummy variable marks that the quantified formula is internal |
450 |
523 |
qvar.setAttribute(InternalQuantAttribute(), true); |
451 |
|
// remember the dummy variable |
452 |
523 |
bvl.setAttribute(qiva, qvar); |
453 |
|
} |
454 |
|
// make the internal attribute, and put it in a singleton list |
455 |
1254 |
Node ip = nm->mkNode(INST_ATTRIBUTE, qvar); |
456 |
1254 |
Node ipl = nm->mkNode(INST_PATTERN_LIST, ip); |
457 |
|
// make the overall formula |
458 |
1254 |
return nm->mkNode(FORALL, bvl, body, ipl); |
459 |
|
} |
460 |
|
|
461 |
|
} // namespace utils |
462 |
|
} // namespace strings |
463 |
|
} // namespace theory |
464 |
29502 |
} // namespace cvc5 |