1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Yoni Zohar |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Util functions for theory strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/theory_strings_utils.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "expr/attribute.h" |
21 |
|
#include "expr/skolem_manager.h" |
22 |
|
#include "options/strings_options.h" |
23 |
|
#include "theory/quantifiers/fmf/bounded_integers.h" |
24 |
|
#include "theory/quantifiers/quantifiers_attributes.h" |
25 |
|
#include "theory/rewriter.h" |
26 |
|
#include "theory/strings/arith_entail.h" |
27 |
|
#include "theory/strings/strings_entail.h" |
28 |
|
#include "theory/strings/word.h" |
29 |
|
#include "util/rational.h" |
30 |
|
#include "util/regexp.h" |
31 |
|
#include "util/string.h" |
32 |
|
|
33 |
|
using namespace cvc5::kind; |
34 |
|
|
35 |
|
namespace cvc5 { |
36 |
|
namespace theory { |
37 |
|
namespace strings { |
38 |
|
namespace utils { |
39 |
|
|
40 |
619 |
uint32_t getDefaultAlphabetCardinality() |
41 |
|
{ |
42 |
|
// 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings |
43 |
619 |
Assert(196608 <= String::num_codes()); |
44 |
619 |
return 196608; |
45 |
|
} |
46 |
|
|
47 |
362426 |
Node mkAnd(const std::vector<Node>& a) |
48 |
|
{ |
49 |
724852 |
std::vector<Node> au; |
50 |
801201 |
for (const Node& ai : a) |
51 |
|
{ |
52 |
438775 |
if (std::find(au.begin(), au.end(), ai) == au.end()) |
53 |
|
{ |
54 |
431519 |
au.push_back(ai); |
55 |
|
} |
56 |
|
} |
57 |
362426 |
if (au.empty()) |
58 |
|
{ |
59 |
171976 |
return NodeManager::currentNM()->mkConst(true); |
60 |
|
} |
61 |
190450 |
else if (au.size() == 1) |
62 |
|
{ |
63 |
95128 |
return au[0]; |
64 |
|
} |
65 |
95322 |
return NodeManager::currentNM()->mkNode(AND, au); |
66 |
|
} |
67 |
|
|
68 |
229813 |
void flattenOp(Kind k, Node n, std::vector<Node>& conj) |
69 |
|
{ |
70 |
229813 |
if (n.getKind() != k) |
71 |
|
{ |
72 |
|
// easy case, just add to conj if non-duplicate |
73 |
209710 |
if (std::find(conj.begin(), conj.end(), n) == conj.end()) |
74 |
|
{ |
75 |
205045 |
conj.push_back(n); |
76 |
|
} |
77 |
209710 |
return; |
78 |
|
} |
79 |
|
// otherwise, traverse |
80 |
40206 |
std::unordered_set<TNode> visited; |
81 |
20103 |
std::unordered_set<TNode>::iterator it; |
82 |
40206 |
std::vector<TNode> visit; |
83 |
40206 |
TNode cur; |
84 |
20103 |
visit.push_back(n); |
85 |
42620 |
do |
86 |
|
{ |
87 |
62723 |
cur = visit.back(); |
88 |
62723 |
visit.pop_back(); |
89 |
62723 |
it = visited.find(cur); |
90 |
|
|
91 |
62723 |
if (it == visited.end()) |
92 |
|
{ |
93 |
62723 |
visited.insert(cur); |
94 |
62723 |
if (cur.getKind() == k) |
95 |
|
{ |
96 |
|
// Add in reverse order, so that we traverse left to right. |
97 |
|
// This is important so that explantaions aren't reversed when they |
98 |
|
// are flattened, which is important for proofs involving substitutions. |
99 |
40206 |
std::vector<Node> newChildren; |
100 |
20103 |
newChildren.insert(newChildren.end(), cur.begin(), cur.end()); |
101 |
20103 |
visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend()); |
102 |
|
} |
103 |
42620 |
else if (std::find(conj.begin(), conj.end(), cur) == conj.end()) |
104 |
|
{ |
105 |
41813 |
conj.push_back(cur); |
106 |
|
} |
107 |
|
} |
108 |
62723 |
} while (!visit.empty()); |
109 |
|
} |
110 |
|
|
111 |
778146 |
void getConcat(Node n, std::vector<Node>& c) |
112 |
|
{ |
113 |
778146 |
Kind k = n.getKind(); |
114 |
778146 |
if (k == STRING_CONCAT || k == REGEXP_CONCAT) |
115 |
|
{ |
116 |
249058 |
for (const Node& nc : n) |
117 |
|
{ |
118 |
185860 |
c.push_back(nc); |
119 |
63198 |
} |
120 |
|
} |
121 |
|
else |
122 |
|
{ |
123 |
714948 |
c.push_back(n); |
124 |
|
} |
125 |
778146 |
} |
126 |
|
|
127 |
742213 |
Node mkConcat(const std::vector<Node>& c, TypeNode tn) |
128 |
|
{ |
129 |
742213 |
Assert(tn.isStringLike() || tn.isRegExp()); |
130 |
742213 |
if (c.empty()) |
131 |
|
{ |
132 |
54445 |
Assert(tn.isStringLike()); |
133 |
54445 |
return Word::mkEmptyWord(tn); |
134 |
|
} |
135 |
687768 |
else if (c.size() == 1) |
136 |
|
{ |
137 |
275208 |
return c[0]; |
138 |
|
} |
139 |
412560 |
Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT; |
140 |
412560 |
return NodeManager::currentNM()->mkNode(k, c); |
141 |
|
} |
142 |
|
|
143 |
1136 |
Node mkNConcat(Node n1, Node n2) |
144 |
|
{ |
145 |
|
return Rewriter::rewrite( |
146 |
1136 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2)); |
147 |
|
} |
148 |
|
|
149 |
4103 |
Node mkNConcat(Node n1, Node n2, Node n3) |
150 |
|
{ |
151 |
|
return Rewriter::rewrite( |
152 |
4103 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3)); |
153 |
|
} |
154 |
|
|
155 |
596550 |
Node mkNConcat(const std::vector<Node>& c, TypeNode tn) |
156 |
|
{ |
157 |
596550 |
return Rewriter::rewrite(mkConcat(c, tn)); |
158 |
|
} |
159 |
|
|
160 |
1150222 |
Node mkNLength(Node t) |
161 |
|
{ |
162 |
1150222 |
return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t)); |
163 |
|
} |
164 |
|
|
165 |
18602 |
Node mkPrefix(Node t, Node n) |
166 |
|
{ |
167 |
18602 |
NodeManager* nm = NodeManager::currentNM(); |
168 |
18602 |
return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n); |
169 |
|
} |
170 |
|
|
171 |
14891 |
Node mkSuffix(Node t, Node n) |
172 |
|
{ |
173 |
14891 |
NodeManager* nm = NodeManager::currentNM(); |
174 |
|
return nm->mkNode( |
175 |
14891 |
STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n)); |
176 |
|
} |
177 |
|
|
178 |
190839 |
Node getConstantComponent(Node t) |
179 |
|
{ |
180 |
190839 |
if (t.getKind() == STRING_TO_REGEXP) |
181 |
|
{ |
182 |
8064 |
return t[0].isConst() ? t[0] : Node::null(); |
183 |
|
} |
184 |
182775 |
return t.isConst() ? t : Node::null(); |
185 |
|
} |
186 |
|
|
187 |
157621 |
Node getConstantEndpoint(Node e, bool isSuf) |
188 |
|
{ |
189 |
157621 |
Kind ek = e.getKind(); |
190 |
157621 |
if (ek == STRING_IN_REGEXP) |
191 |
|
{ |
192 |
2628 |
e = e[1]; |
193 |
2628 |
ek = e.getKind(); |
194 |
|
} |
195 |
157621 |
if (ek == STRING_CONCAT || ek == REGEXP_CONCAT) |
196 |
|
{ |
197 |
81379 |
return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]); |
198 |
|
} |
199 |
76242 |
return getConstantComponent(e); |
200 |
|
} |
201 |
|
|
202 |
143296 |
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls) |
203 |
|
{ |
204 |
143296 |
Assert(ss.empty()); |
205 |
143296 |
Assert(ls.empty()); |
206 |
265836 |
while (s.getKind() == STRING_SUBSTR) |
207 |
|
{ |
208 |
61270 |
ss.push_back(s[1]); |
209 |
61270 |
ls.push_back(s[2]); |
210 |
61270 |
s = s[0]; |
211 |
|
} |
212 |
143296 |
std::reverse(ss.begin(), ss.end()); |
213 |
143296 |
std::reverse(ls.begin(), ls.end()); |
214 |
143296 |
return s; |
215 |
|
} |
216 |
|
|
217 |
|
Node mkSubstrChain(Node base, |
218 |
|
const std::vector<Node>& ss, |
219 |
|
const std::vector<Node>& ls) |
220 |
|
{ |
221 |
|
NodeManager* nm = NodeManager::currentNM(); |
222 |
|
for (unsigned i = 0, size = ss.size(); i < size; i++) |
223 |
|
{ |
224 |
|
base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); |
225 |
|
} |
226 |
|
return base; |
227 |
|
} |
228 |
|
|
229 |
400 |
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x) |
230 |
|
{ |
231 |
|
// Collect the equalities of the form (= x "") (sorted) |
232 |
800 |
std::set<TNode> emptyNodes; |
233 |
400 |
bool allEmptyEqs = true; |
234 |
400 |
if (x.getKind() == EQUAL) |
235 |
|
{ |
236 |
298 |
if (Word::isEmpty(x[0])) |
237 |
|
{ |
238 |
168 |
emptyNodes.insert(x[1]); |
239 |
|
} |
240 |
130 |
else if (Word::isEmpty(x[1])) |
241 |
|
{ |
242 |
84 |
emptyNodes.insert(x[0]); |
243 |
|
} |
244 |
|
else |
245 |
|
{ |
246 |
46 |
allEmptyEqs = false; |
247 |
|
} |
248 |
|
} |
249 |
102 |
else if (x.getKind() == AND) |
250 |
|
{ |
251 |
270 |
for (const Node& c : x) |
252 |
|
{ |
253 |
194 |
if (c.getKind() != EQUAL) |
254 |
|
{ |
255 |
2 |
allEmptyEqs = false; |
256 |
2 |
continue; |
257 |
|
} |
258 |
|
|
259 |
190 |
if (Word::isEmpty(c[0])) |
260 |
|
{ |
261 |
154 |
emptyNodes.insert(c[1]); |
262 |
|
} |
263 |
36 |
else if (Word::isEmpty(c[1])) |
264 |
|
{ |
265 |
20 |
emptyNodes.insert(c[0]); |
266 |
|
} |
267 |
|
else |
268 |
|
{ |
269 |
16 |
allEmptyEqs = false; |
270 |
|
} |
271 |
|
} |
272 |
|
} |
273 |
|
|
274 |
400 |
if (emptyNodes.size() == 0) |
275 |
|
{ |
276 |
68 |
allEmptyEqs = false; |
277 |
|
} |
278 |
|
|
279 |
|
return std::make_pair( |
280 |
800 |
allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end())); |
281 |
|
} |
282 |
|
|
283 |
13182054 |
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } |
284 |
|
|
285 |
1794 |
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start) |
286 |
|
{ |
287 |
1794 |
size_t i = start; |
288 |
2910 |
while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA) |
289 |
|
{ |
290 |
558 |
i++; |
291 |
|
} |
292 |
|
|
293 |
1794 |
if (i >= rs.size()) |
294 |
|
{ |
295 |
12 |
return false; |
296 |
|
} |
297 |
|
|
298 |
1782 |
return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA; |
299 |
|
} |
300 |
|
|
301 |
992 |
bool isSimpleRegExp(Node r) |
302 |
|
{ |
303 |
992 |
Assert(r.getType().isRegExp()); |
304 |
|
|
305 |
1984 |
std::vector<Node> v; |
306 |
992 |
utils::getConcat(r, v); |
307 |
2634 |
for (const Node& n : v) |
308 |
|
{ |
309 |
2252 |
if (n.getKind() == STRING_TO_REGEXP) |
310 |
|
{ |
311 |
800 |
if (!n[0].isConst()) |
312 |
|
{ |
313 |
682 |
return false; |
314 |
|
} |
315 |
|
} |
316 |
2904 |
else if (n.getKind() != REGEXP_SIGMA |
317 |
2904 |
&& (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA)) |
318 |
|
{ |
319 |
538 |
return false; |
320 |
|
} |
321 |
|
} |
322 |
382 |
return true; |
323 |
|
} |
324 |
|
|
325 |
1746 |
void getRegexpComponents(Node r, std::vector<Node>& result) |
326 |
|
{ |
327 |
1746 |
Assert(r.getType().isRegExp()); |
328 |
|
|
329 |
1746 |
NodeManager* nm = NodeManager::currentNM(); |
330 |
1746 |
if (r.getKind() == REGEXP_CONCAT) |
331 |
|
{ |
332 |
1674 |
for (const Node& n : r) |
333 |
|
{ |
334 |
1410 |
getRegexpComponents(n, result); |
335 |
|
} |
336 |
|
} |
337 |
1482 |
else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst()) |
338 |
|
{ |
339 |
606 |
size_t rlen = Word::getLength(r[0]); |
340 |
2006 |
for (size_t i = 0; i < rlen; i++) |
341 |
|
{ |
342 |
1400 |
result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1))); |
343 |
|
} |
344 |
|
} |
345 |
|
else |
346 |
|
{ |
347 |
876 |
result.push_back(r); |
348 |
|
} |
349 |
1746 |
} |
350 |
|
|
351 |
|
void printConcat(std::ostream& out, std::vector<Node>& n) |
352 |
|
{ |
353 |
|
for (unsigned i = 0, nsize = n.size(); i < nsize; i++) |
354 |
|
{ |
355 |
|
if (i > 0) |
356 |
|
{ |
357 |
|
out << " ++ "; |
358 |
|
} |
359 |
|
out << n[i]; |
360 |
|
} |
361 |
|
} |
362 |
|
|
363 |
|
void printConcatTrace(std::vector<Node>& n, const char* c) |
364 |
|
{ |
365 |
|
std::stringstream ss; |
366 |
|
printConcat(ss, n); |
367 |
|
Trace(c) << ss.str(); |
368 |
|
} |
369 |
|
|
370 |
7668 |
bool isStringKind(Kind k) |
371 |
|
{ |
372 |
7544 |
return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER |
373 |
7490 |
|| k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT |
374 |
15120 |
|| k == STRING_FROM_CODE || k == STRING_TO_CODE; |
375 |
|
} |
376 |
|
|
377 |
807 |
bool isRegExpKind(Kind k) |
378 |
|
{ |
379 |
807 |
return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP |
380 |
807 |
|| k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER |
381 |
345 |
|| k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT |
382 |
48 |
|| k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV |
383 |
855 |
|| k == REGEXP_COMPLEMENT; |
384 |
|
} |
385 |
|
|
386 |
38302 |
TypeNode getOwnerStringType(Node n) |
387 |
|
{ |
388 |
38302 |
TypeNode tn; |
389 |
38302 |
Kind k = n.getKind(); |
390 |
38302 |
if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH |
391 |
10297 |
|| k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX |
392 |
7668 |
|| k == STRING_SUFFIX) |
393 |
|
{ |
394 |
|
// owning string type is the type of first argument |
395 |
30634 |
tn = n[0].getType(); |
396 |
|
} |
397 |
7668 |
else if (isStringKind(k)) |
398 |
|
{ |
399 |
2220 |
tn = NodeManager::currentNM()->stringType(); |
400 |
|
} |
401 |
|
else |
402 |
|
{ |
403 |
5448 |
tn = n.getType(); |
404 |
|
} |
405 |
76604 |
AlwaysAssert(tn.isStringLike()) |
406 |
38302 |
<< "Unexpected term in getOwnerStringType : " << n << ", type " << tn; |
407 |
38302 |
return tn; |
408 |
|
} |
409 |
|
|
410 |
4 |
unsigned getRepeatAmount(TNode node) |
411 |
|
{ |
412 |
4 |
Assert(node.getKind() == REGEXP_REPEAT); |
413 |
4 |
return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount; |
414 |
|
} |
415 |
|
|
416 |
32 |
unsigned getLoopMaxOccurrences(TNode node) |
417 |
|
{ |
418 |
32 |
Assert(node.getKind() == REGEXP_LOOP); |
419 |
32 |
return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc; |
420 |
|
} |
421 |
|
|
422 |
32 |
unsigned getLoopMinOccurrences(TNode node) |
423 |
|
{ |
424 |
32 |
Assert(node.getKind() == REGEXP_LOOP); |
425 |
32 |
return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc; |
426 |
|
} |
427 |
|
|
428 |
681 |
Node mkForallInternal(Node bvl, Node body) |
429 |
|
{ |
430 |
681 |
return quantifiers::BoundedIntegers::mkBoundedForall(bvl, body); |
431 |
|
} |
432 |
|
|
433 |
|
} // namespace utils |
434 |
|
} // namespace strings |
435 |
|
} // namespace theory |
436 |
31137 |
} // namespace cvc5 |