1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Yoni Zohar |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Util functions for theory strings. |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "theory/strings/theory_strings_utils.h" |
17 |
|
|
18 |
|
#include <sstream> |
19 |
|
|
20 |
|
#include "options/strings_options.h" |
21 |
|
#include "theory/rewriter.h" |
22 |
|
#include "theory/strings/arith_entail.h" |
23 |
|
#include "theory/strings/strings_entail.h" |
24 |
|
#include "theory/strings/word.h" |
25 |
|
|
26 |
|
using namespace cvc5::kind; |
27 |
|
|
28 |
|
namespace cvc5 { |
29 |
|
namespace theory { |
30 |
|
namespace strings { |
31 |
|
namespace utils { |
32 |
|
|
33 |
38610 |
uint32_t getAlphabetCardinality() |
34 |
|
{ |
35 |
|
// 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings |
36 |
38610 |
Assert(196608 <= String::num_codes()); |
37 |
38610 |
return 196608; |
38 |
|
} |
39 |
|
|
40 |
195881 |
Node mkAnd(const std::vector<Node>& a) |
41 |
|
{ |
42 |
391762 |
std::vector<Node> au; |
43 |
416434 |
for (const Node& ai : a) |
44 |
|
{ |
45 |
220553 |
if (std::find(au.begin(), au.end(), ai) == au.end()) |
46 |
|
{ |
47 |
214787 |
au.push_back(ai); |
48 |
|
} |
49 |
|
} |
50 |
195881 |
if (au.empty()) |
51 |
|
{ |
52 |
108456 |
return NodeManager::currentNM()->mkConst(true); |
53 |
|
} |
54 |
87425 |
else if (au.size() == 1) |
55 |
|
{ |
56 |
39485 |
return au[0]; |
57 |
|
} |
58 |
47940 |
return NodeManager::currentNM()->mkNode(AND, au); |
59 |
|
} |
60 |
|
|
61 |
95223 |
void flattenOp(Kind k, Node n, std::vector<Node>& conj) |
62 |
|
{ |
63 |
95223 |
if (n.getKind() != k) |
64 |
|
{ |
65 |
|
// easy case, just add to conj if non-duplicate |
66 |
93374 |
if (std::find(conj.begin(), conj.end(), n) == conj.end()) |
67 |
|
{ |
68 |
90223 |
conj.push_back(n); |
69 |
|
} |
70 |
93374 |
return; |
71 |
|
} |
72 |
|
// otherwise, traverse |
73 |
3698 |
std::unordered_set<TNode> visited; |
74 |
1849 |
std::unordered_set<TNode>::iterator it; |
75 |
3698 |
std::vector<TNode> visit; |
76 |
3698 |
TNode cur; |
77 |
1849 |
visit.push_back(n); |
78 |
4261 |
do |
79 |
|
{ |
80 |
6110 |
cur = visit.back(); |
81 |
6110 |
visit.pop_back(); |
82 |
6110 |
it = visited.find(cur); |
83 |
|
|
84 |
6110 |
if (it == visited.end()) |
85 |
|
{ |
86 |
6110 |
visited.insert(cur); |
87 |
6110 |
if (cur.getKind() == k) |
88 |
|
{ |
89 |
|
// Add in reverse order, so that we traverse left to right. |
90 |
|
// This is important so that explantaions aren't reversed when they |
91 |
|
// are flattened, which is important for proofs involving substitutions. |
92 |
3698 |
std::vector<Node> newChildren; |
93 |
1849 |
newChildren.insert(newChildren.end(), cur.begin(), cur.end()); |
94 |
1849 |
visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend()); |
95 |
|
} |
96 |
4261 |
else if (std::find(conj.begin(), conj.end(), cur) == conj.end()) |
97 |
|
{ |
98 |
4249 |
conj.push_back(cur); |
99 |
|
} |
100 |
|
} |
101 |
6110 |
} while (!visit.empty()); |
102 |
|
} |
103 |
|
|
104 |
1120702 |
void getConcat(Node n, std::vector<Node>& c) |
105 |
|
{ |
106 |
1120702 |
Kind k = n.getKind(); |
107 |
1120702 |
if (k == STRING_CONCAT || k == REGEXP_CONCAT) |
108 |
|
{ |
109 |
887066 |
for (const Node& nc : n) |
110 |
|
{ |
111 |
658379 |
c.push_back(nc); |
112 |
228687 |
} |
113 |
|
} |
114 |
|
else |
115 |
|
{ |
116 |
892015 |
c.push_back(n); |
117 |
|
} |
118 |
1120702 |
} |
119 |
|
|
120 |
428678 |
Node mkConcat(const std::vector<Node>& c, TypeNode tn) |
121 |
|
{ |
122 |
428678 |
Assert(tn.isStringLike() || tn.isRegExp()); |
123 |
428678 |
if (c.empty()) |
124 |
|
{ |
125 |
28540 |
Assert(tn.isStringLike()); |
126 |
28540 |
return Word::mkEmptyWord(tn); |
127 |
|
} |
128 |
400138 |
else if (c.size() == 1) |
129 |
|
{ |
130 |
188043 |
return c[0]; |
131 |
|
} |
132 |
212095 |
Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT; |
133 |
212095 |
return NodeManager::currentNM()->mkNode(k, c); |
134 |
|
} |
135 |
|
|
136 |
1104 |
Node mkNConcat(Node n1, Node n2) |
137 |
|
{ |
138 |
|
return Rewriter::rewrite( |
139 |
1104 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2)); |
140 |
|
} |
141 |
|
|
142 |
1967 |
Node mkNConcat(Node n1, Node n2, Node n3) |
143 |
|
{ |
144 |
|
return Rewriter::rewrite( |
145 |
1967 |
NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3)); |
146 |
|
} |
147 |
|
|
148 |
325683 |
Node mkNConcat(const std::vector<Node>& c, TypeNode tn) |
149 |
|
{ |
150 |
325683 |
return Rewriter::rewrite(mkConcat(c, tn)); |
151 |
|
} |
152 |
|
|
153 |
450345 |
Node mkNLength(Node t) |
154 |
|
{ |
155 |
450345 |
return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t)); |
156 |
|
} |
157 |
|
|
158 |
10348 |
Node mkPrefix(Node t, Node n) |
159 |
|
{ |
160 |
10348 |
NodeManager* nm = NodeManager::currentNM(); |
161 |
10348 |
return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n); |
162 |
|
} |
163 |
|
|
164 |
8384 |
Node mkSuffix(Node t, Node n) |
165 |
|
{ |
166 |
8384 |
NodeManager* nm = NodeManager::currentNM(); |
167 |
|
return nm->mkNode( |
168 |
8384 |
STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n)); |
169 |
|
} |
170 |
|
|
171 |
60624 |
Node getConstantComponent(Node t) |
172 |
|
{ |
173 |
60624 |
if (t.getKind() == STRING_TO_REGEXP) |
174 |
|
{ |
175 |
1748 |
return t[0].isConst() ? t[0] : Node::null(); |
176 |
|
} |
177 |
58876 |
return t.isConst() ? t : Node::null(); |
178 |
|
} |
179 |
|
|
180 |
44714 |
Node getConstantEndpoint(Node e, bool isSuf) |
181 |
|
{ |
182 |
44714 |
Kind ek = e.getKind(); |
183 |
44714 |
if (ek == STRING_IN_REGEXP) |
184 |
|
{ |
185 |
769 |
e = e[1]; |
186 |
769 |
ek = e.getKind(); |
187 |
|
} |
188 |
44714 |
if (ek == STRING_CONCAT || ek == REGEXP_CONCAT) |
189 |
|
{ |
190 |
23081 |
return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]); |
191 |
|
} |
192 |
21633 |
return getConstantComponent(e); |
193 |
|
} |
194 |
|
|
195 |
265121 |
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls) |
196 |
|
{ |
197 |
265121 |
Assert(ss.empty()); |
198 |
265121 |
Assert(ls.empty()); |
199 |
338879 |
while (s.getKind() == STRING_SUBSTR) |
200 |
|
{ |
201 |
36879 |
ss.push_back(s[1]); |
202 |
36879 |
ls.push_back(s[2]); |
203 |
36879 |
s = s[0]; |
204 |
|
} |
205 |
265121 |
std::reverse(ss.begin(), ss.end()); |
206 |
265121 |
std::reverse(ls.begin(), ls.end()); |
207 |
265121 |
return s; |
208 |
|
} |
209 |
|
|
210 |
|
Node mkSubstrChain(Node base, |
211 |
|
const std::vector<Node>& ss, |
212 |
|
const std::vector<Node>& ls) |
213 |
|
{ |
214 |
|
NodeManager* nm = NodeManager::currentNM(); |
215 |
|
for (unsigned i = 0, size = ss.size(); i < size; i++) |
216 |
|
{ |
217 |
|
base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]); |
218 |
|
} |
219 |
|
return base; |
220 |
|
} |
221 |
|
|
222 |
340 |
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x) |
223 |
|
{ |
224 |
|
// Collect the equalities of the form (= x "") (sorted) |
225 |
680 |
std::set<TNode> emptyNodes; |
226 |
340 |
bool allEmptyEqs = true; |
227 |
340 |
if (x.getKind() == EQUAL) |
228 |
|
{ |
229 |
294 |
if (Word::isEmpty(x[0])) |
230 |
|
{ |
231 |
116 |
emptyNodes.insert(x[1]); |
232 |
|
} |
233 |
178 |
else if (Word::isEmpty(x[1])) |
234 |
|
{ |
235 |
60 |
emptyNodes.insert(x[0]); |
236 |
|
} |
237 |
|
else |
238 |
|
{ |
239 |
118 |
allEmptyEqs = false; |
240 |
|
} |
241 |
|
} |
242 |
46 |
else if (x.getKind() == AND) |
243 |
|
{ |
244 |
98 |
for (const Node& c : x) |
245 |
|
{ |
246 |
72 |
if (c.getKind() != EQUAL) |
247 |
|
{ |
248 |
2 |
allEmptyEqs = false; |
249 |
2 |
continue; |
250 |
|
} |
251 |
|
|
252 |
68 |
if (Word::isEmpty(c[0])) |
253 |
|
{ |
254 |
38 |
emptyNodes.insert(c[1]); |
255 |
|
} |
256 |
30 |
else if (Word::isEmpty(c[1])) |
257 |
|
{ |
258 |
20 |
emptyNodes.insert(c[0]); |
259 |
|
} |
260 |
|
else |
261 |
|
{ |
262 |
10 |
allEmptyEqs = false; |
263 |
|
} |
264 |
|
} |
265 |
|
} |
266 |
|
|
267 |
340 |
if (emptyNodes.size() == 0) |
268 |
|
{ |
269 |
134 |
allEmptyEqs = false; |
270 |
|
} |
271 |
|
|
272 |
|
return std::make_pair( |
273 |
680 |
allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end())); |
274 |
|
} |
275 |
|
|
276 |
5457659 |
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; } |
277 |
|
|
278 |
1714 |
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start) |
279 |
|
{ |
280 |
1714 |
size_t i = start; |
281 |
2818 |
while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA) |
282 |
|
{ |
283 |
552 |
i++; |
284 |
|
} |
285 |
|
|
286 |
1714 |
if (i >= rs.size()) |
287 |
|
{ |
288 |
12 |
return false; |
289 |
|
} |
290 |
|
|
291 |
1702 |
return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA; |
292 |
|
} |
293 |
|
|
294 |
604 |
bool isSimpleRegExp(Node r) |
295 |
|
{ |
296 |
604 |
Assert(r.getType().isRegExp()); |
297 |
|
|
298 |
1208 |
std::vector<Node> v; |
299 |
604 |
utils::getConcat(r, v); |
300 |
1845 |
for (const Node& n : v) |
301 |
|
{ |
302 |
1553 |
if (n.getKind() == STRING_TO_REGEXP) |
303 |
|
{ |
304 |
546 |
if (!n[0].isConst()) |
305 |
|
{ |
306 |
360 |
return false; |
307 |
|
} |
308 |
|
} |
309 |
2014 |
else if (n.getKind() != REGEXP_SIGMA |
310 |
2014 |
&& (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA)) |
311 |
|
{ |
312 |
264 |
return false; |
313 |
|
} |
314 |
|
} |
315 |
292 |
return true; |
316 |
|
} |
317 |
|
|
318 |
1404 |
void getRegexpComponents(Node r, std::vector<Node>& result) |
319 |
|
{ |
320 |
1404 |
Assert(r.getType().isRegExp()); |
321 |
|
|
322 |
1404 |
NodeManager* nm = NodeManager::currentNM(); |
323 |
1404 |
if (r.getKind() == REGEXP_CONCAT) |
324 |
|
{ |
325 |
1368 |
for (const Node& n : r) |
326 |
|
{ |
327 |
1128 |
getRegexpComponents(n, result); |
328 |
|
} |
329 |
|
} |
330 |
1164 |
else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst()) |
331 |
|
{ |
332 |
440 |
size_t rlen = Word::getLength(r[0]); |
333 |
1610 |
for (size_t i = 0; i < rlen; i++) |
334 |
|
{ |
335 |
1170 |
result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1))); |
336 |
|
} |
337 |
|
} |
338 |
|
else |
339 |
|
{ |
340 |
724 |
result.push_back(r); |
341 |
|
} |
342 |
1404 |
} |
343 |
|
|
344 |
|
void printConcat(std::ostream& out, std::vector<Node>& n) |
345 |
|
{ |
346 |
|
for (unsigned i = 0, nsize = n.size(); i < nsize; i++) |
347 |
|
{ |
348 |
|
if (i > 0) |
349 |
|
{ |
350 |
|
out << " ++ "; |
351 |
|
} |
352 |
|
out << n[i]; |
353 |
|
} |
354 |
|
} |
355 |
|
|
356 |
|
void printConcatTrace(std::vector<Node>& n, const char* c) |
357 |
|
{ |
358 |
|
std::stringstream ss; |
359 |
|
printConcat(ss, n); |
360 |
|
Trace(c) << ss.str(); |
361 |
|
} |
362 |
|
|
363 |
4851 |
bool isStringKind(Kind k) |
364 |
|
{ |
365 |
4835 |
return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER |
366 |
4781 |
|| k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT |
367 |
9592 |
|| k == STRING_FROM_CODE || k == STRING_TO_CODE; |
368 |
|
} |
369 |
|
|
370 |
521 |
bool isRegExpKind(Kind k) |
371 |
|
{ |
372 |
521 |
return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP |
373 |
521 |
|| k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER |
374 |
237 |
|| k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT |
375 |
30 |
|| k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV |
376 |
551 |
|| k == REGEXP_COMPLEMENT; |
377 |
|
} |
378 |
|
|
379 |
23817 |
TypeNode getOwnerStringType(Node n) |
380 |
|
{ |
381 |
23817 |
TypeNode tn; |
382 |
23817 |
Kind k = n.getKind(); |
383 |
23817 |
if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN |
384 |
4870 |
|| k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX) |
385 |
|
{ |
386 |
|
// owning string type is the type of first argument |
387 |
18966 |
tn = n[0].getType(); |
388 |
|
} |
389 |
4851 |
else if (isStringKind(k)) |
390 |
|
{ |
391 |
1166 |
tn = NodeManager::currentNM()->stringType(); |
392 |
|
} |
393 |
|
else |
394 |
|
{ |
395 |
3685 |
tn = n.getType(); |
396 |
|
} |
397 |
47634 |
AlwaysAssert(tn.isStringLike()) |
398 |
23817 |
<< "Unexpected term in getOwnerStringType : " << n << ", type " << tn; |
399 |
23817 |
return tn; |
400 |
|
} |
401 |
|
|
402 |
4 |
unsigned getRepeatAmount(TNode node) |
403 |
|
{ |
404 |
4 |
Assert(node.getKind() == REGEXP_REPEAT); |
405 |
4 |
return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount; |
406 |
|
} |
407 |
|
|
408 |
24 |
unsigned getLoopMaxOccurrences(TNode node) |
409 |
|
{ |
410 |
24 |
Assert(node.getKind() == REGEXP_LOOP); |
411 |
24 |
return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc; |
412 |
|
} |
413 |
|
|
414 |
24 |
unsigned getLoopMinOccurrences(TNode node) |
415 |
|
{ |
416 |
24 |
Assert(node.getKind() == REGEXP_LOOP); |
417 |
24 |
return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc; |
418 |
|
} |
419 |
|
|
420 |
|
} // namespace utils |
421 |
|
} // namespace strings |
422 |
|
} // namespace theory |
423 |
27735 |
} // namespace cvc5 |