1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Gereon Kremer |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of rewrite rules for string-specific operators in |
14 |
|
* theory of strings. |
15 |
|
*/ |
16 |
|
|
17 |
|
#include "theory/strings/strings_rewriter.h" |
18 |
|
|
19 |
|
#include "expr/node_builder.h" |
20 |
|
#include "theory/strings/theory_strings_utils.h" |
21 |
|
#include "util/rational.h" |
22 |
|
#include "util/string.h" |
23 |
|
|
24 |
|
using namespace cvc5::kind; |
25 |
|
|
26 |
|
namespace cvc5 { |
27 |
|
namespace theory { |
28 |
|
namespace strings { |
29 |
|
|
30 |
15274 |
StringsRewriter::StringsRewriter(Rewriter* r, |
31 |
|
HistogramStat<Rewrite>* statistics, |
32 |
15274 |
uint32_t alphaCard) |
33 |
15274 |
: SequencesRewriter(r, statistics), d_alphaCard(alphaCard) |
34 |
|
{ |
35 |
15274 |
} |
36 |
|
|
37 |
489709 |
RewriteResponse StringsRewriter::postRewrite(TNode node) |
38 |
|
{ |
39 |
979418 |
Trace("strings-postrewrite") |
40 |
489709 |
<< "Strings::StringsRewriter::postRewrite start " << node << std::endl; |
41 |
|
|
42 |
979418 |
Node retNode = node; |
43 |
489709 |
Kind nk = node.getKind(); |
44 |
489709 |
if (nk == kind::STRING_LT) |
45 |
|
{ |
46 |
23 |
retNode = rewriteStringLt(node); |
47 |
|
} |
48 |
489686 |
else if (nk == kind::STRING_LEQ) |
49 |
|
{ |
50 |
404 |
retNode = rewriteStringLeq(node); |
51 |
|
} |
52 |
489282 |
else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) |
53 |
|
{ |
54 |
208 |
retNode = rewriteStrConvert(node); |
55 |
|
} |
56 |
489074 |
else if (nk == STRING_IS_DIGIT) |
57 |
|
{ |
58 |
8 |
retNode = rewriteStringIsDigit(node); |
59 |
|
} |
60 |
489066 |
else if (nk == kind::STRING_ITOS) |
61 |
|
{ |
62 |
619 |
retNode = rewriteIntToStr(node); |
63 |
|
} |
64 |
488447 |
else if (nk == kind::STRING_STOI) |
65 |
|
{ |
66 |
1010 |
retNode = rewriteStrToInt(node); |
67 |
|
} |
68 |
487437 |
else if (nk == STRING_TO_CODE) |
69 |
|
{ |
70 |
6144 |
retNode = rewriteStringToCode(node); |
71 |
|
} |
72 |
481293 |
else if (nk == STRING_FROM_CODE) |
73 |
|
{ |
74 |
120 |
retNode = rewriteStringFromCode(node); |
75 |
|
} |
76 |
|
else |
77 |
|
{ |
78 |
481173 |
return SequencesRewriter::postRewrite(node); |
79 |
|
} |
80 |
|
|
81 |
17072 |
Trace("strings-postrewrite") |
82 |
8536 |
<< "Strings::StringsRewriter::postRewrite returning " << retNode |
83 |
8536 |
<< std::endl; |
84 |
8536 |
if (node != retNode) |
85 |
|
{ |
86 |
4656 |
Trace("strings-rewrite-debug") << "Strings::StringsRewriter::postRewrite " |
87 |
2328 |
<< node << " to " << retNode << std::endl; |
88 |
2328 |
return RewriteResponse(REWRITE_AGAIN_FULL, retNode); |
89 |
|
} |
90 |
6208 |
return RewriteResponse(REWRITE_DONE, retNode); |
91 |
|
} |
92 |
|
|
93 |
1010 |
Node StringsRewriter::rewriteStrToInt(Node node) |
94 |
|
{ |
95 |
1010 |
Assert(node.getKind() == STRING_STOI); |
96 |
1010 |
NodeManager* nm = NodeManager::currentNM(); |
97 |
1010 |
if (node[0].isConst()) |
98 |
|
{ |
99 |
538 |
Node ret; |
100 |
538 |
String s = node[0].getConst<String>(); |
101 |
269 |
if (s.isNumber()) |
102 |
|
{ |
103 |
124 |
ret = nm->mkConst(s.toNumber()); |
104 |
|
} |
105 |
|
else |
106 |
|
{ |
107 |
145 |
ret = nm->mkConst(Rational(-1)); |
108 |
|
} |
109 |
269 |
return returnRewrite(node, ret, Rewrite::STOI_EVAL); |
110 |
|
} |
111 |
741 |
else if (node[0].getKind() == STRING_CONCAT) |
112 |
|
{ |
113 |
1207 |
for (TNode nc : node[0]) |
114 |
|
{ |
115 |
927 |
if (nc.isConst()) |
116 |
|
{ |
117 |
205 |
String t = nc.getConst<String>(); |
118 |
113 |
if (!t.isNumber()) |
119 |
|
{ |
120 |
42 |
Node ret = nm->mkConst(Rational(-1)); |
121 |
21 |
return returnRewrite(node, ret, Rewrite::STOI_CONCAT_NONNUM); |
122 |
|
} |
123 |
|
} |
124 |
|
} |
125 |
|
} |
126 |
720 |
return node; |
127 |
|
} |
128 |
|
|
129 |
619 |
Node StringsRewriter::rewriteIntToStr(Node node) |
130 |
|
{ |
131 |
619 |
Assert(node.getKind() == STRING_ITOS); |
132 |
619 |
NodeManager* nm = NodeManager::currentNM(); |
133 |
619 |
if (node[0].isConst()) |
134 |
|
{ |
135 |
550 |
Node ret; |
136 |
275 |
if (node[0].getConst<Rational>().sgn() == -1) |
137 |
|
{ |
138 |
126 |
ret = nm->mkConst(String("")); |
139 |
|
} |
140 |
|
else |
141 |
|
{ |
142 |
298 |
std::string stmp = node[0].getConst<Rational>().getNumerator().toString(); |
143 |
149 |
Assert(stmp[0] != '-'); |
144 |
149 |
ret = nm->mkConst(String(stmp)); |
145 |
|
} |
146 |
275 |
return returnRewrite(node, ret, Rewrite::ITOS_EVAL); |
147 |
|
} |
148 |
344 |
return node; |
149 |
|
} |
150 |
|
|
151 |
208 |
Node StringsRewriter::rewriteStrConvert(Node node) |
152 |
|
{ |
153 |
208 |
Kind nk = node.getKind(); |
154 |
208 |
Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); |
155 |
208 |
NodeManager* nm = NodeManager::currentNM(); |
156 |
208 |
if (node[0].isConst()) |
157 |
|
{ |
158 |
52 |
std::vector<unsigned> nvec = node[0].getConst<String>().getVec(); |
159 |
96 |
for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) |
160 |
|
{ |
161 |
70 |
unsigned newChar = nvec[i]; |
162 |
|
// transform it |
163 |
|
// upper 65 ... 90 |
164 |
|
// lower 97 ... 122 |
165 |
70 |
if (nk == STRING_TOUPPER) |
166 |
|
{ |
167 |
36 |
if (newChar >= 97 && newChar <= 122) |
168 |
|
{ |
169 |
22 |
newChar = newChar - 32; |
170 |
|
} |
171 |
|
} |
172 |
34 |
else if (nk == STRING_TOLOWER) |
173 |
|
{ |
174 |
34 |
if (newChar >= 65 && newChar <= 90) |
175 |
|
{ |
176 |
25 |
newChar = newChar + 32; |
177 |
|
} |
178 |
|
} |
179 |
70 |
nvec[i] = newChar; |
180 |
|
} |
181 |
52 |
Node retNode = nm->mkConst(String(nvec)); |
182 |
26 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST); |
183 |
|
} |
184 |
182 |
else if (node[0].getKind() == STRING_CONCAT) |
185 |
|
{ |
186 |
92 |
NodeBuilder concatBuilder(STRING_CONCAT); |
187 |
198 |
for (const Node& nc : node[0]) |
188 |
|
{ |
189 |
152 |
concatBuilder << nm->mkNode(nk, nc); |
190 |
|
} |
191 |
|
// tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) |
192 |
92 |
Node retNode = concatBuilder.constructNode(); |
193 |
46 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_MINSCOPE_CONCAT); |
194 |
|
} |
195 |
408 |
else if (node[0].getKind() == STRING_TOLOWER |
196 |
408 |
|| node[0].getKind() == STRING_TOUPPER) |
197 |
|
{ |
198 |
|
// tolower( tolower( x ) ) --> tolower( x ) |
199 |
|
// tolower( toupper( x ) ) --> tolower( x ) |
200 |
16 |
Node retNode = nm->mkNode(nk, node[0][0]); |
201 |
8 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_IDEM); |
202 |
|
} |
203 |
128 |
else if (node[0].getKind() == STRING_ITOS) |
204 |
|
{ |
205 |
|
// tolower( str.from.int( x ) ) --> str.from.int( x ) |
206 |
4 |
return returnRewrite(node, node[0], Rewrite::STR_CONV_ITOS); |
207 |
|
} |
208 |
124 |
return node; |
209 |
|
} |
210 |
|
|
211 |
23 |
Node StringsRewriter::rewriteStringLt(Node n) |
212 |
|
{ |
213 |
23 |
Assert(n.getKind() == kind::STRING_LT); |
214 |
23 |
NodeManager* nm = NodeManager::currentNM(); |
215 |
|
// eliminate s < t ---> s != t AND s <= t |
216 |
|
Node retNode = nm->mkNode( |
217 |
46 |
AND, n[0].eqNode(n[1]).negate(), nm->mkNode(STRING_LEQ, n[0], n[1])); |
218 |
46 |
return returnRewrite(n, retNode, Rewrite::STR_LT_ELIM); |
219 |
|
} |
220 |
|
|
221 |
404 |
Node StringsRewriter::rewriteStringLeq(Node n) |
222 |
|
{ |
223 |
404 |
Assert(n.getKind() == kind::STRING_LEQ); |
224 |
404 |
NodeManager* nm = NodeManager::currentNM(); |
225 |
404 |
if (n[0] == n[1]) |
226 |
|
{ |
227 |
56 |
Node ret = nm->mkConst(true); |
228 |
28 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_ID); |
229 |
|
} |
230 |
376 |
if (n[0].isConst() && n[1].isConst()) |
231 |
|
{ |
232 |
88 |
String s = n[0].getConst<String>(); |
233 |
88 |
String t = n[1].getConst<String>(); |
234 |
88 |
Node ret = nm->mkConst(s.isLeq(t)); |
235 |
44 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EVAL); |
236 |
|
} |
237 |
|
// empty strings |
238 |
967 |
for (unsigned i = 0; i < 2; i++) |
239 |
|
{ |
240 |
656 |
if (n[i].isConst() && n[i].getConst<String>().empty()) |
241 |
|
{ |
242 |
42 |
Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]); |
243 |
21 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EMPTY); |
244 |
|
} |
245 |
|
} |
246 |
|
|
247 |
622 |
std::vector<Node> n1; |
248 |
311 |
utils::getConcat(n[0], n1); |
249 |
622 |
std::vector<Node> n2; |
250 |
311 |
utils::getConcat(n[1], n2); |
251 |
311 |
Assert(!n1.empty() && !n2.empty()); |
252 |
|
|
253 |
|
// constant prefixes |
254 |
311 |
if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) |
255 |
|
{ |
256 |
111 |
String s = n1[0].getConst<String>(); |
257 |
111 |
String t = n2[0].getConst<String>(); |
258 |
71 |
size_t prefixLen = std::min(s.size(), t.size()); |
259 |
71 |
s = s.prefix(prefixLen); |
260 |
71 |
t = t.prefix(prefixLen); |
261 |
|
// if the prefixes are not the same, then we can already decide the outcome |
262 |
71 |
if (s != t) |
263 |
|
{ |
264 |
62 |
Node ret = nm->mkConst(s.isLeq(t)); |
265 |
31 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_CPREFIX); |
266 |
|
} |
267 |
|
} |
268 |
280 |
return n; |
269 |
|
} |
270 |
|
|
271 |
120 |
Node StringsRewriter::rewriteStringFromCode(Node n) |
272 |
|
{ |
273 |
120 |
Assert(n.getKind() == kind::STRING_FROM_CODE); |
274 |
120 |
NodeManager* nm = NodeManager::currentNM(); |
275 |
|
|
276 |
120 |
if (n[0].isConst()) |
277 |
|
{ |
278 |
48 |
Integer i = n[0].getConst<Rational>().getNumerator(); |
279 |
48 |
Node ret; |
280 |
24 |
if (i >= 0 && i < d_alphaCard) |
281 |
|
{ |
282 |
22 |
std::vector<unsigned> svec = {i.toUnsignedInt()}; |
283 |
11 |
ret = nm->mkConst(String(svec)); |
284 |
|
} |
285 |
|
else |
286 |
|
{ |
287 |
13 |
ret = nm->mkConst(String("")); |
288 |
|
} |
289 |
24 |
return returnRewrite(n, ret, Rewrite::FROM_CODE_EVAL); |
290 |
|
} |
291 |
96 |
return n; |
292 |
|
} |
293 |
|
|
294 |
6144 |
Node StringsRewriter::rewriteStringToCode(Node n) |
295 |
|
{ |
296 |
6144 |
Assert(n.getKind() == kind::STRING_TO_CODE); |
297 |
6144 |
if (n[0].isConst()) |
298 |
|
{ |
299 |
1500 |
NodeManager* nm = NodeManager::currentNM(); |
300 |
3000 |
String s = n[0].getConst<String>(); |
301 |
3000 |
Node ret; |
302 |
1500 |
if (s.size() == 1) |
303 |
|
{ |
304 |
1296 |
std::vector<unsigned> vec = s.getVec(); |
305 |
648 |
Assert(vec.size() == 1); |
306 |
648 |
ret = nm->mkConst(Rational(vec[0])); |
307 |
|
} |
308 |
|
else |
309 |
|
{ |
310 |
852 |
ret = nm->mkConst(Rational(-1)); |
311 |
|
} |
312 |
1500 |
return returnRewrite(n, ret, Rewrite::TO_CODE_EVAL); |
313 |
|
} |
314 |
4644 |
return n; |
315 |
|
} |
316 |
|
|
317 |
8 |
Node StringsRewriter::rewriteStringIsDigit(Node n) |
318 |
|
{ |
319 |
8 |
Assert(n.getKind() == kind::STRING_IS_DIGIT); |
320 |
8 |
NodeManager* nm = NodeManager::currentNM(); |
321 |
|
// eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 |
322 |
16 |
Node t = nm->mkNode(STRING_TO_CODE, n[0]); |
323 |
|
Node retNode = nm->mkNode(AND, |
324 |
16 |
nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), |
325 |
32 |
nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); |
326 |
16 |
return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); |
327 |
|
} |
328 |
|
|
329 |
|
} // namespace strings |
330 |
|
} // namespace theory |
331 |
31137 |
} // namespace cvc5 |