1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Gereon Kremer |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of rewrite rules for string-specific operators in |
14 |
|
* theory of strings. |
15 |
|
*/ |
16 |
|
|
17 |
|
#include "theory/strings/strings_rewriter.h" |
18 |
|
|
19 |
|
#include "expr/node_builder.h" |
20 |
|
#include "theory/strings/theory_strings_utils.h" |
21 |
|
#include "util/rational.h" |
22 |
|
#include "util/string.h" |
23 |
|
|
24 |
|
using namespace cvc5::kind; |
25 |
|
|
26 |
|
namespace cvc5 { |
27 |
|
namespace theory { |
28 |
|
namespace strings { |
29 |
|
|
30 |
9952 |
StringsRewriter::StringsRewriter(HistogramStat<Rewrite>* statistics) |
31 |
9952 |
: SequencesRewriter(statistics) |
32 |
|
{ |
33 |
9952 |
} |
34 |
|
|
35 |
617526 |
RewriteResponse StringsRewriter::postRewrite(TNode node) |
36 |
|
{ |
37 |
1235052 |
Trace("strings-postrewrite") |
38 |
617526 |
<< "Strings::StringsRewriter::postRewrite start " << node << std::endl; |
39 |
|
|
40 |
1235052 |
Node retNode = node; |
41 |
617526 |
Kind nk = node.getKind(); |
42 |
617526 |
if (nk == kind::STRING_LT) |
43 |
|
{ |
44 |
17 |
retNode = rewriteStringLt(node); |
45 |
|
} |
46 |
617509 |
else if (nk == kind::STRING_LEQ) |
47 |
|
{ |
48 |
366 |
retNode = rewriteStringLeq(node); |
49 |
|
} |
50 |
617143 |
else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) |
51 |
|
{ |
52 |
212 |
retNode = rewriteStrConvert(node); |
53 |
|
} |
54 |
616931 |
else if (nk == STRING_IS_DIGIT) |
55 |
|
{ |
56 |
8 |
retNode = rewriteStringIsDigit(node); |
57 |
|
} |
58 |
616923 |
else if (nk == kind::STRING_ITOS) |
59 |
|
{ |
60 |
573 |
retNode = rewriteIntToStr(node); |
61 |
|
} |
62 |
616350 |
else if (nk == kind::STRING_STOI) |
63 |
|
{ |
64 |
1266 |
retNode = rewriteStrToInt(node); |
65 |
|
} |
66 |
615084 |
else if (nk == STRING_TO_CODE) |
67 |
|
{ |
68 |
6643 |
retNode = rewriteStringToCode(node); |
69 |
|
} |
70 |
608441 |
else if (nk == STRING_FROM_CODE) |
71 |
|
{ |
72 |
108 |
retNode = rewriteStringFromCode(node); |
73 |
|
} |
74 |
|
else |
75 |
|
{ |
76 |
608333 |
return SequencesRewriter::postRewrite(node); |
77 |
|
} |
78 |
|
|
79 |
18386 |
Trace("strings-postrewrite") |
80 |
9193 |
<< "Strings::StringsRewriter::postRewrite returning " << retNode |
81 |
9193 |
<< std::endl; |
82 |
9193 |
if (node != retNode) |
83 |
|
{ |
84 |
5578 |
Trace("strings-rewrite-debug") << "Strings::StringsRewriter::postRewrite " |
85 |
2789 |
<< node << " to " << retNode << std::endl; |
86 |
2789 |
return RewriteResponse(REWRITE_AGAIN_FULL, retNode); |
87 |
|
} |
88 |
6404 |
return RewriteResponse(REWRITE_DONE, retNode); |
89 |
|
} |
90 |
|
|
91 |
1266 |
Node StringsRewriter::rewriteStrToInt(Node node) |
92 |
|
{ |
93 |
1266 |
Assert(node.getKind() == STRING_STOI); |
94 |
1266 |
NodeManager* nm = NodeManager::currentNM(); |
95 |
1266 |
if (node[0].isConst()) |
96 |
|
{ |
97 |
682 |
Node ret; |
98 |
682 |
String s = node[0].getConst<String>(); |
99 |
341 |
if (s.isNumber()) |
100 |
|
{ |
101 |
153 |
ret = nm->mkConst(s.toNumber()); |
102 |
|
} |
103 |
|
else |
104 |
|
{ |
105 |
188 |
ret = nm->mkConst(Rational(-1)); |
106 |
|
} |
107 |
341 |
return returnRewrite(node, ret, Rewrite::STOI_EVAL); |
108 |
|
} |
109 |
925 |
else if (node[0].getKind() == STRING_CONCAT) |
110 |
|
{ |
111 |
1441 |
for (TNode nc : node[0]) |
112 |
|
{ |
113 |
1091 |
if (nc.isConst()) |
114 |
|
{ |
115 |
287 |
String t = nc.getConst<String>(); |
116 |
165 |
if (!t.isNumber()) |
117 |
|
{ |
118 |
86 |
Node ret = nm->mkConst(Rational(-1)); |
119 |
43 |
return returnRewrite(node, ret, Rewrite::STOI_CONCAT_NONNUM); |
120 |
|
} |
121 |
|
} |
122 |
|
} |
123 |
|
} |
124 |
882 |
return node; |
125 |
|
} |
126 |
|
|
127 |
573 |
Node StringsRewriter::rewriteIntToStr(Node node) |
128 |
|
{ |
129 |
573 |
Assert(node.getKind() == STRING_ITOS); |
130 |
573 |
NodeManager* nm = NodeManager::currentNM(); |
131 |
573 |
if (node[0].isConst()) |
132 |
|
{ |
133 |
530 |
Node ret; |
134 |
265 |
if (node[0].getConst<Rational>().sgn() == -1) |
135 |
|
{ |
136 |
129 |
ret = nm->mkConst(String("")); |
137 |
|
} |
138 |
|
else |
139 |
|
{ |
140 |
272 |
std::string stmp = node[0].getConst<Rational>().getNumerator().toString(); |
141 |
136 |
Assert(stmp[0] != '-'); |
142 |
136 |
ret = nm->mkConst(String(stmp)); |
143 |
|
} |
144 |
265 |
return returnRewrite(node, ret, Rewrite::ITOS_EVAL); |
145 |
|
} |
146 |
308 |
return node; |
147 |
|
} |
148 |
|
|
149 |
212 |
Node StringsRewriter::rewriteStrConvert(Node node) |
150 |
|
{ |
151 |
212 |
Kind nk = node.getKind(); |
152 |
212 |
Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); |
153 |
212 |
NodeManager* nm = NodeManager::currentNM(); |
154 |
212 |
if (node[0].isConst()) |
155 |
|
{ |
156 |
52 |
std::vector<unsigned> nvec = node[0].getConst<String>().getVec(); |
157 |
96 |
for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) |
158 |
|
{ |
159 |
70 |
unsigned newChar = nvec[i]; |
160 |
|
// transform it |
161 |
|
// upper 65 ... 90 |
162 |
|
// lower 97 ... 122 |
163 |
70 |
if (nk == STRING_TOUPPER) |
164 |
|
{ |
165 |
36 |
if (newChar >= 97 && newChar <= 122) |
166 |
|
{ |
167 |
22 |
newChar = newChar - 32; |
168 |
|
} |
169 |
|
} |
170 |
34 |
else if (nk == STRING_TOLOWER) |
171 |
|
{ |
172 |
34 |
if (newChar >= 65 && newChar <= 90) |
173 |
|
{ |
174 |
25 |
newChar = newChar + 32; |
175 |
|
} |
176 |
|
} |
177 |
70 |
nvec[i] = newChar; |
178 |
|
} |
179 |
52 |
Node retNode = nm->mkConst(String(nvec)); |
180 |
26 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST); |
181 |
|
} |
182 |
186 |
else if (node[0].getKind() == STRING_CONCAT) |
183 |
|
{ |
184 |
100 |
NodeBuilder concatBuilder(STRING_CONCAT); |
185 |
222 |
for (const Node& nc : node[0]) |
186 |
|
{ |
187 |
172 |
concatBuilder << nm->mkNode(nk, nc); |
188 |
|
} |
189 |
|
// tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) |
190 |
100 |
Node retNode = concatBuilder.constructNode(); |
191 |
50 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_MINSCOPE_CONCAT); |
192 |
|
} |
193 |
408 |
else if (node[0].getKind() == STRING_TOLOWER |
194 |
408 |
|| node[0].getKind() == STRING_TOUPPER) |
195 |
|
{ |
196 |
|
// tolower( tolower( x ) ) --> tolower( x ) |
197 |
|
// tolower( toupper( x ) ) --> tolower( x ) |
198 |
16 |
Node retNode = nm->mkNode(nk, node[0][0]); |
199 |
8 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_IDEM); |
200 |
|
} |
201 |
128 |
else if (node[0].getKind() == STRING_ITOS) |
202 |
|
{ |
203 |
|
// tolower( str.from.int( x ) ) --> str.from.int( x ) |
204 |
4 |
return returnRewrite(node, node[0], Rewrite::STR_CONV_ITOS); |
205 |
|
} |
206 |
124 |
return node; |
207 |
|
} |
208 |
|
|
209 |
17 |
Node StringsRewriter::rewriteStringLt(Node n) |
210 |
|
{ |
211 |
17 |
Assert(n.getKind() == kind::STRING_LT); |
212 |
17 |
NodeManager* nm = NodeManager::currentNM(); |
213 |
|
// eliminate s < t ---> s != t AND s <= t |
214 |
|
Node retNode = nm->mkNode( |
215 |
34 |
AND, n[0].eqNode(n[1]).negate(), nm->mkNode(STRING_LEQ, n[0], n[1])); |
216 |
34 |
return returnRewrite(n, retNode, Rewrite::STR_LT_ELIM); |
217 |
|
} |
218 |
|
|
219 |
366 |
Node StringsRewriter::rewriteStringLeq(Node n) |
220 |
|
{ |
221 |
366 |
Assert(n.getKind() == kind::STRING_LEQ); |
222 |
366 |
NodeManager* nm = NodeManager::currentNM(); |
223 |
366 |
if (n[0] == n[1]) |
224 |
|
{ |
225 |
42 |
Node ret = nm->mkConst(true); |
226 |
21 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_ID); |
227 |
|
} |
228 |
345 |
if (n[0].isConst() && n[1].isConst()) |
229 |
|
{ |
230 |
90 |
String s = n[0].getConst<String>(); |
231 |
90 |
String t = n[1].getConst<String>(); |
232 |
90 |
Node ret = nm->mkConst(s.isLeq(t)); |
233 |
45 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EVAL); |
234 |
|
} |
235 |
|
// empty strings |
236 |
880 |
for (unsigned i = 0; i < 2; i++) |
237 |
|
{ |
238 |
595 |
if (n[i].isConst() && n[i].getConst<String>().empty()) |
239 |
|
{ |
240 |
30 |
Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]); |
241 |
15 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EMPTY); |
242 |
|
} |
243 |
|
} |
244 |
|
|
245 |
570 |
std::vector<Node> n1; |
246 |
285 |
utils::getConcat(n[0], n1); |
247 |
570 |
std::vector<Node> n2; |
248 |
285 |
utils::getConcat(n[1], n2); |
249 |
285 |
Assert(!n1.empty() && !n2.empty()); |
250 |
|
|
251 |
|
// constant prefixes |
252 |
285 |
if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) |
253 |
|
{ |
254 |
111 |
String s = n1[0].getConst<String>(); |
255 |
111 |
String t = n2[0].getConst<String>(); |
256 |
71 |
size_t prefixLen = std::min(s.size(), t.size()); |
257 |
71 |
s = s.prefix(prefixLen); |
258 |
71 |
t = t.prefix(prefixLen); |
259 |
|
// if the prefixes are not the same, then we can already decide the outcome |
260 |
71 |
if (s != t) |
261 |
|
{ |
262 |
62 |
Node ret = nm->mkConst(s.isLeq(t)); |
263 |
31 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_CPREFIX); |
264 |
|
} |
265 |
|
} |
266 |
254 |
return n; |
267 |
|
} |
268 |
|
|
269 |
108 |
Node StringsRewriter::rewriteStringFromCode(Node n) |
270 |
|
{ |
271 |
108 |
Assert(n.getKind() == kind::STRING_FROM_CODE); |
272 |
108 |
NodeManager* nm = NodeManager::currentNM(); |
273 |
|
|
274 |
108 |
if (n[0].isConst()) |
275 |
|
{ |
276 |
44 |
Integer i = n[0].getConst<Rational>().getNumerator(); |
277 |
44 |
Node ret; |
278 |
22 |
if (i >= 0 && i < strings::utils::getAlphabetCardinality()) |
279 |
|
{ |
280 |
18 |
std::vector<unsigned> svec = {i.toUnsignedInt()}; |
281 |
9 |
ret = nm->mkConst(String(svec)); |
282 |
|
} |
283 |
|
else |
284 |
|
{ |
285 |
13 |
ret = nm->mkConst(String("")); |
286 |
|
} |
287 |
22 |
return returnRewrite(n, ret, Rewrite::FROM_CODE_EVAL); |
288 |
|
} |
289 |
86 |
return n; |
290 |
|
} |
291 |
|
|
292 |
6643 |
Node StringsRewriter::rewriteStringToCode(Node n) |
293 |
|
{ |
294 |
6643 |
Assert(n.getKind() == kind::STRING_TO_CODE); |
295 |
6643 |
if (n[0].isConst()) |
296 |
|
{ |
297 |
1893 |
NodeManager* nm = NodeManager::currentNM(); |
298 |
3786 |
String s = n[0].getConst<String>(); |
299 |
3786 |
Node ret; |
300 |
1893 |
if (s.size() == 1) |
301 |
|
{ |
302 |
1452 |
std::vector<unsigned> vec = s.getVec(); |
303 |
726 |
Assert(vec.size() == 1); |
304 |
726 |
ret = nm->mkConst(Rational(vec[0])); |
305 |
|
} |
306 |
|
else |
307 |
|
{ |
308 |
1167 |
ret = nm->mkConst(Rational(-1)); |
309 |
|
} |
310 |
1893 |
return returnRewrite(n, ret, Rewrite::TO_CODE_EVAL); |
311 |
|
} |
312 |
4750 |
return n; |
313 |
|
} |
314 |
|
|
315 |
8 |
Node StringsRewriter::rewriteStringIsDigit(Node n) |
316 |
|
{ |
317 |
8 |
Assert(n.getKind() == kind::STRING_IS_DIGIT); |
318 |
8 |
NodeManager* nm = NodeManager::currentNM(); |
319 |
|
// eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 |
320 |
16 |
Node t = nm->mkNode(STRING_TO_CODE, n[0]); |
321 |
|
Node retNode = nm->mkNode(AND, |
322 |
16 |
nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), |
323 |
32 |
nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); |
324 |
16 |
return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); |
325 |
|
} |
326 |
|
|
327 |
|
} // namespace strings |
328 |
|
} // namespace theory |
329 |
29574 |
} // namespace cvc5 |