1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Gereon Kremer |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of rewrite rules for string-specific operators in |
14 |
|
* theory of strings. |
15 |
|
*/ |
16 |
|
|
17 |
|
#include "theory/strings/strings_rewriter.h" |
18 |
|
|
19 |
|
#include "expr/node_builder.h" |
20 |
|
#include "theory/strings/theory_strings_utils.h" |
21 |
|
#include "util/rational.h" |
22 |
|
|
23 |
|
using namespace cvc5::kind; |
24 |
|
|
25 |
|
namespace cvc5 { |
26 |
|
namespace theory { |
27 |
|
namespace strings { |
28 |
|
|
29 |
9471 |
StringsRewriter::StringsRewriter(HistogramStat<Rewrite>* statistics) |
30 |
9471 |
: SequencesRewriter(statistics) |
31 |
|
{ |
32 |
9471 |
} |
33 |
|
|
34 |
387050 |
RewriteResponse StringsRewriter::postRewrite(TNode node) |
35 |
|
{ |
36 |
774100 |
Trace("strings-postrewrite") |
37 |
387050 |
<< "Strings::StringsRewriter::postRewrite start " << node << std::endl; |
38 |
|
|
39 |
774100 |
Node retNode = node; |
40 |
387050 |
Kind nk = node.getKind(); |
41 |
387050 |
if (nk == kind::STRING_LT) |
42 |
|
{ |
43 |
17 |
retNode = rewriteStringLt(node); |
44 |
|
} |
45 |
387033 |
else if (nk == kind::STRING_LEQ) |
46 |
|
{ |
47 |
343 |
retNode = rewriteStringLeq(node); |
48 |
|
} |
49 |
386690 |
else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) |
50 |
|
{ |
51 |
216 |
retNode = rewriteStrConvert(node); |
52 |
|
} |
53 |
386474 |
else if (nk == STRING_IS_DIGIT) |
54 |
|
{ |
55 |
8 |
retNode = rewriteStringIsDigit(node); |
56 |
|
} |
57 |
386466 |
else if (nk == kind::STRING_ITOS) |
58 |
|
{ |
59 |
559 |
retNode = rewriteIntToStr(node); |
60 |
|
} |
61 |
385907 |
else if (nk == kind::STRING_STOI) |
62 |
|
{ |
63 |
471 |
retNode = rewriteStrToInt(node); |
64 |
|
} |
65 |
385436 |
else if (nk == STRING_TO_CODE) |
66 |
|
{ |
67 |
3712 |
retNode = rewriteStringToCode(node); |
68 |
|
} |
69 |
381724 |
else if (nk == STRING_FROM_CODE) |
70 |
|
{ |
71 |
84 |
retNode = rewriteStringFromCode(node); |
72 |
|
} |
73 |
|
else |
74 |
|
{ |
75 |
381640 |
return SequencesRewriter::postRewrite(node); |
76 |
|
} |
77 |
|
|
78 |
10820 |
Trace("strings-postrewrite") |
79 |
5410 |
<< "Strings::StringsRewriter::postRewrite returning " << retNode |
80 |
5410 |
<< std::endl; |
81 |
5410 |
if (node != retNode) |
82 |
|
{ |
83 |
3236 |
Trace("strings-rewrite-debug") << "Strings::StringsRewriter::postRewrite " |
84 |
1618 |
<< node << " to " << retNode << std::endl; |
85 |
1618 |
return RewriteResponse(REWRITE_AGAIN_FULL, retNode); |
86 |
|
} |
87 |
3792 |
return RewriteResponse(REWRITE_DONE, retNode); |
88 |
|
} |
89 |
|
|
90 |
471 |
Node StringsRewriter::rewriteStrToInt(Node node) |
91 |
|
{ |
92 |
471 |
Assert(node.getKind() == STRING_STOI); |
93 |
471 |
NodeManager* nm = NodeManager::currentNM(); |
94 |
471 |
if (node[0].isConst()) |
95 |
|
{ |
96 |
382 |
Node ret; |
97 |
382 |
String s = node[0].getConst<String>(); |
98 |
191 |
if (s.isNumber()) |
99 |
|
{ |
100 |
91 |
ret = nm->mkConst(s.toNumber()); |
101 |
|
} |
102 |
|
else |
103 |
|
{ |
104 |
100 |
ret = nm->mkConst(Rational(-1)); |
105 |
|
} |
106 |
191 |
return returnRewrite(node, ret, Rewrite::STOI_EVAL); |
107 |
|
} |
108 |
280 |
else if (node[0].getKind() == STRING_CONCAT) |
109 |
|
{ |
110 |
752 |
for (TNode nc : node[0]) |
111 |
|
{ |
112 |
624 |
if (nc.isConst()) |
113 |
|
{ |
114 |
36 |
String t = nc.getConst<String>(); |
115 |
20 |
if (!t.isNumber()) |
116 |
|
{ |
117 |
8 |
Node ret = nm->mkConst(Rational(-1)); |
118 |
4 |
return returnRewrite(node, ret, Rewrite::STOI_CONCAT_NONNUM); |
119 |
|
} |
120 |
|
} |
121 |
|
} |
122 |
|
} |
123 |
276 |
return node; |
124 |
|
} |
125 |
|
|
126 |
559 |
Node StringsRewriter::rewriteIntToStr(Node node) |
127 |
|
{ |
128 |
559 |
Assert(node.getKind() == STRING_ITOS); |
129 |
559 |
NodeManager* nm = NodeManager::currentNM(); |
130 |
559 |
if (node[0].isConst()) |
131 |
|
{ |
132 |
526 |
Node ret; |
133 |
263 |
if (node[0].getConst<Rational>().sgn() == -1) |
134 |
|
{ |
135 |
129 |
ret = nm->mkConst(String("")); |
136 |
|
} |
137 |
|
else |
138 |
|
{ |
139 |
268 |
std::string stmp = node[0].getConst<Rational>().getNumerator().toString(); |
140 |
134 |
Assert(stmp[0] != '-'); |
141 |
134 |
ret = nm->mkConst(String(stmp)); |
142 |
|
} |
143 |
263 |
return returnRewrite(node, ret, Rewrite::ITOS_EVAL); |
144 |
|
} |
145 |
296 |
return node; |
146 |
|
} |
147 |
|
|
148 |
216 |
Node StringsRewriter::rewriteStrConvert(Node node) |
149 |
|
{ |
150 |
216 |
Kind nk = node.getKind(); |
151 |
216 |
Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); |
152 |
216 |
NodeManager* nm = NodeManager::currentNM(); |
153 |
216 |
if (node[0].isConst()) |
154 |
|
{ |
155 |
44 |
std::vector<unsigned> nvec = node[0].getConst<String>().getVec(); |
156 |
74 |
for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) |
157 |
|
{ |
158 |
52 |
unsigned newChar = nvec[i]; |
159 |
|
// transform it |
160 |
|
// upper 65 ... 90 |
161 |
|
// lower 97 ... 122 |
162 |
52 |
if (nk == STRING_TOUPPER) |
163 |
|
{ |
164 |
18 |
if (newChar >= 97 && newChar <= 122) |
165 |
|
{ |
166 |
16 |
newChar = newChar - 32; |
167 |
|
} |
168 |
|
} |
169 |
34 |
else if (nk == STRING_TOLOWER) |
170 |
|
{ |
171 |
34 |
if (newChar >= 65 && newChar <= 90) |
172 |
|
{ |
173 |
25 |
newChar = newChar + 32; |
174 |
|
} |
175 |
|
} |
176 |
52 |
nvec[i] = newChar; |
177 |
|
} |
178 |
44 |
Node retNode = nm->mkConst(String(nvec)); |
179 |
22 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST); |
180 |
|
} |
181 |
194 |
else if (node[0].getKind() == STRING_CONCAT) |
182 |
|
{ |
183 |
108 |
NodeBuilder concatBuilder(STRING_CONCAT); |
184 |
236 |
for (const Node& nc : node[0]) |
185 |
|
{ |
186 |
182 |
concatBuilder << nm->mkNode(nk, nc); |
187 |
|
} |
188 |
|
// tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) |
189 |
108 |
Node retNode = concatBuilder.constructNode(); |
190 |
54 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_MINSCOPE_CONCAT); |
191 |
|
} |
192 |
420 |
else if (node[0].getKind() == STRING_TOLOWER |
193 |
420 |
|| node[0].getKind() == STRING_TOUPPER) |
194 |
|
{ |
195 |
|
// tolower( tolower( x ) ) --> tolower( x ) |
196 |
|
// tolower( toupper( x ) ) --> tolower( x ) |
197 |
16 |
Node retNode = nm->mkNode(nk, node[0][0]); |
198 |
8 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_IDEM); |
199 |
|
} |
200 |
132 |
else if (node[0].getKind() == STRING_ITOS) |
201 |
|
{ |
202 |
|
// tolower( str.from.int( x ) ) --> str.from.int( x ) |
203 |
4 |
return returnRewrite(node, node[0], Rewrite::STR_CONV_ITOS); |
204 |
|
} |
205 |
128 |
return node; |
206 |
|
} |
207 |
|
|
208 |
17 |
Node StringsRewriter::rewriteStringLt(Node n) |
209 |
|
{ |
210 |
17 |
Assert(n.getKind() == kind::STRING_LT); |
211 |
17 |
NodeManager* nm = NodeManager::currentNM(); |
212 |
|
// eliminate s < t ---> s != t AND s <= t |
213 |
|
Node retNode = nm->mkNode( |
214 |
34 |
AND, n[0].eqNode(n[1]).negate(), nm->mkNode(STRING_LEQ, n[0], n[1])); |
215 |
34 |
return returnRewrite(n, retNode, Rewrite::STR_LT_ELIM); |
216 |
|
} |
217 |
|
|
218 |
343 |
Node StringsRewriter::rewriteStringLeq(Node n) |
219 |
|
{ |
220 |
343 |
Assert(n.getKind() == kind::STRING_LEQ); |
221 |
343 |
NodeManager* nm = NodeManager::currentNM(); |
222 |
343 |
if (n[0] == n[1]) |
223 |
|
{ |
224 |
42 |
Node ret = nm->mkConst(true); |
225 |
21 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_ID); |
226 |
|
} |
227 |
322 |
if (n[0].isConst() && n[1].isConst()) |
228 |
|
{ |
229 |
66 |
String s = n[0].getConst<String>(); |
230 |
66 |
String t = n[1].getConst<String>(); |
231 |
66 |
Node ret = nm->mkConst(s.isLeq(t)); |
232 |
33 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EVAL); |
233 |
|
} |
234 |
|
// empty strings |
235 |
849 |
for (unsigned i = 0; i < 2; i++) |
236 |
|
{ |
237 |
573 |
if (n[i].isConst() && n[i].getConst<String>().empty()) |
238 |
|
{ |
239 |
26 |
Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]); |
240 |
13 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EMPTY); |
241 |
|
} |
242 |
|
} |
243 |
|
|
244 |
552 |
std::vector<Node> n1; |
245 |
276 |
utils::getConcat(n[0], n1); |
246 |
552 |
std::vector<Node> n2; |
247 |
276 |
utils::getConcat(n[1], n2); |
248 |
276 |
Assert(!n1.empty() && !n2.empty()); |
249 |
|
|
250 |
|
// constant prefixes |
251 |
276 |
if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) |
252 |
|
{ |
253 |
136 |
String s = n1[0].getConst<String>(); |
254 |
136 |
String t = n2[0].getConst<String>(); |
255 |
92 |
size_t prefixLen = std::min(s.size(), t.size()); |
256 |
92 |
s = s.prefix(prefixLen); |
257 |
92 |
t = t.prefix(prefixLen); |
258 |
|
// if the prefixes are not the same, then we can already decide the outcome |
259 |
92 |
if (s != t) |
260 |
|
{ |
261 |
96 |
Node ret = nm->mkConst(s.isLeq(t)); |
262 |
48 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_CPREFIX); |
263 |
|
} |
264 |
|
} |
265 |
228 |
return n; |
266 |
|
} |
267 |
|
|
268 |
84 |
Node StringsRewriter::rewriteStringFromCode(Node n) |
269 |
|
{ |
270 |
84 |
Assert(n.getKind() == kind::STRING_FROM_CODE); |
271 |
84 |
NodeManager* nm = NodeManager::currentNM(); |
272 |
|
|
273 |
84 |
if (n[0].isConst()) |
274 |
|
{ |
275 |
44 |
Integer i = n[0].getConst<Rational>().getNumerator(); |
276 |
44 |
Node ret; |
277 |
22 |
if (i >= 0 && i < strings::utils::getAlphabetCardinality()) |
278 |
|
{ |
279 |
18 |
std::vector<unsigned> svec = {i.toUnsignedInt()}; |
280 |
9 |
ret = nm->mkConst(String(svec)); |
281 |
|
} |
282 |
|
else |
283 |
|
{ |
284 |
13 |
ret = nm->mkConst(String("")); |
285 |
|
} |
286 |
22 |
return returnRewrite(n, ret, Rewrite::FROM_CODE_EVAL); |
287 |
|
} |
288 |
62 |
return n; |
289 |
|
} |
290 |
|
|
291 |
3712 |
Node StringsRewriter::rewriteStringToCode(Node n) |
292 |
|
{ |
293 |
3712 |
Assert(n.getKind() == kind::STRING_TO_CODE); |
294 |
3712 |
if (n[0].isConst()) |
295 |
|
{ |
296 |
910 |
NodeManager* nm = NodeManager::currentNM(); |
297 |
1820 |
String s = n[0].getConst<String>(); |
298 |
1820 |
Node ret; |
299 |
910 |
if (s.size() == 1) |
300 |
|
{ |
301 |
924 |
std::vector<unsigned> vec = s.getVec(); |
302 |
462 |
Assert(vec.size() == 1); |
303 |
462 |
ret = nm->mkConst(Rational(vec[0])); |
304 |
|
} |
305 |
|
else |
306 |
|
{ |
307 |
448 |
ret = nm->mkConst(Rational(-1)); |
308 |
|
} |
309 |
910 |
return returnRewrite(n, ret, Rewrite::TO_CODE_EVAL); |
310 |
|
} |
311 |
2802 |
return n; |
312 |
|
} |
313 |
|
|
314 |
8 |
Node StringsRewriter::rewriteStringIsDigit(Node n) |
315 |
|
{ |
316 |
8 |
Assert(n.getKind() == kind::STRING_IS_DIGIT); |
317 |
8 |
NodeManager* nm = NodeManager::currentNM(); |
318 |
|
// eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 |
319 |
16 |
Node t = nm->mkNode(STRING_TO_CODE, n[0]); |
320 |
|
Node retNode = nm->mkNode(AND, |
321 |
16 |
nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), |
322 |
32 |
nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); |
323 |
16 |
return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); |
324 |
|
} |
325 |
|
|
326 |
|
} // namespace strings |
327 |
|
} // namespace theory |
328 |
28191 |
} // namespace cvc5 |