1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Andres Noetzli, Gereon Kremer |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Implementation of rewrite rules for string-specific operators in |
14 |
|
* theory of strings. |
15 |
|
*/ |
16 |
|
|
17 |
|
#include "theory/strings/strings_rewriter.h" |
18 |
|
|
19 |
|
#include "expr/node_builder.h" |
20 |
|
#include "theory/strings/theory_strings_utils.h" |
21 |
|
#include "util/rational.h" |
22 |
|
#include "util/string.h" |
23 |
|
|
24 |
|
using namespace cvc5::kind; |
25 |
|
|
26 |
|
namespace cvc5 { |
27 |
|
namespace theory { |
28 |
|
namespace strings { |
29 |
|
|
30 |
6273 |
StringsRewriter::StringsRewriter(Rewriter* r, |
31 |
6273 |
HistogramStat<Rewrite>* statistics) |
32 |
6273 |
: SequencesRewriter(r, statistics) |
33 |
|
{ |
34 |
6273 |
} |
35 |
|
|
36 |
426386 |
RewriteResponse StringsRewriter::postRewrite(TNode node) |
37 |
|
{ |
38 |
852772 |
Trace("strings-postrewrite") |
39 |
426386 |
<< "Strings::StringsRewriter::postRewrite start " << node << std::endl; |
40 |
|
|
41 |
852772 |
Node retNode = node; |
42 |
426386 |
Kind nk = node.getKind(); |
43 |
426386 |
if (nk == kind::STRING_LT) |
44 |
|
{ |
45 |
14 |
retNode = rewriteStringLt(node); |
46 |
|
} |
47 |
426372 |
else if (nk == kind::STRING_LEQ) |
48 |
|
{ |
49 |
252 |
retNode = rewriteStringLeq(node); |
50 |
|
} |
51 |
426120 |
else if (nk == STRING_TOLOWER || nk == STRING_TOUPPER) |
52 |
|
{ |
53 |
185 |
retNode = rewriteStrConvert(node); |
54 |
|
} |
55 |
425935 |
else if (nk == STRING_IS_DIGIT) |
56 |
|
{ |
57 |
8 |
retNode = rewriteStringIsDigit(node); |
58 |
|
} |
59 |
425927 |
else if (nk == kind::STRING_ITOS) |
60 |
|
{ |
61 |
525 |
retNode = rewriteIntToStr(node); |
62 |
|
} |
63 |
425402 |
else if (nk == kind::STRING_STOI) |
64 |
|
{ |
65 |
1077 |
retNode = rewriteStrToInt(node); |
66 |
|
} |
67 |
424325 |
else if (nk == STRING_TO_CODE) |
68 |
|
{ |
69 |
5189 |
retNode = rewriteStringToCode(node); |
70 |
|
} |
71 |
419136 |
else if (nk == STRING_FROM_CODE) |
72 |
|
{ |
73 |
53 |
retNode = rewriteStringFromCode(node); |
74 |
|
} |
75 |
|
else |
76 |
|
{ |
77 |
419083 |
return SequencesRewriter::postRewrite(node); |
78 |
|
} |
79 |
|
|
80 |
14606 |
Trace("strings-postrewrite") |
81 |
7303 |
<< "Strings::StringsRewriter::postRewrite returning " << retNode |
82 |
7303 |
<< std::endl; |
83 |
7303 |
if (node != retNode) |
84 |
|
{ |
85 |
4614 |
Trace("strings-rewrite-debug") << "Strings::StringsRewriter::postRewrite " |
86 |
2307 |
<< node << " to " << retNode << std::endl; |
87 |
2307 |
return RewriteResponse(REWRITE_AGAIN_FULL, retNode); |
88 |
|
} |
89 |
4996 |
return RewriteResponse(REWRITE_DONE, retNode); |
90 |
|
} |
91 |
|
|
92 |
1077 |
Node StringsRewriter::rewriteStrToInt(Node node) |
93 |
|
{ |
94 |
1077 |
Assert(node.getKind() == STRING_STOI); |
95 |
1077 |
NodeManager* nm = NodeManager::currentNM(); |
96 |
1077 |
if (node[0].isConst()) |
97 |
|
{ |
98 |
540 |
Node ret; |
99 |
540 |
String s = node[0].getConst<String>(); |
100 |
270 |
if (s.isNumber()) |
101 |
|
{ |
102 |
99 |
ret = nm->mkConst(s.toNumber()); |
103 |
|
} |
104 |
|
else |
105 |
|
{ |
106 |
171 |
ret = nm->mkConst(Rational(-1)); |
107 |
|
} |
108 |
270 |
return returnRewrite(node, ret, Rewrite::STOI_EVAL); |
109 |
|
} |
110 |
807 |
else if (node[0].getKind() == STRING_CONCAT) |
111 |
|
{ |
112 |
1385 |
for (TNode nc : node[0]) |
113 |
|
{ |
114 |
1053 |
if (nc.isConst()) |
115 |
|
{ |
116 |
269 |
String t = nc.getConst<String>(); |
117 |
155 |
if (!t.isNumber()) |
118 |
|
{ |
119 |
82 |
Node ret = nm->mkConst(Rational(-1)); |
120 |
41 |
return returnRewrite(node, ret, Rewrite::STOI_CONCAT_NONNUM); |
121 |
|
} |
122 |
|
} |
123 |
|
} |
124 |
|
} |
125 |
766 |
return node; |
126 |
|
} |
127 |
|
|
128 |
525 |
Node StringsRewriter::rewriteIntToStr(Node node) |
129 |
|
{ |
130 |
525 |
Assert(node.getKind() == STRING_ITOS); |
131 |
525 |
NodeManager* nm = NodeManager::currentNM(); |
132 |
525 |
if (node[0].isConst()) |
133 |
|
{ |
134 |
518 |
Node ret; |
135 |
259 |
if (node[0].getConst<Rational>().sgn() == -1) |
136 |
|
{ |
137 |
129 |
ret = nm->mkConst(String("")); |
138 |
|
} |
139 |
|
else |
140 |
|
{ |
141 |
260 |
std::string stmp = node[0].getConst<Rational>().getNumerator().toString(); |
142 |
130 |
Assert(stmp[0] != '-'); |
143 |
130 |
ret = nm->mkConst(String(stmp)); |
144 |
|
} |
145 |
259 |
return returnRewrite(node, ret, Rewrite::ITOS_EVAL); |
146 |
|
} |
147 |
266 |
return node; |
148 |
|
} |
149 |
|
|
150 |
185 |
Node StringsRewriter::rewriteStrConvert(Node node) |
151 |
|
{ |
152 |
185 |
Kind nk = node.getKind(); |
153 |
185 |
Assert(nk == STRING_TOLOWER || nk == STRING_TOUPPER); |
154 |
185 |
NodeManager* nm = NodeManager::currentNM(); |
155 |
185 |
if (node[0].isConst()) |
156 |
|
{ |
157 |
46 |
std::vector<unsigned> nvec = node[0].getConst<String>().getVec(); |
158 |
90 |
for (unsigned i = 0, nvsize = nvec.size(); i < nvsize; i++) |
159 |
|
{ |
160 |
67 |
unsigned newChar = nvec[i]; |
161 |
|
// transform it |
162 |
|
// upper 65 ... 90 |
163 |
|
// lower 97 ... 122 |
164 |
67 |
if (nk == STRING_TOUPPER) |
165 |
|
{ |
166 |
36 |
if (newChar >= 97 && newChar <= 122) |
167 |
|
{ |
168 |
22 |
newChar = newChar - 32; |
169 |
|
} |
170 |
|
} |
171 |
31 |
else if (nk == STRING_TOLOWER) |
172 |
|
{ |
173 |
31 |
if (newChar >= 65 && newChar <= 90) |
174 |
|
{ |
175 |
22 |
newChar = newChar + 32; |
176 |
|
} |
177 |
|
} |
178 |
67 |
nvec[i] = newChar; |
179 |
|
} |
180 |
46 |
Node retNode = nm->mkConst(String(nvec)); |
181 |
23 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_CONST); |
182 |
|
} |
183 |
162 |
else if (node[0].getKind() == STRING_CONCAT) |
184 |
|
{ |
185 |
94 |
NodeBuilder concatBuilder(STRING_CONCAT); |
186 |
213 |
for (const Node& nc : node[0]) |
187 |
|
{ |
188 |
166 |
concatBuilder << nm->mkNode(nk, nc); |
189 |
|
} |
190 |
|
// tolower( x1 ++ x2 ) --> tolower( x1 ) ++ tolower( x2 ) |
191 |
94 |
Node retNode = concatBuilder.constructNode(); |
192 |
47 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_MINSCOPE_CONCAT); |
193 |
|
} |
194 |
345 |
else if (node[0].getKind() == STRING_TOLOWER |
195 |
345 |
|| node[0].getKind() == STRING_TOUPPER) |
196 |
|
{ |
197 |
|
// tolower( tolower( x ) ) --> tolower( x ) |
198 |
|
// tolower( toupper( x ) ) --> tolower( x ) |
199 |
4 |
Node retNode = nm->mkNode(nk, node[0][0]); |
200 |
2 |
return returnRewrite(node, retNode, Rewrite::STR_CONV_IDEM); |
201 |
|
} |
202 |
113 |
else if (node[0].getKind() == STRING_ITOS) |
203 |
|
{ |
204 |
|
// tolower( str.from.int( x ) ) --> str.from.int( x ) |
205 |
1 |
return returnRewrite(node, node[0], Rewrite::STR_CONV_ITOS); |
206 |
|
} |
207 |
112 |
return node; |
208 |
|
} |
209 |
|
|
210 |
14 |
Node StringsRewriter::rewriteStringLt(Node n) |
211 |
|
{ |
212 |
14 |
Assert(n.getKind() == kind::STRING_LT); |
213 |
14 |
NodeManager* nm = NodeManager::currentNM(); |
214 |
|
// eliminate s < t ---> s != t AND s <= t |
215 |
|
Node retNode = nm->mkNode( |
216 |
28 |
AND, n[0].eqNode(n[1]).negate(), nm->mkNode(STRING_LEQ, n[0], n[1])); |
217 |
28 |
return returnRewrite(n, retNode, Rewrite::STR_LT_ELIM); |
218 |
|
} |
219 |
|
|
220 |
252 |
Node StringsRewriter::rewriteStringLeq(Node n) |
221 |
|
{ |
222 |
252 |
Assert(n.getKind() == kind::STRING_LEQ); |
223 |
252 |
NodeManager* nm = NodeManager::currentNM(); |
224 |
252 |
if (n[0] == n[1]) |
225 |
|
{ |
226 |
20 |
Node ret = nm->mkConst(true); |
227 |
10 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_ID); |
228 |
|
} |
229 |
242 |
if (n[0].isConst() && n[1].isConst()) |
230 |
|
{ |
231 |
70 |
String s = n[0].getConst<String>(); |
232 |
70 |
String t = n[1].getConst<String>(); |
233 |
70 |
Node ret = nm->mkConst(s.isLeq(t)); |
234 |
35 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EVAL); |
235 |
|
} |
236 |
|
// empty strings |
237 |
611 |
for (unsigned i = 0; i < 2; i++) |
238 |
|
{ |
239 |
411 |
if (n[i].isConst() && n[i].getConst<String>().empty()) |
240 |
|
{ |
241 |
14 |
Node ret = i == 0 ? nm->mkConst(true) : n[0].eqNode(n[1]); |
242 |
7 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_EMPTY); |
243 |
|
} |
244 |
|
} |
245 |
|
|
246 |
400 |
std::vector<Node> n1; |
247 |
200 |
utils::getConcat(n[0], n1); |
248 |
400 |
std::vector<Node> n2; |
249 |
200 |
utils::getConcat(n[1], n2); |
250 |
200 |
Assert(!n1.empty() && !n2.empty()); |
251 |
|
|
252 |
|
// constant prefixes |
253 |
200 |
if (n1[0].isConst() && n2[0].isConst() && n1[0] != n2[0]) |
254 |
|
{ |
255 |
94 |
String s = n1[0].getConst<String>(); |
256 |
94 |
String t = n2[0].getConst<String>(); |
257 |
54 |
size_t prefixLen = std::min(s.size(), t.size()); |
258 |
54 |
s = s.prefix(prefixLen); |
259 |
54 |
t = t.prefix(prefixLen); |
260 |
|
// if the prefixes are not the same, then we can already decide the outcome |
261 |
54 |
if (s != t) |
262 |
|
{ |
263 |
28 |
Node ret = nm->mkConst(s.isLeq(t)); |
264 |
14 |
return returnRewrite(n, ret, Rewrite::STR_LEQ_CPREFIX); |
265 |
|
} |
266 |
|
} |
267 |
186 |
return n; |
268 |
|
} |
269 |
|
|
270 |
53 |
Node StringsRewriter::rewriteStringFromCode(Node n) |
271 |
|
{ |
272 |
53 |
Assert(n.getKind() == kind::STRING_FROM_CODE); |
273 |
53 |
NodeManager* nm = NodeManager::currentNM(); |
274 |
|
|
275 |
53 |
if (n[0].isConst()) |
276 |
|
{ |
277 |
26 |
Integer i = n[0].getConst<Rational>().getNumerator(); |
278 |
26 |
Node ret; |
279 |
13 |
if (i >= 0 && i < strings::utils::getAlphabetCardinality()) |
280 |
|
{ |
281 |
12 |
std::vector<unsigned> svec = {i.toUnsignedInt()}; |
282 |
6 |
ret = nm->mkConst(String(svec)); |
283 |
|
} |
284 |
|
else |
285 |
|
{ |
286 |
7 |
ret = nm->mkConst(String("")); |
287 |
|
} |
288 |
13 |
return returnRewrite(n, ret, Rewrite::FROM_CODE_EVAL); |
289 |
|
} |
290 |
40 |
return n; |
291 |
|
} |
292 |
|
|
293 |
5189 |
Node StringsRewriter::rewriteStringToCode(Node n) |
294 |
|
{ |
295 |
5189 |
Assert(n.getKind() == kind::STRING_TO_CODE); |
296 |
5189 |
if (n[0].isConst()) |
297 |
|
{ |
298 |
1563 |
NodeManager* nm = NodeManager::currentNM(); |
299 |
3126 |
String s = n[0].getConst<String>(); |
300 |
3126 |
Node ret; |
301 |
1563 |
if (s.size() == 1) |
302 |
|
{ |
303 |
1212 |
std::vector<unsigned> vec = s.getVec(); |
304 |
606 |
Assert(vec.size() == 1); |
305 |
606 |
ret = nm->mkConst(Rational(vec[0])); |
306 |
|
} |
307 |
|
else |
308 |
|
{ |
309 |
957 |
ret = nm->mkConst(Rational(-1)); |
310 |
|
} |
311 |
1563 |
return returnRewrite(n, ret, Rewrite::TO_CODE_EVAL); |
312 |
|
} |
313 |
3626 |
return n; |
314 |
|
} |
315 |
|
|
316 |
8 |
Node StringsRewriter::rewriteStringIsDigit(Node n) |
317 |
|
{ |
318 |
8 |
Assert(n.getKind() == kind::STRING_IS_DIGIT); |
319 |
8 |
NodeManager* nm = NodeManager::currentNM(); |
320 |
|
// eliminate str.is_digit(s) ----> 48 <= str.to_code(s) <= 57 |
321 |
16 |
Node t = nm->mkNode(STRING_TO_CODE, n[0]); |
322 |
|
Node retNode = nm->mkNode(AND, |
323 |
16 |
nm->mkNode(LEQ, nm->mkConst(Rational(48)), t), |
324 |
32 |
nm->mkNode(LEQ, t, nm->mkConst(Rational(57)))); |
325 |
16 |
return returnRewrite(n, retNode, Rewrite::IS_DIGIT_ELIM); |
326 |
|
} |
327 |
|
|
328 |
|
} // namespace strings |
329 |
|
} // namespace theory |
330 |
22746 |
} // namespace cvc5 |