GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 175 191 91.6 %
Date: 2021-05-22 Branches: 327 692 47.3 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "options/strings_options.h"
21
#include "theory/rewriter.h"
22
#include "theory/strings/arith_entail.h"
23
#include "theory/strings/strings_entail.h"
24
#include "theory/strings/word.h"
25
26
using namespace cvc5::kind;
27
28
namespace cvc5 {
29
namespace theory {
30
namespace strings {
31
namespace utils {
32
33
40650
uint32_t getAlphabetCardinality()
34
{
35
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
36
40650
  Assert(196608 <= String::num_codes());
37
40650
  return 196608;
38
}
39
40
212072
Node mkAnd(const std::vector<Node>& a)
41
{
42
424144
  std::vector<Node> au;
43
453257
  for (const Node& ai : a)
44
  {
45
241185
    if (std::find(au.begin(), au.end(), ai) == au.end())
46
    {
47
235367
      au.push_back(ai);
48
    }
49
  }
50
212072
  if (au.empty())
51
  {
52
116676
    return NodeManager::currentNM()->mkConst(true);
53
  }
54
95396
  else if (au.size() == 1)
55
  {
56
42763
    return au[0];
57
  }
58
52633
  return NodeManager::currentNM()->mkNode(AND, au);
59
}
60
61
104781
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
62
{
63
104781
  if (n.getKind() != k)
64
  {
65
    // easy case, just add to conj if non-duplicate
66
99972
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
67
    {
68
96793
      conj.push_back(n);
69
    }
70
99972
    return;
71
  }
72
  // otherwise, traverse
73
9618
  std::unordered_set<TNode> visited;
74
4809
  std::unordered_set<TNode>::iterator it;
75
9618
  std::vector<TNode> visit;
76
9618
  TNode cur;
77
4809
  visit.push_back(n);
78
10204
  do
79
  {
80
15013
    cur = visit.back();
81
15013
    visit.pop_back();
82
15013
    it = visited.find(cur);
83
84
15013
    if (it == visited.end())
85
    {
86
15013
      visited.insert(cur);
87
15013
      if (cur.getKind() == k)
88
      {
89
        // Add in reverse order, so that we traverse left to right.
90
        // This is important so that explantaions aren't reversed when they
91
        // are flattened, which is important for proofs involving substitutions.
92
9618
        std::vector<Node> newChildren;
93
4809
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
94
4809
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
95
      }
96
10204
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
97
      {
98
10190
        conj.push_back(cur);
99
      }
100
    }
101
15013
  } while (!visit.empty());
102
}
103
104
1168899
void getConcat(Node n, std::vector<Node>& c)
105
{
106
1168899
  Kind k = n.getKind();
107
1168899
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
108
  {
109
943796
    for (const Node& nc : n)
110
    {
111
700125
      c.push_back(nc);
112
243671
    }
113
  }
114
  else
115
  {
116
925228
    c.push_back(n);
117
  }
118
1168899
}
119
120
474923
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
121
{
122
474923
  Assert(tn.isStringLike() || tn.isRegExp());
123
474923
  if (c.empty())
124
  {
125
30686
    Assert(tn.isStringLike());
126
30686
    return Word::mkEmptyWord(tn);
127
  }
128
444237
  else if (c.size() == 1)
129
  {
130
201990
    return c[0];
131
  }
132
242247
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
133
242247
  return NodeManager::currentNM()->mkNode(k, c);
134
}
135
136
1104
Node mkNConcat(Node n1, Node n2)
137
{
138
  return Rewriter::rewrite(
139
1104
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
140
}
141
142
1957
Node mkNConcat(Node n1, Node n2, Node n3)
143
{
144
  return Rewriter::rewrite(
145
1957
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
146
}
147
148
365682
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
149
{
150
365682
  return Rewriter::rewrite(mkConcat(c, tn));
151
}
152
153
484315
Node mkNLength(Node t)
154
{
155
484315
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
156
}
157
158
10880
Node mkPrefix(Node t, Node n)
159
{
160
10880
  NodeManager* nm = NodeManager::currentNM();
161
10880
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
162
}
163
164
8780
Node mkSuffix(Node t, Node n)
165
{
166
8780
  NodeManager* nm = NodeManager::currentNM();
167
  return nm->mkNode(
168
8780
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
169
}
170
171
96170
Node getConstantComponent(Node t)
172
{
173
96170
  if (t.getKind() == STRING_TO_REGEXP)
174
  {
175
3168
    return t[0].isConst() ? t[0] : Node::null();
176
  }
177
93002
  return t.isConst() ? t : Node::null();
178
}
179
180
79422
Node getConstantEndpoint(Node e, bool isSuf)
181
{
182
79422
  Kind ek = e.getKind();
183
79422
  if (ek == STRING_IN_REGEXP)
184
  {
185
2171
    e = e[1];
186
2171
    ek = e.getKind();
187
  }
188
79422
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
189
  {
190
41179
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
191
  }
192
38243
  return getConstantComponent(e);
193
}
194
195
274667
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
196
{
197
274667
  Assert(ss.empty());
198
274667
  Assert(ls.empty());
199
349387
  while (s.getKind() == STRING_SUBSTR)
200
  {
201
37360
    ss.push_back(s[1]);
202
37360
    ls.push_back(s[2]);
203
37360
    s = s[0];
204
  }
205
274667
  std::reverse(ss.begin(), ss.end());
206
274667
  std::reverse(ls.begin(), ls.end());
207
274667
  return s;
208
}
209
210
Node mkSubstrChain(Node base,
211
                   const std::vector<Node>& ss,
212
                   const std::vector<Node>& ls)
213
{
214
  NodeManager* nm = NodeManager::currentNM();
215
  for (unsigned i = 0, size = ss.size(); i < size; i++)
216
  {
217
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
218
  }
219
  return base;
220
}
221
222
340
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
223
{
224
  // Collect the equalities of the form (= x "") (sorted)
225
680
  std::set<TNode> emptyNodes;
226
340
  bool allEmptyEqs = true;
227
340
  if (x.getKind() == EQUAL)
228
  {
229
294
    if (Word::isEmpty(x[0]))
230
    {
231
114
      emptyNodes.insert(x[1]);
232
    }
233
180
    else if (Word::isEmpty(x[1]))
234
    {
235
62
      emptyNodes.insert(x[0]);
236
    }
237
    else
238
    {
239
118
      allEmptyEqs = false;
240
    }
241
  }
242
46
  else if (x.getKind() == AND)
243
  {
244
98
    for (const Node& c : x)
245
    {
246
72
      if (c.getKind() != EQUAL)
247
      {
248
2
        allEmptyEqs = false;
249
2
        continue;
250
      }
251
252
68
      if (Word::isEmpty(c[0]))
253
      {
254
38
        emptyNodes.insert(c[1]);
255
      }
256
30
      else if (Word::isEmpty(c[1]))
257
      {
258
20
        emptyNodes.insert(c[0]);
259
      }
260
      else
261
      {
262
10
        allEmptyEqs = false;
263
      }
264
    }
265
  }
266
267
340
  if (emptyNodes.size() == 0)
268
  {
269
134
    allEmptyEqs = false;
270
  }
271
272
  return std::make_pair(
273
680
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
274
}
275
276
6341574
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
277
278
1714
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
279
{
280
1714
  size_t i = start;
281
2818
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
282
  {
283
552
    i++;
284
  }
285
286
1714
  if (i >= rs.size())
287
  {
288
12
    return false;
289
  }
290
291
1702
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
292
}
293
294
611
bool isSimpleRegExp(Node r)
295
{
296
611
  Assert(r.getType().isRegExp());
297
298
1222
  std::vector<Node> v;
299
611
  utils::getConcat(r, v);
300
1865
  for (const Node& n : v)
301
  {
302
1568
    if (n.getKind() == STRING_TO_REGEXP)
303
    {
304
554
      if (!n[0].isConst())
305
      {
306
362
        return false;
307
      }
308
    }
309
2028
    else if (n.getKind() != REGEXP_SIGMA
310
2028
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
311
    {
312
266
      return false;
313
    }
314
  }
315
297
  return true;
316
}
317
318
1414
void getRegexpComponents(Node r, std::vector<Node>& result)
319
{
320
1414
  Assert(r.getType().isRegExp());
321
322
1414
  NodeManager* nm = NodeManager::currentNM();
323
1414
  if (r.getKind() == REGEXP_CONCAT)
324
  {
325
1376
    for (const Node& n : r)
326
    {
327
1134
      getRegexpComponents(n, result);
328
    }
329
  }
330
1172
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
331
  {
332
446
    size_t rlen = Word::getLength(r[0]);
333
1620
    for (size_t i = 0; i < rlen; i++)
334
    {
335
1174
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
336
    }
337
  }
338
  else
339
  {
340
726
    result.push_back(r);
341
  }
342
1414
}
343
344
void printConcat(std::ostream& out, std::vector<Node>& n)
345
{
346
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
347
  {
348
    if (i > 0)
349
    {
350
      out << " ++ ";
351
    }
352
    out << n[i];
353
  }
354
}
355
356
void printConcatTrace(std::vector<Node>& n, const char* c)
357
{
358
  std::stringstream ss;
359
  printConcat(ss, n);
360
  Trace(c) << ss.str();
361
}
362
363
4976
bool isStringKind(Kind k)
364
{
365
4960
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
366
4906
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
367
9842
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
368
}
369
370
527
bool isRegExpKind(Kind k)
371
{
372
527
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
373
527
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
374
239
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
375
31
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
376
558
         || k == REGEXP_COMPLEMENT;
377
}
378
379
25243
TypeNode getOwnerStringType(Node n)
380
{
381
25243
  TypeNode tn;
382
25243
  Kind k = n.getKind();
383
25243
  if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN
384
4995
      || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX)
385
  {
386
    // owning string type is the type of first argument
387
20267
    tn = n[0].getType();
388
  }
389
4976
  else if (isStringKind(k))
390
  {
391
1166
    tn = NodeManager::currentNM()->stringType();
392
  }
393
  else
394
  {
395
3810
    tn = n.getType();
396
  }
397
50486
  AlwaysAssert(tn.isStringLike())
398
25243
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
399
25243
  return tn;
400
}
401
402
4
unsigned getRepeatAmount(TNode node)
403
{
404
4
  Assert(node.getKind() == REGEXP_REPEAT);
405
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
406
}
407
408
24
unsigned getLoopMaxOccurrences(TNode node)
409
{
410
24
  Assert(node.getKind() == REGEXP_LOOP);
411
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
412
}
413
414
24
unsigned getLoopMinOccurrences(TNode node)
415
{
416
24
  Assert(node.getKind() == REGEXP_LOOP);
417
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
418
}
419
420
}  // namespace utils
421
}  // namespace strings
422
}  // namespace theory
423
28194
}  // namespace cvc5