GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 178 194 91.8 %
Date: 2021-11-07 Branches: 331 696 47.6 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "expr/attribute.h"
21
#include "expr/skolem_manager.h"
22
#include "options/strings_options.h"
23
#include "theory/quantifiers/fmf/bounded_integers.h"
24
#include "theory/quantifiers/quantifiers_attributes.h"
25
#include "theory/rewriter.h"
26
#include "theory/strings/arith_entail.h"
27
#include "theory/strings/strings_entail.h"
28
#include "theory/strings/word.h"
29
#include "util/rational.h"
30
#include "util/regexp.h"
31
#include "util/string.h"
32
33
using namespace cvc5::kind;
34
35
namespace cvc5 {
36
namespace theory {
37
namespace strings {
38
namespace utils {
39
40
619
uint32_t getDefaultAlphabetCardinality()
41
{
42
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
43
619
  Assert(196608 <= String::num_codes());
44
619
  return 196608;
45
}
46
47
362426
Node mkAnd(const std::vector<Node>& a)
48
{
49
724852
  std::vector<Node> au;
50
801201
  for (const Node& ai : a)
51
  {
52
438775
    if (std::find(au.begin(), au.end(), ai) == au.end())
53
    {
54
431519
      au.push_back(ai);
55
    }
56
  }
57
362426
  if (au.empty())
58
  {
59
171976
    return NodeManager::currentNM()->mkConst(true);
60
  }
61
190450
  else if (au.size() == 1)
62
  {
63
95128
    return au[0];
64
  }
65
95322
  return NodeManager::currentNM()->mkNode(AND, au);
66
}
67
68
229813
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
69
{
70
229813
  if (n.getKind() != k)
71
  {
72
    // easy case, just add to conj if non-duplicate
73
209710
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
74
    {
75
205045
      conj.push_back(n);
76
    }
77
209710
    return;
78
  }
79
  // otherwise, traverse
80
40206
  std::unordered_set<TNode> visited;
81
20103
  std::unordered_set<TNode>::iterator it;
82
40206
  std::vector<TNode> visit;
83
40206
  TNode cur;
84
20103
  visit.push_back(n);
85
42620
  do
86
  {
87
62723
    cur = visit.back();
88
62723
    visit.pop_back();
89
62723
    it = visited.find(cur);
90
91
62723
    if (it == visited.end())
92
    {
93
62723
      visited.insert(cur);
94
62723
      if (cur.getKind() == k)
95
      {
96
        // Add in reverse order, so that we traverse left to right.
97
        // This is important so that explantaions aren't reversed when they
98
        // are flattened, which is important for proofs involving substitutions.
99
40206
        std::vector<Node> newChildren;
100
20103
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
101
20103
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
102
      }
103
42620
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
104
      {
105
41813
        conj.push_back(cur);
106
      }
107
    }
108
62723
  } while (!visit.empty());
109
}
110
111
778146
void getConcat(Node n, std::vector<Node>& c)
112
{
113
778146
  Kind k = n.getKind();
114
778146
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
115
  {
116
249058
    for (const Node& nc : n)
117
    {
118
185860
      c.push_back(nc);
119
63198
    }
120
  }
121
  else
122
  {
123
714948
    c.push_back(n);
124
  }
125
778146
}
126
127
742213
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
128
{
129
742213
  Assert(tn.isStringLike() || tn.isRegExp());
130
742213
  if (c.empty())
131
  {
132
54445
    Assert(tn.isStringLike());
133
54445
    return Word::mkEmptyWord(tn);
134
  }
135
687768
  else if (c.size() == 1)
136
  {
137
275208
    return c[0];
138
  }
139
412560
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
140
412560
  return NodeManager::currentNM()->mkNode(k, c);
141
}
142
143
1136
Node mkNConcat(Node n1, Node n2)
144
{
145
  return Rewriter::rewrite(
146
1136
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
147
}
148
149
4103
Node mkNConcat(Node n1, Node n2, Node n3)
150
{
151
  return Rewriter::rewrite(
152
4103
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
153
}
154
155
596550
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
156
{
157
596550
  return Rewriter::rewrite(mkConcat(c, tn));
158
}
159
160
1150222
Node mkNLength(Node t)
161
{
162
1150222
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
163
}
164
165
18602
Node mkPrefix(Node t, Node n)
166
{
167
18602
  NodeManager* nm = NodeManager::currentNM();
168
18602
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
169
}
170
171
14891
Node mkSuffix(Node t, Node n)
172
{
173
14891
  NodeManager* nm = NodeManager::currentNM();
174
  return nm->mkNode(
175
14891
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
176
}
177
178
190839
Node getConstantComponent(Node t)
179
{
180
190839
  if (t.getKind() == STRING_TO_REGEXP)
181
  {
182
8064
    return t[0].isConst() ? t[0] : Node::null();
183
  }
184
182775
  return t.isConst() ? t : Node::null();
185
}
186
187
157621
Node getConstantEndpoint(Node e, bool isSuf)
188
{
189
157621
  Kind ek = e.getKind();
190
157621
  if (ek == STRING_IN_REGEXP)
191
  {
192
2628
    e = e[1];
193
2628
    ek = e.getKind();
194
  }
195
157621
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
196
  {
197
81379
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
198
  }
199
76242
  return getConstantComponent(e);
200
}
201
202
143296
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
203
{
204
143296
  Assert(ss.empty());
205
143296
  Assert(ls.empty());
206
265836
  while (s.getKind() == STRING_SUBSTR)
207
  {
208
61270
    ss.push_back(s[1]);
209
61270
    ls.push_back(s[2]);
210
61270
    s = s[0];
211
  }
212
143296
  std::reverse(ss.begin(), ss.end());
213
143296
  std::reverse(ls.begin(), ls.end());
214
143296
  return s;
215
}
216
217
Node mkSubstrChain(Node base,
218
                   const std::vector<Node>& ss,
219
                   const std::vector<Node>& ls)
220
{
221
  NodeManager* nm = NodeManager::currentNM();
222
  for (unsigned i = 0, size = ss.size(); i < size; i++)
223
  {
224
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
225
  }
226
  return base;
227
}
228
229
400
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
230
{
231
  // Collect the equalities of the form (= x "") (sorted)
232
800
  std::set<TNode> emptyNodes;
233
400
  bool allEmptyEqs = true;
234
400
  if (x.getKind() == EQUAL)
235
  {
236
298
    if (Word::isEmpty(x[0]))
237
    {
238
168
      emptyNodes.insert(x[1]);
239
    }
240
130
    else if (Word::isEmpty(x[1]))
241
    {
242
84
      emptyNodes.insert(x[0]);
243
    }
244
    else
245
    {
246
46
      allEmptyEqs = false;
247
    }
248
  }
249
102
  else if (x.getKind() == AND)
250
  {
251
270
    for (const Node& c : x)
252
    {
253
194
      if (c.getKind() != EQUAL)
254
      {
255
2
        allEmptyEqs = false;
256
2
        continue;
257
      }
258
259
190
      if (Word::isEmpty(c[0]))
260
      {
261
154
        emptyNodes.insert(c[1]);
262
      }
263
36
      else if (Word::isEmpty(c[1]))
264
      {
265
20
        emptyNodes.insert(c[0]);
266
      }
267
      else
268
      {
269
16
        allEmptyEqs = false;
270
      }
271
    }
272
  }
273
274
400
  if (emptyNodes.size() == 0)
275
  {
276
68
    allEmptyEqs = false;
277
  }
278
279
  return std::make_pair(
280
800
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
281
}
282
283
13182054
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
284
285
1794
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
286
{
287
1794
  size_t i = start;
288
2910
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
289
  {
290
558
    i++;
291
  }
292
293
1794
  if (i >= rs.size())
294
  {
295
12
    return false;
296
  }
297
298
1782
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
299
}
300
301
992
bool isSimpleRegExp(Node r)
302
{
303
992
  Assert(r.getType().isRegExp());
304
305
1984
  std::vector<Node> v;
306
992
  utils::getConcat(r, v);
307
2634
  for (const Node& n : v)
308
  {
309
2252
    if (n.getKind() == STRING_TO_REGEXP)
310
    {
311
800
      if (!n[0].isConst())
312
      {
313
682
        return false;
314
      }
315
    }
316
2904
    else if (n.getKind() != REGEXP_SIGMA
317
2904
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
318
    {
319
538
      return false;
320
    }
321
  }
322
382
  return true;
323
}
324
325
1746
void getRegexpComponents(Node r, std::vector<Node>& result)
326
{
327
1746
  Assert(r.getType().isRegExp());
328
329
1746
  NodeManager* nm = NodeManager::currentNM();
330
1746
  if (r.getKind() == REGEXP_CONCAT)
331
  {
332
1674
    for (const Node& n : r)
333
    {
334
1410
      getRegexpComponents(n, result);
335
    }
336
  }
337
1482
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
338
  {
339
606
    size_t rlen = Word::getLength(r[0]);
340
2006
    for (size_t i = 0; i < rlen; i++)
341
    {
342
1400
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
343
    }
344
  }
345
  else
346
  {
347
876
    result.push_back(r);
348
  }
349
1746
}
350
351
void printConcat(std::ostream& out, std::vector<Node>& n)
352
{
353
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
354
  {
355
    if (i > 0)
356
    {
357
      out << " ++ ";
358
    }
359
    out << n[i];
360
  }
361
}
362
363
void printConcatTrace(std::vector<Node>& n, const char* c)
364
{
365
  std::stringstream ss;
366
  printConcat(ss, n);
367
  Trace(c) << ss.str();
368
}
369
370
7668
bool isStringKind(Kind k)
371
{
372
7544
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
373
7490
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
374
15120
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
375
}
376
377
807
bool isRegExpKind(Kind k)
378
{
379
807
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
380
807
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
381
345
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
382
48
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
383
855
         || k == REGEXP_COMPLEMENT;
384
}
385
386
38302
TypeNode getOwnerStringType(Node n)
387
{
388
38302
  TypeNode tn;
389
38302
  Kind k = n.getKind();
390
38302
  if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
391
10297
      || k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX
392
7668
      || k == STRING_SUFFIX)
393
  {
394
    // owning string type is the type of first argument
395
30634
    tn = n[0].getType();
396
  }
397
7668
  else if (isStringKind(k))
398
  {
399
2220
    tn = NodeManager::currentNM()->stringType();
400
  }
401
  else
402
  {
403
5448
    tn = n.getType();
404
  }
405
76604
  AlwaysAssert(tn.isStringLike())
406
38302
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
407
38302
  return tn;
408
}
409
410
4
unsigned getRepeatAmount(TNode node)
411
{
412
4
  Assert(node.getKind() == REGEXP_REPEAT);
413
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
414
}
415
416
32
unsigned getLoopMaxOccurrences(TNode node)
417
{
418
32
  Assert(node.getKind() == REGEXP_LOOP);
419
32
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
420
}
421
422
32
unsigned getLoopMinOccurrences(TNode node)
423
{
424
32
  Assert(node.getKind() == REGEXP_LOOP);
425
32
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
426
}
427
428
681
Node mkForallInternal(Node bvl, Node body)
429
{
430
681
  return quantifiers::BoundedIntegers::mkBoundedForall(bvl, body);
431
}
432
433
}  // namespace utils
434
}  // namespace strings
435
}  // namespace theory
436
31137
}  // namespace cvc5