GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 188 204 92.2 %
Date: 2021-09-29 Branches: 351 734 47.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "expr/attribute.h"
21
#include "expr/skolem_manager.h"
22
#include "options/strings_options.h"
23
#include "theory/quantifiers/quantifiers_attributes.h"
24
#include "theory/rewriter.h"
25
#include "theory/strings/arith_entail.h"
26
#include "theory/strings/strings_entail.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/string.h"
31
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
namespace utils {
38
39
88298
uint32_t getAlphabetCardinality()
40
{
41
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
42
88298
  Assert(196608 <= String::num_codes());
43
88298
  return 196608;
44
}
45
46
269233
Node mkAnd(const std::vector<Node>& a)
47
{
48
538466
  std::vector<Node> au;
49
631609
  for (const Node& ai : a)
50
  {
51
362376
    if (std::find(au.begin(), au.end(), ai) == au.end())
52
    {
53
356120
      au.push_back(ai);
54
    }
55
  }
56
269233
  if (au.empty())
57
  {
58
124300
    return NodeManager::currentNM()->mkConst(true);
59
  }
60
144933
  else if (au.size() == 1)
61
  {
62
66485
    return au[0];
63
  }
64
78448
  return NodeManager::currentNM()->mkNode(AND, au);
65
}
66
67
168148
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
68
{
69
168148
  if (n.getKind() != k)
70
  {
71
    // easy case, just add to conj if non-duplicate
72
149247
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
73
    {
74
145353
      conj.push_back(n);
75
    }
76
149247
    return;
77
  }
78
  // otherwise, traverse
79
37802
  std::unordered_set<TNode> visited;
80
18901
  std::unordered_set<TNode>::iterator it;
81
37802
  std::vector<TNode> visit;
82
37802
  TNode cur;
83
18901
  visit.push_back(n);
84
39435
  do
85
  {
86
58336
    cur = visit.back();
87
58336
    visit.pop_back();
88
58336
    it = visited.find(cur);
89
90
58336
    if (it == visited.end())
91
    {
92
58336
      visited.insert(cur);
93
58336
      if (cur.getKind() == k)
94
      {
95
        // Add in reverse order, so that we traverse left to right.
96
        // This is important so that explantaions aren't reversed when they
97
        // are flattened, which is important for proofs involving substitutions.
98
37802
        std::vector<Node> newChildren;
99
18901
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
100
18901
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
101
      }
102
39435
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
103
      {
104
38882
        conj.push_back(cur);
105
      }
106
    }
107
58336
  } while (!visit.empty());
108
}
109
110
576434
void getConcat(Node n, std::vector<Node>& c)
111
{
112
576434
  Kind k = n.getKind();
113
576434
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
114
  {
115
170120
    for (const Node& nc : n)
116
    {
117
127574
      c.push_back(nc);
118
42546
    }
119
  }
120
  else
121
  {
122
533888
    c.push_back(n);
123
  }
124
576434
}
125
126
546004
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
127
{
128
546004
  Assert(tn.isStringLike() || tn.isRegExp());
129
546004
  if (c.empty())
130
  {
131
35718
    Assert(tn.isStringLike());
132
35718
    return Word::mkEmptyWord(tn);
133
  }
134
510286
  else if (c.size() == 1)
135
  {
136
200190
    return c[0];
137
  }
138
310096
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
139
310096
  return NodeManager::currentNM()->mkNode(k, c);
140
}
141
142
1091
Node mkNConcat(Node n1, Node n2)
143
{
144
  return Rewriter::rewrite(
145
1091
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
146
}
147
148
2074
Node mkNConcat(Node n1, Node n2, Node n3)
149
{
150
  return Rewriter::rewrite(
151
2074
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
152
}
153
154
440358
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
155
{
156
440358
  return Rewriter::rewrite(mkConcat(c, tn));
157
}
158
159
1037299
Node mkNLength(Node t)
160
{
161
1037299
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
162
}
163
164
11105
Node mkPrefix(Node t, Node n)
165
{
166
11105
  NodeManager* nm = NodeManager::currentNM();
167
11105
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
168
}
169
170
9984
Node mkSuffix(Node t, Node n)
171
{
172
9984
  NodeManager* nm = NodeManager::currentNM();
173
  return nm->mkNode(
174
9984
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
175
}
176
177
116921
Node getConstantComponent(Node t)
178
{
179
116921
  if (t.getKind() == STRING_TO_REGEXP)
180
  {
181
8313
    return t[0].isConst() ? t[0] : Node::null();
182
  }
183
108608
  return t.isConst() ? t : Node::null();
184
}
185
186
92317
Node getConstantEndpoint(Node e, bool isSuf)
187
{
188
92317
  Kind ek = e.getKind();
189
92317
  if (ek == STRING_IN_REGEXP)
190
  {
191
2318
    e = e[1];
192
2318
    ek = e.getKind();
193
  }
194
92317
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
195
  {
196
47907
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
197
  }
198
44410
  return getConstantComponent(e);
199
}
200
201
99399
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
202
{
203
99399
  Assert(ss.empty());
204
99399
  Assert(ls.empty());
205
194541
  while (s.getKind() == STRING_SUBSTR)
206
  {
207
47571
    ss.push_back(s[1]);
208
47571
    ls.push_back(s[2]);
209
47571
    s = s[0];
210
  }
211
99399
  std::reverse(ss.begin(), ss.end());
212
99399
  std::reverse(ls.begin(), ls.end());
213
99399
  return s;
214
}
215
216
Node mkSubstrChain(Node base,
217
                   const std::vector<Node>& ss,
218
                   const std::vector<Node>& ls)
219
{
220
  NodeManager* nm = NodeManager::currentNM();
221
  for (unsigned i = 0, size = ss.size(); i < size; i++)
222
  {
223
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
224
  }
225
  return base;
226
}
227
228
330
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
229
{
230
  // Collect the equalities of the form (= x "") (sorted)
231
660
  std::set<TNode> emptyNodes;
232
330
  bool allEmptyEqs = true;
233
330
  if (x.getKind() == EQUAL)
234
  {
235
278
    if (Word::isEmpty(x[0]))
236
    {
237
106
      emptyNodes.insert(x[1]);
238
    }
239
172
    else if (Word::isEmpty(x[1]))
240
    {
241
52
      emptyNodes.insert(x[0]);
242
    }
243
    else
244
    {
245
120
      allEmptyEqs = false;
246
    }
247
  }
248
52
  else if (x.getKind() == AND)
249
  {
250
132
    for (const Node& c : x)
251
    {
252
96
      if (c.getKind() != EQUAL)
253
      {
254
2
        allEmptyEqs = false;
255
2
        continue;
256
      }
257
258
92
      if (Word::isEmpty(c[0]))
259
      {
260
78
        emptyNodes.insert(c[1]);
261
      }
262
14
      else if (Word::isEmpty(c[1]))
263
      {
264
8
        emptyNodes.insert(c[0]);
265
      }
266
      else
267
      {
268
6
        allEmptyEqs = false;
269
      }
270
    }
271
  }
272
273
330
  if (emptyNodes.size() == 0)
274
  {
275
132
    allEmptyEqs = false;
276
  }
277
278
  return std::make_pair(
279
660
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
280
}
281
282
9668368
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
283
284
1469
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
285
{
286
1469
  size_t i = start;
287
2543
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
288
  {
289
537
    i++;
290
  }
291
292
1469
  if (i >= rs.size())
293
  {
294
12
    return false;
295
  }
296
297
1457
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
298
}
299
300
631
bool isSimpleRegExp(Node r)
301
{
302
631
  Assert(r.getType().isRegExp());
303
304
1262
  std::vector<Node> v;
305
631
  utils::getConcat(r, v);
306
1739
  for (const Node& n : v)
307
  {
308
1461
    if (n.getKind() == STRING_TO_REGEXP)
309
    {
310
461
      if (!n[0].isConst())
311
      {
312
367
        return false;
313
      }
314
    }
315
2000
    else if (n.getKind() != REGEXP_SIGMA
316
2000
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
317
    {
318
339
      return false;
319
    }
320
  }
321
278
  return true;
322
}
323
324
1229
void getRegexpComponents(Node r, std::vector<Node>& result)
325
{
326
1229
  Assert(r.getType().isRegExp());
327
328
1229
  NodeManager* nm = NodeManager::currentNM();
329
1229
  if (r.getKind() == REGEXP_CONCAT)
330
  {
331
1177
    for (const Node& n : r)
332
    {
333
965
      getRegexpComponents(n, result);
334
    }
335
  }
336
1017
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
337
  {
338
370
    size_t rlen = Word::getLength(r[0]);
339
1409
    for (size_t i = 0; i < rlen; i++)
340
    {
341
1039
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
342
    }
343
  }
344
  else
345
  {
346
647
    result.push_back(r);
347
  }
348
1229
}
349
350
void printConcat(std::ostream& out, std::vector<Node>& n)
351
{
352
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
353
  {
354
    if (i > 0)
355
    {
356
      out << " ++ ";
357
    }
358
    out << n[i];
359
  }
360
}
361
362
void printConcatTrace(std::vector<Node>& n, const char* c)
363
{
364
  std::stringstream ss;
365
  printConcat(ss, n);
366
  Trace(c) << ss.str();
367
}
368
369
6971
bool isStringKind(Kind k)
370
{
371
6855
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
372
6804
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
373
13737
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
374
}
375
376
485
bool isRegExpKind(Kind k)
377
{
378
485
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
379
485
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
380
192
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
381
20
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
382
505
         || k == REGEXP_COMPLEMENT;
383
}
384
385
32337
TypeNode getOwnerStringType(Node n)
386
{
387
32337
  TypeNode tn;
388
32337
  Kind k = n.getKind();
389
32337
  if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
390
9353
      || k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX
391
6971
      || k == STRING_SUFFIX)
392
  {
393
    // owning string type is the type of first argument
394
25366
    tn = n[0].getType();
395
  }
396
6971
  else if (isStringKind(k))
397
  {
398
2125
    tn = NodeManager::currentNM()->stringType();
399
  }
400
  else
401
  {
402
4846
    tn = n.getType();
403
  }
404
64674
  AlwaysAssert(tn.isStringLike())
405
32337
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
406
32337
  return tn;
407
}
408
409
4
unsigned getRepeatAmount(TNode node)
410
{
411
4
  Assert(node.getKind() == REGEXP_REPEAT);
412
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
413
}
414
415
26
unsigned getLoopMaxOccurrences(TNode node)
416
{
417
26
  Assert(node.getKind() == REGEXP_LOOP);
418
26
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
419
}
420
421
26
unsigned getLoopMinOccurrences(TNode node)
422
{
423
26
  Assert(node.getKind() == REGEXP_LOOP);
424
26
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
425
}
426
427
/**
428
 * Mapping to a dummy node for marking an attribute on internal quantified
429
 * formulas. This ensures that reductions are deterministic.
430
 */
431
struct QInternalVarAttributeId
432
{
433
};
434
typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute;
435
436
388
Node mkForallInternal(Node bvl, Node body)
437
{
438
388
  NodeManager* nm = NodeManager::currentNM();
439
  QInternalVarAttribute qiva;
440
776
  Node qvar;
441
388
  if (bvl.hasAttribute(qiva))
442
  {
443
14
    qvar = bvl.getAttribute(qiva);
444
  }
445
  else
446
  {
447
374
    SkolemManager* sm = nm->getSkolemManager();
448
374
    qvar = sm->mkDummySkolem("qinternal", nm->booleanType());
449
    // this dummy variable marks that the quantified formula is internal
450
374
    qvar.setAttribute(InternalQuantAttribute(), true);
451
    // remember the dummy variable
452
374
    bvl.setAttribute(qiva, qvar);
453
  }
454
  // make the internal attribute, and put it in a singleton list
455
776
  Node ip = nm->mkNode(INST_ATTRIBUTE, qvar);
456
776
  Node ipl = nm->mkNode(INST_PATTERN_LIST, ip);
457
  // make the overall formula
458
776
  return nm->mkNode(FORALL, bvl, body, ipl);
459
}
460
461
}  // namespace utils
462
}  // namespace strings
463
}  // namespace theory
464
22746
}  // namespace cvc5