GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 188 204 92.2 %
Date: 2021-08-01 Branches: 351 736 47.7 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "expr/attribute.h"
21
#include "expr/skolem_manager.h"
22
#include "options/strings_options.h"
23
#include "theory/quantifiers/quantifiers_attributes.h"
24
#include "theory/rewriter.h"
25
#include "theory/strings/arith_entail.h"
26
#include "theory/strings/strings_entail.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/string.h"
31
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
namespace utils {
38
39
42668
uint32_t getAlphabetCardinality()
40
{
41
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
42
42668
  Assert(196608 <= String::num_codes());
43
42668
  return 196608;
44
}
45
46
265153
Node mkAnd(const std::vector<Node>& a)
47
{
48
530306
  std::vector<Node> au;
49
582581
  for (const Node& ai : a)
50
  {
51
317428
    if (std::find(au.begin(), au.end(), ai) == au.end())
52
    {
53
311152
      au.push_back(ai);
54
    }
55
  }
56
265153
  if (au.empty())
57
  {
58
136265
    return NodeManager::currentNM()->mkConst(true);
59
  }
60
128888
  else if (au.size() == 1)
61
  {
62
62236
    return au[0];
63
  }
64
66652
  return NodeManager::currentNM()->mkNode(AND, au);
65
}
66
67
150014
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
68
{
69
150014
  if (n.getKind() != k)
70
  {
71
    // easy case, just add to conj if non-duplicate
72
142158
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
73
    {
74
138229
      conj.push_back(n);
75
    }
76
142158
    return;
77
  }
78
  // otherwise, traverse
79
15712
  std::unordered_set<TNode> visited;
80
7856
  std::unordered_set<TNode>::iterator it;
81
15712
  std::vector<TNode> visit;
82
15712
  TNode cur;
83
7856
  visit.push_back(n);
84
16514
  do
85
  {
86
24370
    cur = visit.back();
87
24370
    visit.pop_back();
88
24370
    it = visited.find(cur);
89
90
24370
    if (it == visited.end())
91
    {
92
24370
      visited.insert(cur);
93
24370
      if (cur.getKind() == k)
94
      {
95
        // Add in reverse order, so that we traverse left to right.
96
        // This is important so that explantaions aren't reversed when they
97
        // are flattened, which is important for proofs involving substitutions.
98
15712
        std::vector<Node> newChildren;
99
7856
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
100
7856
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
101
      }
102
16514
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
103
      {
104
16075
        conj.push_back(cur);
105
      }
106
    }
107
24370
  } while (!visit.empty());
108
}
109
110
1507783
void getConcat(Node n, std::vector<Node>& c)
111
{
112
1507783
  Kind k = n.getKind();
113
1507783
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
114
  {
115
1330894
    for (const Node& nc : n)
116
    {
117
986024
      c.push_back(nc);
118
344870
    }
119
  }
120
  else
121
  {
122
1162913
    c.push_back(n);
123
  }
124
1507783
}
125
126
596538
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
127
{
128
596538
  Assert(tn.isStringLike() || tn.isRegExp());
129
596538
  if (c.empty())
130
  {
131
46046
    Assert(tn.isStringLike());
132
46046
    return Word::mkEmptyWord(tn);
133
  }
134
550492
  else if (c.size() == 1)
135
  {
136
233441
    return c[0];
137
  }
138
317051
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
139
317051
  return NodeManager::currentNM()->mkNode(k, c);
140
}
141
142
1136
Node mkNConcat(Node n1, Node n2)
143
{
144
  return Rewriter::rewrite(
145
1136
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
146
}
147
148
3135
Node mkNConcat(Node n1, Node n2, Node n3)
149
{
150
  return Rewriter::rewrite(
151
3135
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
152
}
153
154
456565
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
155
{
156
456565
  return Rewriter::rewrite(mkConcat(c, tn));
157
}
158
159
591058
Node mkNLength(Node t)
160
{
161
591058
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
162
}
163
164
14877
Node mkPrefix(Node t, Node n)
165
{
166
14877
  NodeManager* nm = NodeManager::currentNM();
167
14877
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
168
}
169
170
11561
Node mkSuffix(Node t, Node n)
171
{
172
11561
  NodeManager* nm = NodeManager::currentNM();
173
  return nm->mkNode(
174
11561
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
175
}
176
177
173167
Node getConstantComponent(Node t)
178
{
179
173167
  if (t.getKind() == STRING_TO_REGEXP)
180
  {
181
7940
    return t[0].isConst() ? t[0] : Node::null();
182
  }
183
165227
  return t.isConst() ? t : Node::null();
184
}
185
186
144743
Node getConstantEndpoint(Node e, bool isSuf)
187
{
188
144743
  Kind ek = e.getKind();
189
144743
  if (ek == STRING_IN_REGEXP)
190
  {
191
2634
    e = e[1];
192
2634
    ek = e.getKind();
193
  }
194
144743
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
195
  {
196
74927
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
197
  }
198
69816
  return getConstantComponent(e);
199
}
200
201
352023
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
202
{
203
352023
  Assert(ss.empty());
204
352023
  Assert(ls.empty());
205
456529
  while (s.getKind() == STRING_SUBSTR)
206
  {
207
52253
    ss.push_back(s[1]);
208
52253
    ls.push_back(s[2]);
209
52253
    s = s[0];
210
  }
211
352023
  std::reverse(ss.begin(), ss.end());
212
352023
  std::reverse(ls.begin(), ls.end());
213
352023
  return s;
214
}
215
216
Node mkSubstrChain(Node base,
217
                   const std::vector<Node>& ss,
218
                   const std::vector<Node>& ls)
219
{
220
  NodeManager* nm = NodeManager::currentNM();
221
  for (unsigned i = 0, size = ss.size(); i < size; i++)
222
  {
223
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
224
  }
225
  return base;
226
}
227
228
416
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
229
{
230
  // Collect the equalities of the form (= x "") (sorted)
231
832
  std::set<TNode> emptyNodes;
232
416
  bool allEmptyEqs = true;
233
416
  if (x.getKind() == EQUAL)
234
  {
235
312
    if (Word::isEmpty(x[0]))
236
    {
237
112
      emptyNodes.insert(x[1]);
238
    }
239
200
    else if (Word::isEmpty(x[1]))
240
    {
241
64
      emptyNodes.insert(x[0]);
242
    }
243
    else
244
    {
245
136
      allEmptyEqs = false;
246
    }
247
  }
248
104
  else if (x.getKind() == AND)
249
  {
250
264
    for (const Node& c : x)
251
    {
252
190
      if (c.getKind() != EQUAL)
253
      {
254
2
        allEmptyEqs = false;
255
2
        continue;
256
      }
257
258
186
      if (Word::isEmpty(c[0]))
259
      {
260
114
        emptyNodes.insert(c[1]);
261
      }
262
72
      else if (Word::isEmpty(c[1]))
263
      {
264
46
        emptyNodes.insert(c[0]);
265
      }
266
      else
267
      {
268
26
        allEmptyEqs = false;
269
      }
270
    }
271
  }
272
273
416
  if (emptyNodes.size() == 0)
274
  {
275
162
    allEmptyEqs = false;
276
  }
277
278
  return std::make_pair(
279
832
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
280
}
281
282
9928240
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
283
284
1754
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
285
{
286
1754
  size_t i = start;
287
2850
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
288
  {
289
548
    i++;
290
  }
291
292
1754
  if (i >= rs.size())
293
  {
294
12
    return false;
295
  }
296
297
1742
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
298
}
299
300
848
bool isSimpleRegExp(Node r)
301
{
302
848
  Assert(r.getType().isRegExp());
303
304
1696
  std::vector<Node> v;
305
848
  utils::getConcat(r, v);
306
2232
  for (const Node& n : v)
307
  {
308
1878
    if (n.getKind() == STRING_TO_REGEXP)
309
    {
310
662
      if (!n[0].isConst())
311
      {
312
550
        return false;
313
      }
314
    }
315
2432
    else if (n.getKind() != REGEXP_SIGMA
316
2432
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
317
    {
318
438
      return false;
319
    }
320
  }
321
354
  return true;
322
}
323
324
1492
void getRegexpComponents(Node r, std::vector<Node>& result)
325
{
326
1492
  Assert(r.getType().isRegExp());
327
328
1492
  NodeManager* nm = NodeManager::currentNM();
329
1492
  if (r.getKind() == REGEXP_CONCAT)
330
  {
331
1420
    for (const Node& n : r)
332
    {
333
1168
      getRegexpComponents(n, result);
334
    }
335
  }
336
1240
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
337
  {
338
488
    size_t rlen = Word::getLength(r[0]);
339
1738
    for (size_t i = 0; i < rlen; i++)
340
    {
341
1250
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
342
    }
343
  }
344
  else
345
  {
346
752
    result.push_back(r);
347
  }
348
1492
}
349
350
void printConcat(std::ostream& out, std::vector<Node>& n)
351
{
352
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
353
  {
354
    if (i > 0)
355
    {
356
      out << " ++ ";
357
    }
358
    out << n[i];
359
  }
360
}
361
362
void printConcatTrace(std::vector<Node>& n, const char* c)
363
{
364
  std::stringstream ss;
365
  printConcat(ss, n);
366
  Trace(c) << ss.str();
367
}
368
369
5545
bool isStringKind(Kind k)
370
{
371
5529
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
372
5475
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
373
10982
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
374
}
375
376
674
bool isRegExpKind(Kind k)
377
{
378
674
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
379
674
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
380
303
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
381
29
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
382
703
         || k == REGEXP_COMPLEMENT;
383
}
384
385
28363
TypeNode getOwnerStringType(Node n)
386
{
387
28363
  TypeNode tn;
388
28363
  Kind k = n.getKind();
389
28363
  if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
390
7112
      || k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX
391
5545
      || k == STRING_SUFFIX)
392
  {
393
    // owning string type is the type of first argument
394
22818
    tn = n[0].getType();
395
  }
396
5545
  else if (isStringKind(k))
397
  {
398
1671
    tn = NodeManager::currentNM()->stringType();
399
  }
400
  else
401
  {
402
3874
    tn = n.getType();
403
  }
404
56726
  AlwaysAssert(tn.isStringLike())
405
28363
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
406
28363
  return tn;
407
}
408
409
4
unsigned getRepeatAmount(TNode node)
410
{
411
4
  Assert(node.getKind() == REGEXP_REPEAT);
412
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
413
}
414
415
24
unsigned getLoopMaxOccurrences(TNode node)
416
{
417
24
  Assert(node.getKind() == REGEXP_LOOP);
418
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
419
}
420
421
24
unsigned getLoopMinOccurrences(TNode node)
422
{
423
24
  Assert(node.getKind() == REGEXP_LOOP);
424
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
425
}
426
427
/**
428
 * Mapping to a dummy node for marking an attribute on internal quantified
429
 * formulas. This ensures that reductions are deterministic.
430
 */
431
struct QInternalVarAttributeId
432
{
433
};
434
typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute;
435
436
645
Node mkForallInternal(Node bvl, Node body)
437
{
438
645
  NodeManager* nm = NodeManager::currentNM();
439
  QInternalVarAttribute qiva;
440
1290
  Node qvar;
441
645
  if (bvl.hasAttribute(qiva))
442
  {
443
154
    qvar = bvl.getAttribute(qiva);
444
  }
445
  else
446
  {
447
491
    SkolemManager* sm = nm->getSkolemManager();
448
491
    qvar = sm->mkDummySkolem("qinternal", nm->booleanType());
449
    // this dummy variable marks that the quantified formula is internal
450
491
    qvar.setAttribute(InternalQuantAttribute(), true);
451
    // remember the dummy variable
452
491
    bvl.setAttribute(qiva, qvar);
453
  }
454
  // make the internal attribute, and put it in a singleton list
455
1290
  Node ip = nm->mkNode(INST_ATTRIBUTE, qvar);
456
1290
  Node ipl = nm->mkNode(INST_PATTERN_LIST, ip);
457
  // make the overall formula
458
1290
  return nm->mkNode(FORALL, bvl, body, ipl);
459
}
460
461
}  // namespace utils
462
}  // namespace strings
463
}  // namespace theory
464
29280
}  // namespace cvc5