GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 188 204 92.2 %
Date: 2021-09-18 Branches: 351 734 47.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "expr/attribute.h"
21
#include "expr/skolem_manager.h"
22
#include "options/strings_options.h"
23
#include "theory/quantifiers/quantifiers_attributes.h"
24
#include "theory/rewriter.h"
25
#include "theory/strings/arith_entail.h"
26
#include "theory/strings/strings_entail.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/string.h"
31
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
namespace utils {
38
39
43337
uint32_t getAlphabetCardinality()
40
{
41
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
42
43337
  Assert(196608 <= String::num_codes());
43
43337
  return 196608;
44
}
45
46
361157
Node mkAnd(const std::vector<Node>& a)
47
{
48
722314
  std::vector<Node> au;
49
801587
  for (const Node& ai : a)
50
  {
51
440430
    if (std::find(au.begin(), au.end(), ai) == au.end())
52
    {
53
433170
      au.push_back(ai);
54
    }
55
  }
56
361157
  if (au.empty())
57
  {
58
169268
    return NodeManager::currentNM()->mkConst(true);
59
  }
60
191889
  else if (au.size() == 1)
61
  {
62
96540
    return au[0];
63
  }
64
95349
  return NodeManager::currentNM()->mkNode(AND, au);
65
}
66
67
230196
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
68
{
69
230196
  if (n.getKind() != k)
70
  {
71
    // easy case, just add to conj if non-duplicate
72
209778
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
73
    {
74
205101
      conj.push_back(n);
75
    }
76
209778
    return;
77
  }
78
  // otherwise, traverse
79
40836
  std::unordered_set<TNode> visited;
80
20418
  std::unordered_set<TNode>::iterator it;
81
40836
  std::vector<TNode> visit;
82
40836
  TNode cur;
83
20418
  visit.push_back(n);
84
43124
  do
85
  {
86
63542
    cur = visit.back();
87
63542
    visit.pop_back();
88
63542
    it = visited.find(cur);
89
90
63542
    if (it == visited.end())
91
    {
92
63542
      visited.insert(cur);
93
63542
      if (cur.getKind() == k)
94
      {
95
        // Add in reverse order, so that we traverse left to right.
96
        // This is important so that explantaions aren't reversed when they
97
        // are flattened, which is important for proofs involving substitutions.
98
40836
        std::vector<Node> newChildren;
99
20418
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
100
20418
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
101
      }
102
43124
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
103
      {
104
42319
        conj.push_back(cur);
105
      }
106
    }
107
63542
  } while (!visit.empty());
108
}
109
110
763009
void getConcat(Node n, std::vector<Node>& c)
111
{
112
763009
  Kind k = n.getKind();
113
763009
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
114
  {
115
257696
    for (const Node& nc : n)
116
    {
117
192424
      c.push_back(nc);
118
65272
    }
119
  }
120
  else
121
  {
122
697737
    c.push_back(n);
123
  }
124
763009
}
125
126
739969
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
127
{
128
739969
  Assert(tn.isStringLike() || tn.isRegExp());
129
739969
  if (c.empty())
130
  {
131
54082
    Assert(tn.isStringLike());
132
54082
    return Word::mkEmptyWord(tn);
133
  }
134
685887
  else if (c.size() == 1)
135
  {
136
269206
    return c[0];
137
  }
138
416681
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
139
416681
  return NodeManager::currentNM()->mkNode(k, c);
140
}
141
142
1136
Node mkNConcat(Node n1, Node n2)
143
{
144
  return Rewriter::rewrite(
145
1136
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
146
}
147
148
4134
Node mkNConcat(Node n1, Node n2, Node n3)
149
{
150
  return Rewriter::rewrite(
151
4134
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
152
}
153
154
593166
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
155
{
156
593166
  return Rewriter::rewrite(mkConcat(c, tn));
157
}
158
159
1152208
Node mkNLength(Node t)
160
{
161
1152208
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
162
}
163
164
18534
Node mkPrefix(Node t, Node n)
165
{
166
18534
  NodeManager* nm = NodeManager::currentNM();
167
18534
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
168
}
169
170
14817
Node mkSuffix(Node t, Node n)
171
{
172
14817
  NodeManager* nm = NodeManager::currentNM();
173
  return nm->mkNode(
174
14817
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
175
}
176
177
197749
Node getConstantComponent(Node t)
178
{
179
197749
  if (t.getKind() == STRING_TO_REGEXP)
180
  {
181
9133
    return t[0].isConst() ? t[0] : Node::null();
182
  }
183
188616
  return t.isConst() ? t : Node::null();
184
}
185
186
163685
Node getConstantEndpoint(Node e, bool isSuf)
187
{
188
163685
  Kind ek = e.getKind();
189
163685
  if (ek == STRING_IN_REGEXP)
190
  {
191
2748
    e = e[1];
192
2748
    ek = e.getKind();
193
  }
194
163685
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
195
  {
196
84405
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
197
  }
198
79280
  return getConstantComponent(e);
199
}
200
201
132292
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
202
{
203
132292
  Assert(ss.empty());
204
132292
  Assert(ls.empty());
205
257636
  while (s.getKind() == STRING_SUBSTR)
206
  {
207
62672
    ss.push_back(s[1]);
208
62672
    ls.push_back(s[2]);
209
62672
    s = s[0];
210
  }
211
132292
  std::reverse(ss.begin(), ss.end());
212
132292
  std::reverse(ls.begin(), ls.end());
213
132292
  return s;
214
}
215
216
Node mkSubstrChain(Node base,
217
                   const std::vector<Node>& ss,
218
                   const std::vector<Node>& ls)
219
{
220
  NodeManager* nm = NodeManager::currentNM();
221
  for (unsigned i = 0, size = ss.size(); i < size; i++)
222
  {
223
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
224
  }
225
  return base;
226
}
227
228
500
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
229
{
230
  // Collect the equalities of the form (= x "") (sorted)
231
1000
  std::set<TNode> emptyNodes;
232
500
  bool allEmptyEqs = true;
233
500
  if (x.getKind() == EQUAL)
234
  {
235
394
    if (Word::isEmpty(x[0]))
236
    {
237
170
      emptyNodes.insert(x[1]);
238
    }
239
224
    else if (Word::isEmpty(x[1]))
240
    {
241
84
      emptyNodes.insert(x[0]);
242
    }
243
    else
244
    {
245
140
      allEmptyEqs = false;
246
    }
247
  }
248
106
  else if (x.getKind() == AND)
249
  {
250
276
    for (const Node& c : x)
251
    {
252
198
      if (c.getKind() != EQUAL)
253
      {
254
2
        allEmptyEqs = false;
255
2
        continue;
256
      }
257
258
194
      if (Word::isEmpty(c[0]))
259
      {
260
156
        emptyNodes.insert(c[1]);
261
      }
262
38
      else if (Word::isEmpty(c[1]))
263
      {
264
20
        emptyNodes.insert(c[0]);
265
      }
266
      else
267
      {
268
18
        allEmptyEqs = false;
269
      }
270
    }
271
  }
272
273
500
  if (emptyNodes.size() == 0)
274
  {
275
164
    allEmptyEqs = false;
276
  }
277
278
  return std::make_pair(
279
1000
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
280
}
281
282
13364804
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
283
284
1754
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
285
{
286
1754
  size_t i = start;
287
2850
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
288
  {
289
548
    i++;
290
  }
291
292
1754
  if (i >= rs.size())
293
  {
294
12
    return false;
295
  }
296
297
1742
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
298
}
299
300
882
bool isSimpleRegExp(Node r)
301
{
302
882
  Assert(r.getType().isRegExp());
303
304
1764
  std::vector<Node> v;
305
882
  utils::getConcat(r, v);
306
2254
  for (const Node& n : v)
307
  {
308
1900
    if (n.getKind() == STRING_TO_REGEXP)
309
    {
310
650
      if (!n[0].isConst())
311
      {
312
584
        return false;
313
      }
314
    }
315
2500
    else if (n.getKind() != REGEXP_SIGMA
316
2500
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
317
    {
318
472
      return false;
319
    }
320
  }
321
354
  return true;
322
}
323
324
1492
void getRegexpComponents(Node r, std::vector<Node>& result)
325
{
326
1492
  Assert(r.getType().isRegExp());
327
328
1492
  NodeManager* nm = NodeManager::currentNM();
329
1492
  if (r.getKind() == REGEXP_CONCAT)
330
  {
331
1420
    for (const Node& n : r)
332
    {
333
1168
      getRegexpComponents(n, result);
334
    }
335
  }
336
1240
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
337
  {
338
488
    size_t rlen = Word::getLength(r[0]);
339
1738
    for (size_t i = 0; i < rlen; i++)
340
    {
341
1250
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
342
    }
343
  }
344
  else
345
  {
346
752
    result.push_back(r);
347
  }
348
1492
}
349
350
void printConcat(std::ostream& out, std::vector<Node>& n)
351
{
352
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
353
  {
354
    if (i > 0)
355
    {
356
      out << " ++ ";
357
    }
358
    out << n[i];
359
  }
360
}
361
362
void printConcatTrace(std::vector<Node>& n, const char* c)
363
{
364
  std::stringstream ss;
365
  printConcat(ss, n);
366
  Trace(c) << ss.str();
367
}
368
369
7636
bool isStringKind(Kind k)
370
{
371
7512
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
372
7458
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
373
15056
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
374
}
375
376
730
bool isRegExpKind(Kind k)
377
{
378
730
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
379
730
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
380
308
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
381
29
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
382
759
         || k == REGEXP_COMPLEMENT;
383
}
384
385
36714
TypeNode getOwnerStringType(Node n)
386
{
387
36714
  TypeNode tn;
388
36714
  Kind k = n.getKind();
389
36714
  if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
390
10261
      || k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX
391
7636
      || k == STRING_SUFFIX)
392
  {
393
    // owning string type is the type of first argument
394
29078
    tn = n[0].getType();
395
  }
396
7636
  else if (isStringKind(k))
397
  {
398
2220
    tn = NodeManager::currentNM()->stringType();
399
  }
400
  else
401
  {
402
5416
    tn = n.getType();
403
  }
404
73428
  AlwaysAssert(tn.isStringLike())
405
36714
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
406
36714
  return tn;
407
}
408
409
4
unsigned getRepeatAmount(TNode node)
410
{
411
4
  Assert(node.getKind() == REGEXP_REPEAT);
412
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
413
}
414
415
32
unsigned getLoopMaxOccurrences(TNode node)
416
{
417
32
  Assert(node.getKind() == REGEXP_LOOP);
418
32
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
419
}
420
421
32
unsigned getLoopMinOccurrences(TNode node)
422
{
423
32
  Assert(node.getKind() == REGEXP_LOOP);
424
32
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
425
}
426
427
/**
428
 * Mapping to a dummy node for marking an attribute on internal quantified
429
 * formulas. This ensures that reductions are deterministic.
430
 */
431
struct QInternalVarAttributeId
432
{
433
};
434
typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute;
435
436
627
Node mkForallInternal(Node bvl, Node body)
437
{
438
627
  NodeManager* nm = NodeManager::currentNM();
439
  QInternalVarAttribute qiva;
440
1254
  Node qvar;
441
627
  if (bvl.hasAttribute(qiva))
442
  {
443
104
    qvar = bvl.getAttribute(qiva);
444
  }
445
  else
446
  {
447
523
    SkolemManager* sm = nm->getSkolemManager();
448
523
    qvar = sm->mkDummySkolem("qinternal", nm->booleanType());
449
    // this dummy variable marks that the quantified formula is internal
450
523
    qvar.setAttribute(InternalQuantAttribute(), true);
451
    // remember the dummy variable
452
523
    bvl.setAttribute(qiva, qvar);
453
  }
454
  // make the internal attribute, and put it in a singleton list
455
1254
  Node ip = nm->mkNode(INST_ATTRIBUTE, qvar);
456
1254
  Node ipl = nm->mkNode(INST_PATTERN_LIST, ip);
457
  // make the overall formula
458
1254
  return nm->mkNode(FORALL, bvl, body, ipl);
459
}
460
461
}  // namespace utils
462
}  // namespace strings
463
}  // namespace theory
464
29574
}  // namespace cvc5