GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 188 204 92.2 %
Date: 2021-09-30 Branches: 351 734 47.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Util functions for theory strings.
14
 */
15
16
#include "theory/strings/theory_strings_utils.h"
17
18
#include <sstream>
19
20
#include "expr/attribute.h"
21
#include "expr/skolem_manager.h"
22
#include "options/strings_options.h"
23
#include "theory/quantifiers/quantifiers_attributes.h"
24
#include "theory/rewriter.h"
25
#include "theory/strings/arith_entail.h"
26
#include "theory/strings/strings_entail.h"
27
#include "theory/strings/word.h"
28
#include "util/rational.h"
29
#include "util/regexp.h"
30
#include "util/string.h"
31
32
using namespace cvc5::kind;
33
34
namespace cvc5 {
35
namespace theory {
36
namespace strings {
37
namespace utils {
38
39
88357
uint32_t getAlphabetCardinality()
40
{
41
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
42
88357
  Assert(196608 <= String::num_codes());
43
88357
  return 196608;
44
}
45
46
296572
Node mkAnd(const std::vector<Node>& a)
47
{
48
593144
  std::vector<Node> au;
49
695975
  for (const Node& ai : a)
50
  {
51
399403
    if (std::find(au.begin(), au.end(), ai) == au.end())
52
    {
53
392995
      au.push_back(ai);
54
    }
55
  }
56
296572
  if (au.empty())
57
  {
58
136332
    return NodeManager::currentNM()->mkConst(true);
59
  }
60
160240
  else if (au.size() == 1)
61
  {
62
72441
    return au[0];
63
  }
64
87799
  return NodeManager::currentNM()->mkNode(AND, au);
65
}
66
67
189526
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
68
{
69
189526
  if (n.getKind() != k)
70
  {
71
    // easy case, just add to conj if non-duplicate
72
158013
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
73
    {
74
153982
      conj.push_back(n);
75
    }
76
158013
    return;
77
  }
78
  // otherwise, traverse
79
63026
  std::unordered_set<TNode> visited;
80
31513
  std::unordered_set<TNode>::iterator it;
81
63026
  std::vector<TNode> visit;
82
63026
  TNode cur;
83
31513
  visit.push_back(n);
84
64743
  do
85
  {
86
96256
    cur = visit.back();
87
96256
    visit.pop_back();
88
96256
    it = visited.find(cur);
89
90
96256
    if (it == visited.end())
91
    {
92
96256
      visited.insert(cur);
93
96256
      if (cur.getKind() == k)
94
      {
95
        // Add in reverse order, so that we traverse left to right.
96
        // This is important so that explantaions aren't reversed when they
97
        // are flattened, which is important for proofs involving substitutions.
98
63026
        std::vector<Node> newChildren;
99
31513
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
100
31513
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
101
      }
102
64743
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
103
      {
104
64144
        conj.push_back(cur);
105
      }
106
    }
107
96256
  } while (!visit.empty());
108
}
109
110
630733
void getConcat(Node n, std::vector<Node>& c)
111
{
112
630733
  Kind k = n.getKind();
113
630733
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
114
  {
115
199384
    for (const Node& nc : n)
116
    {
117
149231
      c.push_back(nc);
118
50153
    }
119
  }
120
  else
121
  {
122
580580
    c.push_back(n);
123
  }
124
630733
}
125
126
604872
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
127
{
128
604872
  Assert(tn.isStringLike() || tn.isRegExp());
129
604872
  if (c.empty())
130
  {
131
37832
    Assert(tn.isStringLike());
132
37832
    return Word::mkEmptyWord(tn);
133
  }
134
567040
  else if (c.size() == 1)
135
  {
136
217237
    return c[0];
137
  }
138
349803
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
139
349803
  return NodeManager::currentNM()->mkNode(k, c);
140
}
141
142
1091
Node mkNConcat(Node n1, Node n2)
143
{
144
  return Rewriter::rewrite(
145
1091
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
146
}
147
148
2074
Node mkNConcat(Node n1, Node n2, Node n3)
149
{
150
  return Rewriter::rewrite(
151
2074
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
152
}
153
154
492022
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
155
{
156
492022
  return Rewriter::rewrite(mkConcat(c, tn));
157
}
158
159
1155288
Node mkNLength(Node t)
160
{
161
1155288
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
162
}
163
164
13098
Node mkPrefix(Node t, Node n)
165
{
166
13098
  NodeManager* nm = NodeManager::currentNM();
167
13098
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
168
}
169
170
12174
Node mkSuffix(Node t, Node n)
171
{
172
12174
  NodeManager* nm = NodeManager::currentNM();
173
  return nm->mkNode(
174
12174
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
175
}
176
177
160105
Node getConstantComponent(Node t)
178
{
179
160105
  if (t.getKind() == STRING_TO_REGEXP)
180
  {
181
11715
    return t[0].isConst() ? t[0] : Node::null();
182
  }
183
148390
  return t.isConst() ? t : Node::null();
184
}
185
186
134107
Node getConstantEndpoint(Node e, bool isSuf)
187
{
188
134107
  Kind ek = e.getKind();
189
134107
  if (ek == STRING_IN_REGEXP)
190
  {
191
5146
    e = e[1];
192
5146
    ek = e.getKind();
193
  }
194
134107
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
195
  {
196
70242
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
197
  }
198
63865
  return getConstantComponent(e);
199
}
200
201
113433
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
202
{
203
113433
  Assert(ss.empty());
204
113433
  Assert(ls.empty());
205
220655
  while (s.getKind() == STRING_SUBSTR)
206
  {
207
53611
    ss.push_back(s[1]);
208
53611
    ls.push_back(s[2]);
209
53611
    s = s[0];
210
  }
211
113433
  std::reverse(ss.begin(), ss.end());
212
113433
  std::reverse(ls.begin(), ls.end());
213
113433
  return s;
214
}
215
216
Node mkSubstrChain(Node base,
217
                   const std::vector<Node>& ss,
218
                   const std::vector<Node>& ls)
219
{
220
  NodeManager* nm = NodeManager::currentNM();
221
  for (unsigned i = 0, size = ss.size(); i < size; i++)
222
  {
223
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
224
  }
225
  return base;
226
}
227
228
330
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
229
{
230
  // Collect the equalities of the form (= x "") (sorted)
231
660
  std::set<TNode> emptyNodes;
232
330
  bool allEmptyEqs = true;
233
330
  if (x.getKind() == EQUAL)
234
  {
235
278
    if (Word::isEmpty(x[0]))
236
    {
237
106
      emptyNodes.insert(x[1]);
238
    }
239
172
    else if (Word::isEmpty(x[1]))
240
    {
241
52
      emptyNodes.insert(x[0]);
242
    }
243
    else
244
    {
245
120
      allEmptyEqs = false;
246
    }
247
  }
248
52
  else if (x.getKind() == AND)
249
  {
250
132
    for (const Node& c : x)
251
    {
252
96
      if (c.getKind() != EQUAL)
253
      {
254
2
        allEmptyEqs = false;
255
2
        continue;
256
      }
257
258
92
      if (Word::isEmpty(c[0]))
259
      {
260
78
        emptyNodes.insert(c[1]);
261
      }
262
14
      else if (Word::isEmpty(c[1]))
263
      {
264
8
        emptyNodes.insert(c[0]);
265
      }
266
      else
267
      {
268
6
        allEmptyEqs = false;
269
      }
270
    }
271
  }
272
273
330
  if (emptyNodes.size() == 0)
274
  {
275
132
    allEmptyEqs = false;
276
  }
277
278
  return std::make_pair(
279
660
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
280
}
281
282
11054830
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
283
284
1469
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
285
{
286
1469
  size_t i = start;
287
2543
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
288
  {
289
537
    i++;
290
  }
291
292
1469
  if (i >= rs.size())
293
  {
294
12
    return false;
295
  }
296
297
1457
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
298
}
299
300
635
bool isSimpleRegExp(Node r)
301
{
302
635
  Assert(r.getType().isRegExp());
303
304
1270
  std::vector<Node> v;
305
635
  utils::getConcat(r, v);
306
1751
  for (const Node& n : v)
307
  {
308
1469
    if (n.getKind() == STRING_TO_REGEXP)
309
    {
310
467
      if (!n[0].isConst())
311
      {
312
367
        return false;
313
      }
314
    }
315
2004
    else if (n.getKind() != REGEXP_SIGMA
316
2004
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
317
    {
318
339
      return false;
319
    }
320
  }
321
282
  return true;
322
}
323
324
1239
void getRegexpComponents(Node r, std::vector<Node>& result)
325
{
326
1239
  Assert(r.getType().isRegExp());
327
328
1239
  NodeManager* nm = NodeManager::currentNM();
329
1239
  if (r.getKind() == REGEXP_CONCAT)
330
  {
331
1185
    for (const Node& n : r)
332
    {
333
971
      getRegexpComponents(n, result);
334
    }
335
  }
336
1025
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
337
  {
338
376
    size_t rlen = Word::getLength(r[0]);
339
1419
    for (size_t i = 0; i < rlen; i++)
340
    {
341
1043
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
342
    }
343
  }
344
  else
345
  {
346
649
    result.push_back(r);
347
  }
348
1239
}
349
350
void printConcat(std::ostream& out, std::vector<Node>& n)
351
{
352
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
353
  {
354
    if (i > 0)
355
    {
356
      out << " ++ ";
357
    }
358
    out << n[i];
359
  }
360
}
361
362
void printConcatTrace(std::vector<Node>& n, const char* c)
363
{
364
  std::stringstream ss;
365
  printConcat(ss, n);
366
  Trace(c) << ss.str();
367
}
368
369
7019
bool isStringKind(Kind k)
370
{
371
6903
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
372
6852
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
373
13833
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
374
}
375
376
487
bool isRegExpKind(Kind k)
377
{
378
487
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
379
487
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
380
193
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
381
20
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
382
507
         || k == REGEXP_COMPLEMENT;
383
}
384
385
32873
TypeNode getOwnerStringType(Node n)
386
{
387
32873
  TypeNode tn;
388
32873
  Kind k = n.getKind();
389
32873
  if (k == STRING_INDEXOF || k == STRING_INDEXOF_RE || k == STRING_LENGTH
390
9401
      || k == STRING_CONTAINS || k == SEQ_NTH || k == STRING_PREFIX
391
7019
      || k == STRING_SUFFIX)
392
  {
393
    // owning string type is the type of first argument
394
25854
    tn = n[0].getType();
395
  }
396
7019
  else if (isStringKind(k))
397
  {
398
2125
    tn = NodeManager::currentNM()->stringType();
399
  }
400
  else
401
  {
402
4894
    tn = n.getType();
403
  }
404
65746
  AlwaysAssert(tn.isStringLike())
405
32873
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
406
32873
  return tn;
407
}
408
409
4
unsigned getRepeatAmount(TNode node)
410
{
411
4
  Assert(node.getKind() == REGEXP_REPEAT);
412
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
413
}
414
415
26
unsigned getLoopMaxOccurrences(TNode node)
416
{
417
26
  Assert(node.getKind() == REGEXP_LOOP);
418
26
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
419
}
420
421
26
unsigned getLoopMinOccurrences(TNode node)
422
{
423
26
  Assert(node.getKind() == REGEXP_LOOP);
424
26
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
425
}
426
427
/**
428
 * Mapping to a dummy node for marking an attribute on internal quantified
429
 * formulas. This ensures that reductions are deterministic.
430
 */
431
struct QInternalVarAttributeId
432
{
433
};
434
typedef expr::Attribute<QInternalVarAttributeId, Node> QInternalVarAttribute;
435
436
400
Node mkForallInternal(Node bvl, Node body)
437
{
438
400
  NodeManager* nm = NodeManager::currentNM();
439
  QInternalVarAttribute qiva;
440
800
  Node qvar;
441
400
  if (bvl.hasAttribute(qiva))
442
  {
443
14
    qvar = bvl.getAttribute(qiva);
444
  }
445
  else
446
  {
447
386
    SkolemManager* sm = nm->getSkolemManager();
448
386
    qvar = sm->mkDummySkolem("qinternal", nm->booleanType());
449
    // this dummy variable marks that the quantified formula is internal
450
386
    qvar.setAttribute(InternalQuantAttribute(), true);
451
    // remember the dummy variable
452
386
    bvl.setAttribute(qiva, qvar);
453
  }
454
  // make the internal attribute, and put it in a singleton list
455
800
  Node ip = nm->mkNode(INST_ATTRIBUTE, qvar);
456
800
  Node ipl = nm->mkNode(INST_PATTERN_LIST, ip);
457
  // make the overall formula
458
800
  return nm->mkNode(FORALL, bvl, body, ipl);
459
}
460
461
}  // namespace utils
462
}  // namespace strings
463
}  // namespace theory
464
22755
}  // namespace cvc5