GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings_utils.cpp Lines: 174 192 90.6 %
Date: 2021-03-23 Branches: 320 712 44.9 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file theory_strings_utils.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Andrew Reynolds, Andres Noetzli, Yoni Zohar
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Util functions for theory strings.
13
 **
14
 ** Util functions for theory strings.
15
 **/
16
17
#include "theory/strings/theory_strings_utils.h"
18
19
#include <sstream>
20
21
#include "options/strings_options.h"
22
#include "theory/rewriter.h"
23
#include "theory/strings/arith_entail.h"
24
#include "theory/strings/strings_entail.h"
25
#include "theory/strings/word.h"
26
27
using namespace CVC4::kind;
28
29
namespace CVC4 {
30
namespace theory {
31
namespace strings {
32
namespace utils {
33
34
38942
uint32_t getAlphabetCardinality()
35
{
36
78046
  if (options::stdPrintASCII())
37
  {
38
8
    Assert(128 <= String::num_codes());
39
8
    return 128;
40
  }
41
  // 3*16^4 = 196608 values in the SMT-LIB standard for Unicode strings
42
38934
  Assert(196608 <= String::num_codes());
43
38934
  return 196608;
44
}
45
46
224410
Node mkAnd(const std::vector<Node>& a)
47
{
48
448820
  std::vector<Node> au;
49
482655
  for (const Node& ai : a)
50
  {
51
258245
    if (std::find(au.begin(), au.end(), ai) == au.end())
52
    {
53
252375
      au.push_back(ai);
54
    }
55
  }
56
224410
  if (au.empty())
57
  {
58
123196
    return NodeManager::currentNM()->mkConst(true);
59
  }
60
101214
  else if (au.size() == 1)
61
  {
62
45044
    return au[0];
63
  }
64
56170
  return NodeManager::currentNM()->mkNode(AND, au);
65
}
66
67
111222
void flattenOp(Kind k, Node n, std::vector<Node>& conj)
68
{
69
111222
  if (n.getKind() != k)
70
  {
71
    // easy case, just add to conj if non-duplicate
72
103541
    if (std::find(conj.begin(), conj.end(), n) == conj.end())
73
    {
74
100334
      conj.push_back(n);
75
    }
76
103541
    return;
77
  }
78
  // otherwise, traverse
79
15362
  std::unordered_set<TNode, TNodeHashFunction> visited;
80
7681
  std::unordered_set<TNode, TNodeHashFunction>::iterator it;
81
15362
  std::vector<TNode> visit;
82
15362
  TNode cur;
83
7681
  visit.push_back(n);
84
15975
  do
85
  {
86
23656
    cur = visit.back();
87
23656
    visit.pop_back();
88
23656
    it = visited.find(cur);
89
90
23656
    if (it == visited.end())
91
    {
92
23656
      visited.insert(cur);
93
23656
      if (cur.getKind() == k)
94
      {
95
        // Add in reverse order, so that we traverse left to right.
96
        // This is important so that explantaions aren't reversed when they
97
        // are flattened, which is important for proofs involving substitutions.
98
15362
        std::vector<Node> newChildren;
99
7681
        newChildren.insert(newChildren.end(), cur.begin(), cur.end());
100
7681
        visit.insert(visit.end(), newChildren.rbegin(), newChildren.rend());
101
      }
102
15975
      else if (std::find(conj.begin(), conj.end(), cur) == conj.end())
103
      {
104
15959
        conj.push_back(cur);
105
      }
106
    }
107
23656
  } while (!visit.empty());
108
}
109
110
1223650
void getConcat(Node n, std::vector<Node>& c)
111
{
112
1223650
  Kind k = n.getKind();
113
1223650
  if (k == STRING_CONCAT || k == REGEXP_CONCAT)
114
  {
115
945868
    for (const Node& nc : n)
116
    {
117
702460
      c.push_back(nc);
118
243408
    }
119
  }
120
  else
121
  {
122
980242
    c.push_back(n);
123
  }
124
1223650
}
125
126
513031
Node mkConcat(const std::vector<Node>& c, TypeNode tn)
127
{
128
513031
  Assert(tn.isStringLike() || tn.isRegExp());
129
513031
  if (c.empty())
130
  {
131
30367
    Assert(tn.isStringLike());
132
30367
    return Word::mkEmptyWord(tn);
133
  }
134
482664
  else if (c.size() == 1)
135
  {
136
213102
    return c[0];
137
  }
138
269562
  Kind k = tn.isStringLike() ? STRING_CONCAT : REGEXP_CONCAT;
139
269562
  return NodeManager::currentNM()->mkNode(k, c);
140
}
141
142
1100
Node mkNConcat(Node n1, Node n2)
143
{
144
  return Rewriter::rewrite(
145
1100
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2));
146
}
147
148
1914
Node mkNConcat(Node n1, Node n2, Node n3)
149
{
150
  return Rewriter::rewrite(
151
1914
      NodeManager::currentNM()->mkNode(STRING_CONCAT, n1, n2, n3));
152
}
153
154
397980
Node mkNConcat(const std::vector<Node>& c, TypeNode tn)
155
{
156
397980
  return Rewriter::rewrite(mkConcat(c, tn));
157
}
158
159
517663
Node mkNLength(Node t)
160
{
161
517663
  return Rewriter::rewrite(NodeManager::currentNM()->mkNode(STRING_LENGTH, t));
162
}
163
164
10960
Node mkPrefix(Node t, Node n)
165
{
166
10960
  NodeManager* nm = NodeManager::currentNM();
167
10960
  return nm->mkNode(STRING_SUBSTR, t, nm->mkConst(Rational(0)), n);
168
}
169
170
8888
Node mkSuffix(Node t, Node n)
171
{
172
8888
  NodeManager* nm = NodeManager::currentNM();
173
  return nm->mkNode(
174
8888
      STRING_SUBSTR, t, n, nm->mkNode(MINUS, nm->mkNode(STRING_LENGTH, t), n));
175
}
176
177
128470
Node getConstantComponent(Node t)
178
{
179
128470
  if (t.getKind() == STRING_TO_REGEXP)
180
  {
181
4296
    return t[0].isConst() ? t[0] : Node::null();
182
  }
183
124174
  return t.isConst() ? t : Node::null();
184
}
185
186
113084
Node getConstantEndpoint(Node e, bool isSuf)
187
{
188
113084
  Kind ek = e.getKind();
189
113084
  if (ek == STRING_IN_REGEXP)
190
  {
191
3485
    e = e[1];
192
3485
    ek = e.getKind();
193
  }
194
113084
  if (ek == STRING_CONCAT || ek == REGEXP_CONCAT)
195
  {
196
58693
    return getConstantComponent(e[isSuf ? e.getNumChildren() - 1 : 0]);
197
  }
198
54391
  return getConstantComponent(e);
199
}
200
201
297600
Node decomposeSubstrChain(Node s, std::vector<Node>& ss, std::vector<Node>& ls)
202
{
203
297600
  Assert(ss.empty());
204
297600
  Assert(ls.empty());
205
372834
  while (s.getKind() == STRING_SUBSTR)
206
  {
207
37617
    ss.push_back(s[1]);
208
37617
    ls.push_back(s[2]);
209
37617
    s = s[0];
210
  }
211
297600
  std::reverse(ss.begin(), ss.end());
212
297600
  std::reverse(ls.begin(), ls.end());
213
297600
  return s;
214
}
215
216
Node mkSubstrChain(Node base,
217
                   const std::vector<Node>& ss,
218
                   const std::vector<Node>& ls)
219
{
220
  NodeManager* nm = NodeManager::currentNM();
221
  for (unsigned i = 0, size = ss.size(); i < size; i++)
222
  {
223
    base = nm->mkNode(STRING_SUBSTR, base, ss[i], ls[i]);
224
  }
225
  return base;
226
}
227
228
286
std::pair<bool, std::vector<Node> > collectEmptyEqs(Node x)
229
{
230
  // Collect the equalities of the form (= x "") (sorted)
231
572
  std::set<TNode> emptyNodes;
232
286
  bool allEmptyEqs = true;
233
286
  if (x.getKind() == EQUAL)
234
  {
235
260
    if (Word::isEmpty(x[0]))
236
    {
237
98
      emptyNodes.insert(x[1]);
238
    }
239
162
    else if (Word::isEmpty(x[1]))
240
    {
241
50
      emptyNodes.insert(x[0]);
242
    }
243
    else
244
    {
245
112
      allEmptyEqs = false;
246
    }
247
  }
248
26
  else if (x.getKind() == AND)
249
  {
250
54
    for (const Node& c : x)
251
    {
252
36
      if (c.getKind() == EQUAL)
253
      {
254
34
        if (Word::isEmpty(c[0]))
255
        {
256
34
          emptyNodes.insert(c[1]);
257
        }
258
        else if (Word::isEmpty(c[1]))
259
        {
260
          emptyNodes.insert(c[0]);
261
        }
262
      }
263
      else
264
      {
265
2
        allEmptyEqs = false;
266
      }
267
    }
268
  }
269
270
286
  if (emptyNodes.size() == 0)
271
  {
272
120
    allEmptyEqs = false;
273
  }
274
275
  return std::make_pair(
276
572
      allEmptyEqs, std::vector<Node>(emptyNodes.begin(), emptyNodes.end()));
277
}
278
279
7092402
bool isConstantLike(Node n) { return n.isConst() || n.getKind() == SEQ_UNIT; }
280
281
1702
bool isUnboundedWildcard(const std::vector<Node>& rs, size_t start)
282
{
283
1702
  size_t i = start;
284
2798
  while (i < rs.size() && rs[i].getKind() == REGEXP_SIGMA)
285
  {
286
548
    i++;
287
  }
288
289
1702
  if (i >= rs.size())
290
  {
291
12
    return false;
292
  }
293
294
1690
  return rs[i].getKind() == REGEXP_STAR && rs[i][0].getKind() == REGEXP_SIGMA;
295
}
296
297
526
bool isSimpleRegExp(Node r)
298
{
299
526
  Assert(r.getType().isRegExp());
300
301
1052
  std::vector<Node> v;
302
526
  utils::getConcat(r, v);
303
1748
  for (const Node& n : v)
304
  {
305
1462
    if (n.getKind() == STRING_TO_REGEXP)
306
    {
307
498
      if (!n[0].isConst())
308
      {
309
240
        return false;
310
      }
311
    }
312
1928
    else if (n.getKind() != REGEXP_SIGMA
313
1928
             && (n.getKind() != REGEXP_STAR || n[0].getKind() != REGEXP_SIGMA))
314
    {
315
240
      return false;
316
    }
317
  }
318
286
  return true;
319
}
320
321
1384
void getRegexpComponents(Node r, std::vector<Node>& result)
322
{
323
1384
  Assert(r.getType().isRegExp());
324
325
1384
  NodeManager* nm = NodeManager::currentNM();
326
1384
  if (r.getKind() == REGEXP_CONCAT)
327
  {
328
1348
    for (const Node& n : r)
329
    {
330
1112
      getRegexpComponents(n, result);
331
    }
332
  }
333
1148
  else if (r.getKind() == STRING_TO_REGEXP && r[0].isConst())
334
  {
335
440
    size_t rlen = Word::getLength(r[0]);
336
1796
    for (size_t i = 0; i < rlen; i++)
337
    {
338
1356
      result.push_back(nm->mkNode(STRING_TO_REGEXP, Word::substr(r[0], i, 1)));
339
    }
340
  }
341
  else
342
  {
343
708
    result.push_back(r);
344
  }
345
1384
}
346
347
void printConcat(std::ostream& out, std::vector<Node>& n)
348
{
349
  for (unsigned i = 0, nsize = n.size(); i < nsize; i++)
350
  {
351
    if (i > 0)
352
    {
353
      out << " ++ ";
354
    }
355
    out << n[i];
356
  }
357
}
358
359
void printConcatTrace(std::vector<Node>& n, const char* c)
360
{
361
  std::stringstream ss;
362
  printConcat(ss, n);
363
  Trace(c) << ss.str();
364
}
365
366
4890
bool isStringKind(Kind k)
367
{
368
4874
  return k == STRING_STOI || k == STRING_ITOS || k == STRING_TOLOWER
369
4824
         || k == STRING_TOUPPER || k == STRING_LEQ || k == STRING_LT
370
9652
         || k == STRING_FROM_CODE || k == STRING_TO_CODE;
371
}
372
373
471
bool isRegExpKind(Kind k)
374
{
375
471
  return k == REGEXP_EMPTY || k == REGEXP_SIGMA || k == STRING_TO_REGEXP
376
471
         || k == REGEXP_CONCAT || k == REGEXP_UNION || k == REGEXP_INTER
377
220
         || k == REGEXP_STAR || k == REGEXP_PLUS || k == REGEXP_OPT
378
34
         || k == REGEXP_RANGE || k == REGEXP_LOOP || k == REGEXP_RV
379
505
         || k == REGEXP_COMPLEMENT;
380
}
381
382
26609
TypeNode getOwnerStringType(Node n)
383
{
384
26609
  TypeNode tn;
385
26609
  Kind k = n.getKind();
386
26609
  if (k == STRING_STRIDOF || k == STRING_LENGTH || k == STRING_STRCTN
387
4907
      || k == SEQ_NTH || k == STRING_PREFIX || k == STRING_SUFFIX)
388
  {
389
    // owning string type is the type of first argument
390
21719
    tn = n[0].getType();
391
  }
392
4890
  else if (isStringKind(k))
393
  {
394
1104
    tn = NodeManager::currentNM()->stringType();
395
  }
396
  else
397
  {
398
3786
    tn = n.getType();
399
  }
400
53218
  AlwaysAssert(tn.isStringLike())
401
26609
      << "Unexpected term in getOwnerStringType : " << n << ", type " << tn;
402
26609
  return tn;
403
}
404
405
4
unsigned getRepeatAmount(TNode node)
406
{
407
4
  Assert(node.getKind() == REGEXP_REPEAT);
408
4
  return node.getOperator().getConst<RegExpRepeat>().d_repeatAmount;
409
}
410
411
24
unsigned getLoopMaxOccurrences(TNode node)
412
{
413
24
  Assert(node.getKind() == REGEXP_LOOP);
414
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMaxOcc;
415
}
416
417
24
unsigned getLoopMinOccurrences(TNode node)
418
{
419
24
  Assert(node.getKind() == REGEXP_LOOP);
420
24
  return node.getOperator().getConst<RegExpLoop>().d_loopMinOcc;
421
}
422
423
}  // namespace utils
424
}  // namespace strings
425
}  // namespace theory
426
65789
}  // namespace CVC4