GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 425 441 96.4 %
Date: 2021-05-22 Branches: 957 2017 47.4 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
25
using namespace cvc5::kind;
26
27
namespace cvc5 {
28
namespace theory {
29
namespace strings {
30
31
9649
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
32
{
33
9649
}
34
35
2586
bool StringsEntail::canConstantContainConcat(Node c,
36
                                             Node n,
37
                                             int& firstc,
38
                                             int& lastc)
39
{
40
2586
  Assert(c.isConst());
41
2586
  Assert(n.getKind() == STRING_CONCAT);
42
  // must find constant components in order
43
2586
  size_t pos = 0;
44
2586
  firstc = -1;
45
2586
  lastc = -1;
46
9775
  for (unsigned i = 0; i < n.getNumChildren(); i++)
47
  {
48
7308
    if (n[i].isConst())
49
    {
50
1704
      firstc = firstc == -1 ? i : firstc;
51
1704
      lastc = i;
52
1704
      size_t new_pos = Word::find(c, n[i], pos);
53
1704
      if (new_pos == std::string::npos)
54
      {
55
111
        return false;
56
      }
57
      else
58
      {
59
1593
        pos = new_pos + Word::getLength(n[i]);
60
      }
61
    }
62
5604
    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
63
    {
64
20
      Assert(c.getType().isString());  // string-only
65
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
66
      // find the first occurrence of a digit starting at pos
67
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
68
      {
69
48
        pos++;
70
      }
71
20
      if (pos == tvec.size())
72
      {
73
8
        return false;
74
      }
75
      // must consume at least one digit here
76
12
      pos++;
77
    }
78
  }
79
2467
  return true;
80
}
81
82
36457
bool StringsEntail::canConstantContainList(Node c,
83
                                           std::vector<Node>& l,
84
                                           int& firstc,
85
                                           int& lastc)
86
{
87
36457
  Assert(c.isConst());
88
  // must find constant components in order
89
36457
  size_t pos = 0;
90
36457
  firstc = -1;
91
36457
  lastc = -1;
92
105199
  for (unsigned i = 0; i < l.size(); i++)
93
  {
94
69283
    if (l[i].isConst())
95
    {
96
58995
      firstc = firstc == -1 ? i : firstc;
97
58995
      lastc = i;
98
58995
      size_t new_pos = Word::find(c, l[i], pos);
99
58995
      if (new_pos == std::string::npos)
100
      {
101
541
        return false;
102
      }
103
      else
104
      {
105
58454
        pos = new_pos + Word::getLength(l[i]);
106
      }
107
    }
108
  }
109
35916
  return true;
110
}
111
112
56130
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
113
                                        std::vector<Node>& nr,
114
                                        int dir,
115
                                        Node& curr,
116
                                        bool strict)
117
{
118
56130
  Assert(dir == 1 || dir == -1);
119
56130
  Assert(nr.empty());
120
56130
  NodeManager* nm = NodeManager::currentNM();
121
112260
  Node zero = nm->mkConst(cvc5::Rational(0));
122
56130
  bool ret = false;
123
56130
  bool success = true;
124
56130
  unsigned sindex = 0;
125
149406
  while (success && curr != zero && sindex < n1.size())
126
  {
127
46638
    Assert(!curr.isNull());
128
46638
    success = false;
129
46638
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
130
46638
    if (n1[sindex_use].isConst())
131
    {
132
      // could strip part of a constant
133
20746
      Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr));
134
10373
      if (!lowerBound.isNull())
135
      {
136
3224
        Assert(lowerBound.isConst());
137
6448
        Rational lbr = lowerBound.getConst<Rational>();
138
3224
        if (lbr.sgn() > 0)
139
        {
140
1759
          Assert(ArithEntail::check(curr, true));
141
3518
          Node s = n1[sindex_use];
142
1759
          size_t slen = Word::getLength(s);
143
3518
          Node ncl = nm->mkConst(cvc5::Rational(slen));
144
3518
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
145
1759
          next_s = Rewriter::rewrite(next_s);
146
1759
          Assert(next_s.isConst());
147
          // we can remove the entire constant
148
1759
          if (next_s.getConst<Rational>().sgn() >= 0)
149
          {
150
664
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
151
664
            success = true;
152
664
            sindex++;
153
          }
154
          else
155
          {
156
            // we can remove part of the constant
157
            // lower bound minus the length of a concrete string is negative,
158
            // hence lowerBound cannot be larger than long max
159
1095
            Assert(lbr < Rational(String::maxSize()));
160
1095
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
161
1095
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
162
1095
            Assert(lbsize < slen);
163
1095
            if (dir == 1)
164
            {
165
              // strip partially from the front
166
1046
              nr.push_back(Word::prefix(s, lbsize));
167
1046
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
168
            }
169
            else
170
            {
171
              // strip partially from the back
172
49
              nr.push_back(Word::suffix(s, lbsize));
173
49
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
174
            }
175
1095
            ret = true;
176
          }
177
1759
          Assert(ArithEntail::check(curr));
178
        }
179
        else
180
        {
181
          // we cannot remove the constant
182
        }
183
      }
184
    }
185
    else
186
    {
187
      Node next_s = NodeManager::currentNM()->mkNode(
188
          MINUS,
189
          curr,
190
72530
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
191
36265
      next_s = Rewriter::rewrite(next_s);
192
36265
      if (ArithEntail::check(next_s))
193
      {
194
470
        success = true;
195
470
        curr = next_s;
196
470
        sindex++;
197
      }
198
    }
199
  }
200
56130
  if (sindex > 0 && (!strict || curr == zero))
201
  {
202
1011
    if (dir == 1)
203
    {
204
982
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
205
982
      n1.erase(n1.begin(), n1.begin() + sindex);
206
    }
207
    else
208
    {
209
29
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
210
29
      n1.erase(n1.end() - sindex, n1.end());
211
    }
212
1011
    ret = true;
213
  }
214
112260
  return ret;
215
}
216
217
163593
int StringsEntail::componentContains(std::vector<Node>& n1,
218
                                     std::vector<Node>& n2,
219
                                     std::vector<Node>& nb,
220
                                     std::vector<Node>& ne,
221
                                     bool computeRemainder,
222
                                     int remainderDir)
223
{
224
163593
  Assert(nb.empty());
225
163593
  Assert(ne.empty());
226
163593
  Trace("strings-entail") << "Component contains: " << std::endl;
227
163593
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
228
163593
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
229
  // if n2 is a singleton, we can do optimized version here
230
163593
  if (n2.size() == 1)
231
  {
232
277869
    for (unsigned i = 0; i < n1.size(); i++)
233
    {
234
315071
      Node n1rb;
235
315071
      Node n1re;
236
158865
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
237
      {
238
2659
        if (computeRemainder)
239
        {
240
271
          n1[i] = n2[0];
241
271
          if (remainderDir != -1)
242
          {
243
271
            if (!n1re.isNull())
244
            {
245
37
              ne.push_back(n1re);
246
            }
247
271
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
248
271
            n1.erase(n1.begin() + i + 1, n1.end());
249
          }
250
          else if (!n1re.isNull())
251
          {
252
            n1[i] = Rewriter::rewrite(
253
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
254
          }
255
271
          if (remainderDir != 1)
256
          {
257
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
258
            n1.erase(n1.begin(), n1.begin() + i);
259
            if (!n1rb.isNull())
260
            {
261
              nb.push_back(n1rb);
262
            }
263
          }
264
271
          else if (!n1rb.isNull())
265
          {
266
94
            n1[i] = Rewriter::rewrite(
267
188
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
268
          }
269
        }
270
2659
        return i;
271
      }
272
    }
273
  }
274
41930
  else if (n1.size() >= n2.size())
275
  {
276
11263
    unsigned diff = n1.size() - n2.size();
277
29599
    for (unsigned i = 0; i <= diff; i++)
278
    {
279
36805
      Node n1rb_first;
280
36805
      Node n1re_first;
281
      // first component of n2 must be a suffix
282
36938
      if (componentContainsBase(n1[i],
283
18469
                                n2[0],
284
                                n1rb_first,
285
                                n1re_first,
286
                                1,
287
18469
                                computeRemainder && remainderDir != 1))
288
      {
289
4355
        Assert(n1re_first.isNull());
290
7549
        for (unsigned j = 1; j < n2.size(); j++)
291
        {
292
          // are we in the last component?
293
7549
          if (j + 1 == n2.size())
294
          {
295
947
            Node n1rb_last;
296
947
            Node n1re_last;
297
            // last component of n2 must be a prefix
298
1894
            if (componentContainsBase(n1[i + j],
299
947
                                      n2[j],
300
                                      n1rb_last,
301
                                      n1re_last,
302
                                      -1,
303
947
                                      computeRemainder && remainderDir != -1))
304
            {
305
266
              Trace("strings-entail-debug")
306
133
                  << "Last remainder begin is " << n1rb_last << std::endl;
307
266
              Trace("strings-entail-debug")
308
133
                  << "Last remainder end is " << n1re_last << std::endl;
309
133
              Assert(n1rb_last.isNull());
310
133
              if (computeRemainder)
311
              {
312
12
                if (remainderDir != -1)
313
                {
314
12
                  if (!n1re_last.isNull())
315
                  {
316
2
                    ne.push_back(n1re_last);
317
                  }
318
12
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
319
12
                  n1.erase(n1.begin() + i + j + 1, n1.end());
320
12
                  n1[i + j] = n2[j];
321
                }
322
12
                if (remainderDir != 1)
323
                {
324
                  n1[i] = n2[0];
325
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
326
                  n1.erase(n1.begin(), n1.begin() + i);
327
                  if (!n1rb_first.isNull())
328
                  {
329
                    nb.push_back(n1rb_first);
330
                  }
331
                }
332
              }
333
133
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
334
133
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
335
133
              Trace("strings-entail-debug") << "...return " << i << std::endl;
336
133
              return i;
337
            }
338
            else
339
            {
340
814
              break;
341
            }
342
          }
343
6602
          else if (n1[i + j] != n2[j])
344
          {
345
3408
            break;
346
          }
347
        }
348
      }
349
    }
350
  }
351
160801
  return -1;
352
}
353
354
178281
bool StringsEntail::componentContainsBase(
355
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
356
{
357
178281
  Assert(n1rb.isNull());
358
178281
  Assert(n1re.isNull());
359
360
178281
  NodeManager* nm = NodeManager::currentNM();
361
362
178281
  if (n1 == n2)
363
  {
364
6539
    return true;
365
  }
366
  else
367
  {
368
171742
    if (n1.isConst() && n2.isConst())
369
    {
370
2979
      size_t len1 = Word::getLength(n1);
371
2979
      size_t len2 = Word::getLength(n2);
372
2979
      if (len2 < len1)
373
      {
374
515
        if (dir == 1)
375
        {
376
340
          if (Word::suffix(n1, len2) == n2)
377
          {
378
64
            if (computeRemainder)
379
            {
380
              n1rb = Word::prefix(n1, len1 - len2);
381
            }
382
64
            return true;
383
          }
384
        }
385
175
        else if (dir == -1)
386
        {
387
4
          if (Word::prefix(n1, len2) == n2)
388
          {
389
4
            if (computeRemainder)
390
            {
391
              n1re = Word::suffix(n1, len1 - len2);
392
            }
393
4
            return true;
394
          }
395
        }
396
        else
397
        {
398
171
          size_t f = Word::find(n1, n2);
399
171
          if (f != std::string::npos)
400
          {
401
163
            if (computeRemainder)
402
            {
403
119
              if (f > 0)
404
              {
405
90
                n1rb = Word::prefix(n1, f);
406
              }
407
119
              if (len1 > f + len2)
408
              {
409
33
                n1re = Word::suffix(n1, len1 - (f + len2));
410
              }
411
            }
412
163
            return true;
413
          }
414
        }
415
      }
416
    }
417
    else
418
    {
419
      // cases for:
420
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
421
168763
      if (n2.getKind() == STRING_SUBSTR)
422
      {
423
23968
        if (n2[0] == n1)
424
        {
425
185
          bool success = true;
426
185
          Node start_pos = n2[1];
427
185
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
428
185
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
429
185
          if (dir == 1)
430
          {
431
            // To be a suffix, start + length must be greater than
432
            // or equal to the length of the string.
433
4
            success = ArithEntail::check(end_pos, len_n2s);
434
          }
435
181
          else if (dir == -1)
436
          {
437
            // To be a prefix, must literally start at 0, since
438
            //   if we knew it started at <0, it should be rewritten to "",
439
            //   if we knew it started at 0, then n2[1] should be rewritten to
440
            //   0.
441
24
            success = start_pos.isConst()
442
12
                      && start_pos.getConst<Rational>().sgn() == 0;
443
          }
444
185
          if (success)
445
          {
446
185
            if (computeRemainder)
447
            {
448
              // we can only compute the remainder if start_pos and end_pos
449
              // are known to be non-negative.
450
54
              if (!ArithEntail::check(start_pos)
451
54
                  || !ArithEntail::check(end_pos))
452
              {
453
12
                return false;
454
              }
455
6
              if (dir != -1)
456
              {
457
12
                n1rb = nm->mkNode(
458
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
459
              }
460
6
              if (dir != 1)
461
              {
462
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
463
              }
464
            }
465
173
            return true;
466
          }
467
        }
468
      }
469
470
168578
      if (!computeRemainder && dir == 0)
471
      {
472
154176
        if (n1.getKind() == STRING_STRREPL)
473
        {
474
          // (str.contains (str.replace x y z) w) ---> true
475
          // if (str.contains x w) --> true and (str.contains z w) ---> true
476
3972
          Node xCtnW = checkContains(n1[0], n2);
477
2088
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
478
          {
479
758
            Node zCtnW = checkContains(n1[2], n2);
480
481
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
481
            {
482
204
              return true;
483
            }
484
          }
485
        }
486
      }
487
    }
488
  }
489
171122
  return false;
490
}
491
492
164115
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
493
                                           std::vector<Node>& n2,
494
                                           std::vector<Node>& nb,
495
                                           std::vector<Node>& ne,
496
                                           int dir)
497
{
498
164115
  Assert(nb.empty());
499
164115
  Assert(ne.empty());
500
164115
  bool changed = false;
501
  // for ( forwards, backwards ) direction
502
441867
  for (unsigned r = 0; r < 2; r++)
503
  {
504
303003
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
505
    {
506
299876
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
507
299876
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
508
299876
      bool removeComponent = false;
509
574501
      Node n1cmp = n1[index0];
510
511
299876
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
512
      {
513
25209
        return false;
514
      }
515
516
549292
      std::vector<Node> sss;
517
549292
      std::vector<Node> sls;
518
274667
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
519
549334
      Trace("strings-rewrite-debug2")
520
274667
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
521
274667
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
522
274667
      if (n1cmp.isConst())
523
      {
524
151604
        Node s = n1cmp;
525
75802
        size_t slen = Word::getLength(s);
526
        // overlap is an overapproximation of the number of characters
527
        // n2[index1] can match in s
528
75802
        unsigned overlap = Word::getLength(s);
529
75802
        if (n2[index1].isConst())
530
        {
531
22676
          Node t = n2[index1];
532
11338
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
533
11338
          if (ret == std::string::npos)
534
          {
535
2803
            if (n1.size() == 1)
536
            {
537
              // can remove everything
538
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
539
              //   str.contains( "", str.++( "ba", x ) )
540
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
541
              //   str.contains( "", "d" )
542
37
              removeComponent = true;
543
            }
544
2766
            else if (sss.empty())  // only if not substr
545
            {
546
              // check how much overlap there is
547
              // This is used to partially strip off the endpoint
548
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
549
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
550
2718
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
551
            }
552
            // note that we cannot process substring here, since t may
553
            // match only part of s. Consider:
554
            // (str.++ "C" (str.substr "AB" x y)), "CB"
555
            // where "AB" and "CB" have no overlap, but "C" is not part of what
556
            // is matched with "AB".
557
          }
558
8535
          else if (sss.empty())  // only if not substr
559
          {
560
5770
            Assert(ret < slen);
561
            // can strip off up to the find position, e.g.
562
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
563
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
564
            // and
565
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
566
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
567
5770
            overlap = slen - ret;
568
          }
569
        }
570
        else
571
        {
572
          // inconclusive
573
        }
574
75802
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
575
        // process the overlap
576
75802
        if (overlap < slen)
577
        {
578
213
          changed = true;
579
213
          if (overlap == 0)
580
          {
581
165
            removeComponent = true;
582
          }
583
          else
584
          {
585
            // can drop the prefix (resp. suffix) from the first (resp. last)
586
            // component
587
48
            if (r == 0)
588
            {
589
27
              nb.push_back(Word::prefix(s, slen - overlap));
590
27
              n1[index0] = Word::suffix(s, overlap);
591
            }
592
            else
593
            {
594
21
              ne.push_back(Word::suffix(s, slen - overlap));
595
21
              n1[index0] = Word::prefix(s, overlap);
596
            }
597
          }
598
        }
599
      }
600
198865
      else if (n1cmp.getKind() == STRING_ITOS)
601
      {
602
379
        if (n2[index1].isConst())
603
        {
604
29
          Assert(n2[index1].getType().isString());  // string-only
605
58
          cvc5::String t = n2[index1].getConst<String>();
606
29
          if (n1.size() == 1)
607
          {
608
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
609
            // the entire component
610
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
611
13
            if (!t.isNumber())
612
            {
613
5
              removeComponent = true;
614
            }
615
          }
616
          else
617
          {
618
16
            const std::vector<unsigned>& tvec = t.getVec();
619
16
            Assert(tvec.size() > 0);
620
621
            // if n1.size()>1, then if the first (resp. last) character of
622
            // n2[index1]
623
            //  is not a digit, we can drop the entire component, e.g.:
624
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
625
            //    str.contains( y, "a12" )
626
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
627
            //    str.contains( y, "a0b" )
628
16
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
629
16
            if (!String::isDigit(tvec[i]))
630
            {
631
12
              removeComponent = true;
632
            }
633
          }
634
        }
635
      }
636
274667
      if (removeComponent)
637
      {
638
219
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
639
        // can drop entire first (resp. last) component
640
219
        if (r == 0)
641
        {
642
150
          nb.push_back(n1[index0]);
643
150
          n1.erase(n1.begin(), n1.begin() + 1);
644
        }
645
        else
646
        {
647
69
          ne.push_back(n1[index0]);
648
69
          n1.pop_back();
649
        }
650
219
        if (n1.empty())
651
        {
652
          // if we've removed everything, just return (we will rewrite to false)
653
42
          return true;
654
        }
655
        else
656
        {
657
177
          changed = true;
658
        }
659
      }
660
    }
661
  }
662
  // TODO (#1180) : computing the maximal overlap in this function may be
663
  // important.
664
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
665
  // false
666
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
667
  //   leaving 4 characters
668
  //      which is larger that the upper bound for length of str.substr(y,0,3),
669
  //      which is 3.
670
138864
  return changed;
671
}
672
673
184809
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
674
{
675
184809
  NodeManager* nm = NodeManager::currentNM();
676
369618
  Node ctn = nm->mkNode(STRING_STRCTN, a, b);
677
678
184809
  if (fullRewriter)
679
  {
680
11290
    ctn = Rewriter::rewrite(ctn);
681
  }
682
  else
683
  {
684
347038
    Node prev;
685
170
    do
686
    {
687
173689
      prev = ctn;
688
173689
      ctn = d_rewriter.rewriteContains(ctn);
689
173689
    } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
690
  }
691
692
184809
  Assert(ctn.getType().isBoolean());
693
369618
  return ctn.isConst() ? ctn : Node::null();
694
}
695
696
21353
bool StringsEntail::checkNonEmpty(Node a)
697
{
698
42706
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
699
21353
  len = Rewriter::rewrite(len);
700
42706
  return ArithEntail::check(len, true);
701
}
702
703
43160
bool StringsEntail::checkLengthOne(Node s, bool strict)
704
{
705
43160
  NodeManager* nm = NodeManager::currentNM();
706
86320
  Node one = nm->mkConst(Rational(1));
707
86320
  Node len = nm->mkNode(STRING_LENGTH, s);
708
43160
  len = Rewriter::rewrite(len);
709
172640
  return ArithEntail::check(one, len)
710
215800
         && (!strict || ArithEntail::check(len, true));
711
}
712
713
160228
bool StringsEntail::checkMultisetSubset(Node a, Node b)
714
{
715
320456
  std::vector<Node> avec;
716
160228
  utils::getConcat(getMultisetApproximation(a), avec);
717
320456
  std::vector<Node> bvec;
718
160228
  utils::getConcat(b, bvec);
719
720
320456
  std::map<Node, unsigned> num_nconst[2];
721
320456
  std::map<Node, unsigned> num_const[2];
722
480684
  for (unsigned j = 0; j < 2; j++)
723
  {
724
320456
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
725
802335
    for (const Node& cc : jvec)
726
    {
727
481879
      if (cc.isConst())
728
      {
729
131186
        num_const[j][cc]++;
730
      }
731
      else
732
      {
733
350693
        num_nconst[j][cc]++;
734
      }
735
    }
736
  }
737
160228
  bool ms_success = true;
738
169209
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
739
  {
740
112509
    if (nncp.second > num_nconst[1][nncp.first])
741
    {
742
103528
      ms_success = false;
743
103528
      break;
744
    }
745
  }
746
160228
  if (ms_success)
747
  {
748
    // count the number of constant characters in the first argument
749
170100
    std::map<Node, unsigned> count_const[2];
750
113356
    std::vector<Node> chars;
751
170100
    for (unsigned j = 0; j < 2; j++)
752
    {
753
172549
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
754
      {
755
118298
        Node cn = ncp.first;
756
59149
        Assert(cn.isConst());
757
118298
        std::vector<Node> cnChars = Word::getChars(cn);
758
186528
        for (const Node& ch : cnChars)
759
        {
760
127379
          count_const[j][ch] += ncp.second;
761
127379
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
762
          {
763
85324
            chars.push_back(ch);
764
          }
765
        }
766
      }
767
    }
768
113400
    Trace("strings-entail-ms-ss")
769
56700
        << "For " << a << " and " << b << " : " << std::endl;
770
141960
    for (const Node& ch : chars)
771
    {
772
85304
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
773
85304
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
774
170608
      Trace("strings-entail-ms-ss")
775
85304
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
776
85304
      if (count_const[0][ch] < count_const[1][ch])
777
      {
778
44
        return true;
779
      }
780
    }
781
782
    // TODO (#1180): count the number of 2,3,4,.. character substrings
783
    // for example:
784
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
785
    // since the second argument contains more occurrences of "bb".
786
    // note this is orthogonal reasoning to inductive reasoning
787
    // via regular membership reduction in Liang et al CAV 2015.
788
  }
789
160184
  return false;
790
}
791
792
143800
Node StringsEntail::checkHomogeneousString(Node a)
793
{
794
287600
  std::vector<Node> avec;
795
143800
  utils::getConcat(getMultisetApproximation(a), avec);
796
797
143800
  bool cValid = false;
798
287600
  Node c;
799
216996
  for (const Node& ac : avec)
800
  {
801
147630
    if (ac.isConst())
802
    {
803
162780
      std::vector<Node> acv = Word::getChars(ac);
804
135668
      for (const Node& cc : acv)
805
      {
806
62472
        if (!cValid)
807
        {
808
41898
          cValid = true;
809
41898
          c = cc;
810
        }
811
20574
        else if (c != cc)
812
        {
813
          // Found a different character
814
16388
          return Node::null();
815
        }
816
      }
817
    }
818
    else
819
    {
820
      // Could produce a different character
821
58046
      return Node::null();
822
    }
823
  }
824
825
69366
  if (!cValid)
826
  {
827
46471
    return Word::mkEmptyWord(a.getType());
828
  }
829
830
22895
  return c;
831
}
832
833
449822
Node StringsEntail::getMultisetApproximation(Node a)
834
{
835
449822
  NodeManager* nm = NodeManager::currentNM();
836
449822
  if (a.getKind() == STRING_SUBSTR)
837
  {
838
60966
    return a[0];
839
  }
840
388856
  else if (a.getKind() == STRING_STRREPL)
841
  {
842
7323
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
843
  }
844
381533
  else if (a.getKind() == STRING_CONCAT)
845
  {
846
99850
    NodeBuilder nb(STRING_CONCAT);
847
188396
    for (const Node& ac : a)
848
    {
849
138471
      nb << getMultisetApproximation(ac);
850
    }
851
49925
    return nb.constructNode();
852
  }
853
  else
854
  {
855
331608
    return a;
856
  }
857
}
858
859
105072
Node StringsEntail::getStringOrEmpty(Node n)
860
{
861
105072
  Node res;
862
315256
  while (res.isNull())
863
  {
864
105092
    switch (n.getKind())
865
    {
866
510
      case STRING_STRREPL:
867
      {
868
510
        if (Word::isEmpty(n[0]))
869
        {
870
          // (str.replace "" x y) --> y
871
20
          n = n[2];
872
20
          break;
873
        }
874
875
490
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
876
        {
877
          // (str.replace "A" x "") --> "A"
878
36
          res = n[0];
879
36
          break;
880
        }
881
882
454
        res = n;
883
454
        break;
884
      }
885
6292
      case STRING_SUBSTR:
886
      {
887
6292
        if (checkLengthOne(n[0]))
888
        {
889
          // (str.substr "A" x y) --> "A"
890
549
          res = n[0];
891
549
          break;
892
        }
893
5743
        res = n;
894
5743
        break;
895
      }
896
98290
      default:
897
      {
898
98290
        res = n;
899
98290
        break;
900
      }
901
    }
902
  }
903
105072
  return res;
904
}
905
906
1696
Node StringsEntail::inferEqsFromContains(Node x, Node y)
907
{
908
1696
  NodeManager* nm = NodeManager::currentNM();
909
3392
  Node emp = Word::mkEmptyWord(x.getType());
910
1696
  Assert(x.getType() == y.getType());
911
3392
  TypeNode stype = x.getType();
912
913
3392
  Node xLen = nm->mkNode(STRING_LENGTH, x);
914
3392
  std::vector<Node> yLens;
915
1696
  if (y.getKind() != STRING_CONCAT)
916
  {
917
90
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
918
  }
919
  else
920
  {
921
5838
    for (const Node& yi : y)
922
    {
923
4232
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
924
    }
925
  }
926
927
3392
  std::vector<Node> zeroLens;
928
1696
  if (x == emp)
929
  {
930
    // If x is the empty string, then all ys must be empty, too, and we can
931
    // skip the expensive checks. Note that this is just a performance
932
    // optimization.
933
324
    zeroLens.swap(yLens);
934
  }
935
  else
936
  {
937
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
938
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
939
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
940
    // true. The terms that can have length zero without making the inequality
941
    // false must be all be empty if (str.contains x y) is true.
942
1372
    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
943
    {
944
      // We could not prove that the inequality holds
945
436
      return Node::null();
946
    }
947
936
    else if (yLens.size() == y.getNumChildren())
948
    {
949
      // We could only prove that the inequality holds but not that any of the
950
      // ys must be empty
951
443
      return nm->mkNode(EQUAL, x, y);
952
    }
953
  }
954
955
817
  if (y.getKind() != STRING_CONCAT)
956
  {
957
8
    if (zeroLens.size() == 1)
958
    {
959
      // y is not a concatenation and we found that it must be empty, so just
960
      // return (= y "")
961
      Assert(zeroLens[0][0] == y);
962
      return nm->mkNode(EQUAL, y, emp);
963
    }
964
    else
965
    {
966
8
      Assert(yLens.size() == 1 && yLens[0][0] == y);
967
8
      return nm->mkNode(EQUAL, x, y);
968
    }
969
  }
970
971
1618
  std::vector<Node> cs;
972
1366
  for (const Node& yiLen : yLens)
973
  {
974
557
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
975
557
    cs.push_back(yiLen[0]);
976
  }
977
978
1618
  NodeBuilder nb(AND);
979
  // (= x (str.++ y1' ... ym'))
980
809
  if (!cs.empty())
981
  {
982
485
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
983
  }
984
  // (= y1'' "") ... (= yk'' "")
985
2407
  for (const Node& zeroLen : zeroLens)
986
  {
987
1598
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
988
1598
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
989
  }
990
991
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
992
809
  return nb.constructNode();
993
}
994
995
}  // namespace strings
996
}  // namespace theory
997
28194
}  // namespace cvc5