GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 422 445 94.8 %
Date: 2021-09-29 Branches: 942 2027 46.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
6535
StringsEntail::StringsEntail(Rewriter* r,
34
                             ArithEntail& aent,
35
6535
                             SequencesRewriter& rewriter)
36
6535
    : d_rr(r), d_arithEntail(aent), d_rewriter(rewriter)
37
{
38
6535
}
39
40
600
bool StringsEntail::canConstantContainConcat(Node c,
41
                                             Node n,
42
                                             int& firstc,
43
                                             int& lastc)
44
{
45
600
  Assert(c.isConst());
46
600
  Assert(n.getKind() == STRING_CONCAT);
47
  // must find constant components in order
48
600
  size_t pos = 0;
49
600
  firstc = -1;
50
600
  lastc = -1;
51
2219
  for (unsigned i = 0; i < n.getNumChildren(); i++)
52
  {
53
1636
    if (n[i].isConst())
54
    {
55
318
      firstc = firstc == -1 ? i : firstc;
56
318
      lastc = i;
57
318
      size_t new_pos = Word::find(c, n[i], pos);
58
318
      if (new_pos == std::string::npos)
59
      {
60
15
        return false;
61
      }
62
      else
63
      {
64
303
        pos = new_pos + Word::getLength(n[i]);
65
      }
66
    }
67
1318
    else if (n[i].getKind() == STRING_ITOS && d_arithEntail.check(n[i][0]))
68
    {
69
5
      Assert(c.getType().isString());  // string-only
70
5
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
71
      // find the first occurrence of a digit starting at pos
72
29
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
73
      {
74
12
        pos++;
75
      }
76
5
      if (pos == tvec.size())
77
      {
78
2
        return false;
79
      }
80
      // must consume at least one digit here
81
3
      pos++;
82
    }
83
  }
84
583
  return true;
85
}
86
87
34323
bool StringsEntail::canConstantContainList(Node c,
88
                                           std::vector<Node>& l,
89
                                           int& firstc,
90
                                           int& lastc)
91
{
92
34323
  Assert(c.isConst());
93
  // must find constant components in order
94
34323
  size_t pos = 0;
95
34323
  firstc = -1;
96
34323
  lastc = -1;
97
99660
  for (unsigned i = 0; i < l.size(); i++)
98
  {
99
65656
    if (l[i].isConst())
100
    {
101
57017
      firstc = firstc == -1 ? i : firstc;
102
57017
      lastc = i;
103
57017
      size_t new_pos = Word::find(c, l[i], pos);
104
57017
      if (new_pos == std::string::npos)
105
      {
106
319
        return false;
107
      }
108
      else
109
      {
110
56698
        pos = new_pos + Word::getLength(l[i]);
111
      }
112
    }
113
  }
114
34004
  return true;
115
}
116
117
67666
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
118
                                        std::vector<Node>& nr,
119
                                        int dir,
120
                                        Node& curr,
121
                                        bool strict)
122
{
123
67666
  Assert(dir == 1 || dir == -1);
124
67666
  Assert(nr.empty());
125
67666
  NodeManager* nm = NodeManager::currentNM();
126
135332
  Node zero = nm->mkConst(cvc5::Rational(0));
127
67666
  bool ret = false;
128
67666
  bool success = true;
129
67666
  unsigned sindex = 0;
130
193120
  while (success && curr != zero && sindex < n1.size())
131
  {
132
62727
    Assert(!curr.isNull());
133
62727
    success = false;
134
62727
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
135
62727
    if (n1[sindex_use].isConst())
136
    {
137
      // could strip part of a constant
138
29124
      Node lowerBound = d_arithEntail.getConstantBound(d_rr->rewrite(curr));
139
14562
      if (!lowerBound.isNull())
140
      {
141
6131
        Assert(lowerBound.isConst());
142
12262
        Rational lbr = lowerBound.getConst<Rational>();
143
6131
        if (lbr.sgn() > 0)
144
        {
145
4453
          Assert(d_arithEntail.check(curr, true));
146
8906
          Node s = n1[sindex_use];
147
4453
          size_t slen = Word::getLength(s);
148
8906
          Node ncl = nm->mkConst(cvc5::Rational(slen));
149
8906
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
150
4453
          next_s = d_rr->rewrite(next_s);
151
4453
          Assert(next_s.isConst());
152
          // we can remove the entire constant
153
4453
          if (next_s.getConst<Rational>().sgn() >= 0)
154
          {
155
2811
            curr = d_rr->rewrite(nm->mkNode(MINUS, curr, ncl));
156
2811
            success = true;
157
2811
            sindex++;
158
          }
159
          else
160
          {
161
            // we can remove part of the constant
162
            // lower bound minus the length of a concrete string is negative,
163
            // hence lowerBound cannot be larger than long max
164
1642
            Assert(lbr < Rational(String::maxSize()));
165
1642
            curr = d_rr->rewrite(nm->mkNode(MINUS, curr, lowerBound));
166
1642
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
167
1642
            Assert(lbsize < slen);
168
1642
            if (dir == 1)
169
            {
170
              // strip partially from the front
171
1527
              nr.push_back(Word::prefix(s, lbsize));
172
1527
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
173
            }
174
            else
175
            {
176
              // strip partially from the back
177
115
              nr.push_back(Word::suffix(s, lbsize));
178
115
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
179
            }
180
1642
            ret = true;
181
          }
182
4453
          Assert(d_arithEntail.check(curr));
183
        }
184
        else
185
        {
186
          // we cannot remove the constant
187
        }
188
      }
189
    }
190
    else
191
    {
192
      Node next_s = NodeManager::currentNM()->mkNode(
193
          MINUS,
194
          curr,
195
96330
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
196
48165
      next_s = d_rr->rewrite(next_s);
197
48165
      if (d_arithEntail.check(next_s))
198
      {
199
1321
        success = true;
200
1321
        curr = next_s;
201
1321
        sindex++;
202
      }
203
    }
204
  }
205
67666
  if (sindex > 0 && (!strict || curr == zero))
206
  {
207
3366
    if (dir == 1)
208
    {
209
3064
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
210
3064
      n1.erase(n1.begin(), n1.begin() + sindex);
211
    }
212
    else
213
    {
214
302
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
215
302
      n1.erase(n1.end() - sindex, n1.end());
216
    }
217
3366
    ret = true;
218
  }
219
135332
  return ret;
220
}
221
222
78310
int StringsEntail::componentContains(std::vector<Node>& n1,
223
                                     std::vector<Node>& n2,
224
                                     std::vector<Node>& nb,
225
                                     std::vector<Node>& ne,
226
                                     bool computeRemainder,
227
                                     int remainderDir)
228
{
229
78310
  Assert(nb.empty());
230
78310
  Assert(ne.empty());
231
78310
  Trace("strings-entail") << "Component contains: " << std::endl;
232
78310
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
233
78310
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
234
  // if n2 is a singleton, we can do optimized version here
235
78310
  if (n2.size() == 1)
236
  {
237
154975
    for (unsigned i = 0; i < n1.size(); i++)
238
    {
239
157708
      Node n1rb;
240
157708
      Node n1re;
241
79685
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
242
      {
243
1662
        if (computeRemainder)
244
        {
245
321
          n1[i] = n2[0];
246
321
          if (remainderDir != -1)
247
          {
248
321
            if (!n1re.isNull())
249
            {
250
36
              ne.push_back(n1re);
251
            }
252
321
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
253
321
            n1.erase(n1.begin() + i + 1, n1.end());
254
          }
255
          else if (!n1re.isNull())
256
          {
257
            n1[i] = d_rr->rewrite(
258
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
259
          }
260
321
          if (remainderDir != 1)
261
          {
262
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
263
            n1.erase(n1.begin(), n1.begin() + i);
264
            if (!n1rb.isNull())
265
            {
266
              nb.push_back(n1rb);
267
            }
268
          }
269
321
          else if (!n1rb.isNull())
270
          {
271
103
            n1[i] = d_rr->rewrite(
272
206
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
273
          }
274
        }
275
1662
        return i;
276
      }
277
    }
278
  }
279
1358
  else if (n1.size() >= n2.size())
280
  {
281
334
    unsigned diff = n1.size() - n2.size();
282
643
    for (unsigned i = 0; i <= diff; i++)
283
    {
284
726
      Node n1rb_first;
285
726
      Node n1re_first;
286
      // first component of n2 must be a suffix
287
834
      if (componentContainsBase(n1[i],
288
417
                                n2[0],
289
                                n1rb_first,
290
                                n1re_first,
291
                                1,
292
417
                                computeRemainder && remainderDir != 1))
293
      {
294
121
        Assert(n1re_first.isNull());
295
188
        for (unsigned j = 1; j < n2.size(); j++)
296
        {
297
          // are we in the last component?
298
188
          if (j + 1 == n2.size())
299
          {
300
117
            Node n1rb_last;
301
117
            Node n1re_last;
302
            // last component of n2 must be a prefix
303
234
            if (componentContainsBase(n1[i + j],
304
117
                                      n2[j],
305
                                      n1rb_last,
306
                                      n1re_last,
307
                                      -1,
308
117
                                      computeRemainder && remainderDir != -1))
309
            {
310
216
              Trace("strings-entail-debug")
311
108
                  << "Last remainder begin is " << n1rb_last << std::endl;
312
216
              Trace("strings-entail-debug")
313
108
                  << "Last remainder end is " << n1re_last << std::endl;
314
108
              Assert(n1rb_last.isNull());
315
108
              if (computeRemainder)
316
              {
317
56
                if (remainderDir != -1)
318
                {
319
56
                  if (!n1re_last.isNull())
320
                  {
321
2
                    ne.push_back(n1re_last);
322
                  }
323
56
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
324
56
                  n1.erase(n1.begin() + i + j + 1, n1.end());
325
56
                  n1[i + j] = n2[j];
326
                }
327
56
                if (remainderDir != 1)
328
                {
329
                  n1[i] = n2[0];
330
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
331
                  n1.erase(n1.begin(), n1.begin() + i);
332
                  if (!n1rb_first.isNull())
333
                  {
334
                    nb.push_back(n1rb_first);
335
                  }
336
                }
337
              }
338
108
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
339
108
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
340
108
              Trace("strings-entail-debug") << "...return " << i << std::endl;
341
108
              return i;
342
            }
343
            else
344
            {
345
9
              break;
346
            }
347
          }
348
71
          else if (n1[i + j] != n2[j])
349
          {
350
4
            break;
351
          }
352
        }
353
      }
354
    }
355
  }
356
76540
  return -1;
357
}
358
359
80219
bool StringsEntail::componentContainsBase(
360
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
361
{
362
80219
  Assert(n1rb.isNull());
363
80219
  Assert(n1re.isNull());
364
365
80219
  NodeManager* nm = NodeManager::currentNM();
366
367
80219
  if (n1 == n2)
368
  {
369
563
    return true;
370
  }
371
  else
372
  {
373
79656
    if (n1.isConst() && n2.isConst())
374
    {
375
485
      size_t len1 = Word::getLength(n1);
376
485
      size_t len2 = Word::getLength(n2);
377
485
      if (len2 < len1)
378
      {
379
212
        if (dir == 1)
380
        {
381
17
          if (Word::suffix(n1, len2) == n2)
382
          {
383
17
            if (computeRemainder)
384
            {
385
              n1rb = Word::prefix(n1, len1 - len2);
386
            }
387
17
            return true;
388
          }
389
        }
390
195
        else if (dir == -1)
391
        {
392
1
          if (Word::prefix(n1, len2) == n2)
393
          {
394
1
            if (computeRemainder)
395
            {
396
              n1re = Word::suffix(n1, len1 - len2);
397
            }
398
1
            return true;
399
          }
400
        }
401
        else
402
        {
403
194
          size_t f = Word::find(n1, n2);
404
194
          if (f != std::string::npos)
405
          {
406
183
            if (computeRemainder)
407
            {
408
125
              if (f > 0)
409
              {
410
102
                n1rb = Word::prefix(n1, f);
411
              }
412
125
              if (len1 > f + len2)
413
              {
414
35
                n1re = Word::suffix(n1, len1 - (f + len2));
415
              }
416
            }
417
183
            return true;
418
          }
419
        }
420
      }
421
    }
422
    else
423
    {
424
      // cases for:
425
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
426
79171
      if (n2.getKind() == STRING_SUBSTR)
427
      {
428
43553
        if (n2[0] == n1)
429
        {
430
672
          bool success = true;
431
672
          Node start_pos = n2[1];
432
672
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
433
672
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
434
672
          if (dir == 1)
435
          {
436
            // To be a suffix, start + length must be greater than
437
            // or equal to the length of the string.
438
1
            success = d_arithEntail.check(end_pos, len_n2s);
439
          }
440
671
          else if (dir == -1)
441
          {
442
            // To be a prefix, must literally start at 0, since
443
            //   if we knew it started at <0, it should be rewritten to "",
444
            //   if we knew it started at 0, then n2[1] should be rewritten to
445
            //   0.
446
10
            success = start_pos.isConst()
447
5
                      && start_pos.getConst<Rational>().sgn() == 0;
448
          }
449
672
          if (success)
450
          {
451
672
            if (computeRemainder)
452
            {
453
              // we can only compute the remainder if start_pos and end_pos
454
              // are known to be non-negative.
455
45
              if (!d_arithEntail.check(start_pos)
456
45
                  || !d_arithEntail.check(end_pos))
457
              {
458
12
                return false;
459
              }
460
3
              if (dir != -1)
461
              {
462
3
                n1rb = nm->mkNode(
463
2
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
464
              }
465
3
              if (dir != 1)
466
              {
467
3
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
468
              }
469
            }
470
660
            return true;
471
          }
472
        }
473
      }
474
475
78499
      if (!computeRemainder && dir == 0)
476
      {
477
77598
        if (n1.getKind() == STRING_REPLACE)
478
        {
479
          // (str.contains (str.replace x y z) w) ---> true
480
          // if (str.contains x w) --> true and (str.contains z w) ---> true
481
1553
          Node xCtnW = checkContains(n1[0], n2);
482
1010
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
483
          {
484
825
            Node zCtnW = checkContains(n1[2], n2);
485
646
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
486
            {
487
467
              return true;
488
            }
489
          }
490
        }
491
      }
492
    }
493
  }
494
78316
  return false;
495
}
496
497
78611
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
498
                                           std::vector<Node>& n2,
499
                                           std::vector<Node>& nb,
500
                                           std::vector<Node>& ne,
501
                                           int dir)
502
{
503
78611
  Assert(nb.empty());
504
78611
  Assert(ne.empty());
505
78611
  bool changed = false;
506
  // for ( forwards, backwards ) direction
507
179800
  for (unsigned r = 0; r < 2; r++)
508
  {
509
129213
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
510
    {
511
127377
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
512
127377
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
513
127377
      bool removeComponent = false;
514
226730
      Node n1cmp = n1[index0];
515
516
127377
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
517
      {
518
27978
        return false;
519
      }
520
521
198752
      std::vector<Node> sss;
522
198752
      std::vector<Node> sls;
523
99399
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
524
198798
      Trace("strings-rewrite-debug2")
525
99399
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
526
99399
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
527
99399
      if (n1cmp.isConst())
528
      {
529
99080
        Node s = n1cmp;
530
49540
        size_t slen = Word::getLength(s);
531
        // overlap is an overapproximation of the number of characters
532
        // n2[index1] can match in s
533
49540
        unsigned overlap = Word::getLength(s);
534
49540
        if (n2[index1].isConst())
535
        {
536
16996
          Node t = n2[index1];
537
8498
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
538
8498
          if (ret == std::string::npos)
539
          {
540
280
            if (n1.size() == 1)
541
            {
542
              // can remove everything
543
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
544
              //   str.contains( "", str.++( "ba", x ) )
545
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
546
              //   str.contains( "", "d" )
547
11
              removeComponent = true;
548
            }
549
269
            else if (sss.empty())  // only if not substr
550
            {
551
              // check how much overlap there is
552
              // This is used to partially strip off the endpoint
553
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
554
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
555
257
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
556
            }
557
            // note that we cannot process substring here, since t may
558
            // match only part of s. Consider:
559
            // (str.++ "C" (str.substr "AB" x y)), "CB"
560
            // where "AB" and "CB" have no overlap, but "C" is not part of what
561
            // is matched with "AB".
562
          }
563
8218
          else if (sss.empty())  // only if not substr
564
          {
565
490
            Assert(ret < slen);
566
            // can strip off up to the find position, e.g.
567
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
568
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
569
            // and
570
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
571
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
572
490
            overlap = slen - ret;
573
          }
574
        }
575
        else
576
        {
577
          // inconclusive
578
        }
579
49540
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
580
        // process the overlap
581
49540
        if (overlap < slen)
582
        {
583
111
          changed = true;
584
111
          if (overlap == 0)
585
          {
586
88
            removeComponent = true;
587
          }
588
          else
589
          {
590
            // can drop the prefix (resp. suffix) from the first (resp. last)
591
            // component
592
23
            if (r == 0)
593
            {
594
7
              nb.push_back(Word::prefix(s, slen - overlap));
595
7
              n1[index0] = Word::suffix(s, overlap);
596
            }
597
            else
598
            {
599
16
              ne.push_back(Word::suffix(s, slen - overlap));
600
16
              n1[index0] = Word::prefix(s, overlap);
601
            }
602
          }
603
        }
604
      }
605
49859
      else if (n1cmp.getKind() == STRING_ITOS)
606
      {
607
244
        if (n2[index1].isConst())
608
        {
609
206
          Assert(n2[index1].getType().isString());  // string-only
610
412
          cvc5::String t = n2[index1].getConst<String>();
611
206
          if (n1.size() == 1)
612
          {
613
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
614
            // the entire component
615
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
616
199
            if (!t.isNumber())
617
            {
618
35
              removeComponent = true;
619
            }
620
          }
621
          else
622
          {
623
7
            const std::vector<unsigned>& tvec = t.getVec();
624
7
            Assert(tvec.size() > 0);
625
626
            // if n1.size()>1, then if the first (resp. last) character of
627
            // n2[index1]
628
            //  is not a digit, we can drop the entire component, e.g.:
629
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
630
            //    str.contains( y, "a12" )
631
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
632
            //    str.contains( y, "a0b" )
633
7
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
634
7
            if (!String::isDigit(tvec[i]))
635
            {
636
5
              removeComponent = true;
637
            }
638
          }
639
        }
640
      }
641
99399
      if (removeComponent)
642
      {
643
139
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
644
        // can drop entire first (resp. last) component
645
139
        if (r == 0)
646
        {
647
101
          nb.push_back(n1[index0]);
648
101
          n1.erase(n1.begin(), n1.begin() + 1);
649
        }
650
        else
651
        {
652
38
          ne.push_back(n1[index0]);
653
38
          n1.pop_back();
654
        }
655
139
        if (n1.empty())
656
        {
657
          // if we've removed everything, just return (we will rewrite to false)
658
46
          return true;
659
        }
660
        else
661
        {
662
93
          changed = true;
663
        }
664
      }
665
    }
666
  }
667
  // TODO (#1180) : computing the maximal overlap in this function may be
668
  // important.
669
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
670
  // false
671
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
672
  //   leaving 4 characters
673
  //      which is larger that the upper bound for length of str.substr(y,0,3),
674
  //      which is 3.
675
50587
  return changed;
676
}
677
678
6825
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
679
{
680
6825
  NodeManager* nm = NodeManager::currentNM();
681
13650
  Node ctn = nm->mkNode(STRING_CONTAINS, a, b);
682
683
6825
  if (fullRewriter)
684
  {
685
6825
    ctn = d_rr->rewrite(ctn);
686
  }
687
  else
688
  {
689
    Node prev;
690
    do
691
    {
692
      prev = ctn;
693
      ctn = d_rewriter.rewriteContains(ctn);
694
      if (ctn != prev)
695
      {
696
        ctn = d_rewriter.postProcessRewrite(prev, ctn);
697
      }
698
    } while (prev != ctn && ctn.getKind() == STRING_CONTAINS);
699
  }
700
701
6825
  Assert(ctn.getType().isBoolean());
702
13650
  return ctn.isConst() ? ctn : Node::null();
703
}
704
705
26902
bool StringsEntail::checkNonEmpty(Node a)
706
{
707
53804
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
708
26902
  len = d_rr->rewrite(len);
709
53804
  return d_arithEntail.check(len, true);
710
}
711
712
44371
bool StringsEntail::checkLengthOne(Node s, bool strict)
713
{
714
44371
  NodeManager* nm = NodeManager::currentNM();
715
88742
  Node one = nm->mkConst(Rational(1));
716
88742
  Node len = nm->mkNode(STRING_LENGTH, s);
717
44371
  len = d_rr->rewrite(len);
718
177484
  return d_arithEntail.check(one, len)
719
221855
         && (!strict || d_arithEntail.check(len, true));
720
}
721
722
75942
bool StringsEntail::checkMultisetSubset(Node a, Node b)
723
{
724
151884
  std::vector<Node> avec;
725
75942
  utils::getConcat(getMultisetApproximation(a), avec);
726
151884
  std::vector<Node> bvec;
727
75942
  utils::getConcat(b, bvec);
728
729
151884
  std::map<Node, unsigned> num_nconst[2];
730
151884
  std::map<Node, unsigned> num_const[2];
731
227826
  for (unsigned j = 0; j < 2; j++)
732
  {
733
151884
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
734
315921
    for (const Node& cc : jvec)
735
    {
736
164037
      if (cc.isConst())
737
      {
738
82884
        num_const[j][cc]++;
739
      }
740
      else
741
      {
742
81153
        num_nconst[j][cc]++;
743
      }
744
    }
745
  }
746
75942
  bool ms_success = true;
747
76589
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
748
  {
749
24346
    if (nncp.second > num_nconst[1][nncp.first])
750
    {
751
23699
      ms_success = false;
752
23699
      break;
753
    }
754
  }
755
75942
  if (ms_success)
756
  {
757
    // count the number of constant characters in the first argument
758
156729
    std::map<Node, unsigned> count_const[2];
759
104471
    std::vector<Node> chars;
760
156729
    for (unsigned j = 0; j < 2; j++)
761
    {
762
160955
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
763
      {
764
112938
        Node cn = ncp.first;
765
56469
        Assert(cn.isConst());
766
112938
        std::vector<Node> cnChars = Word::getChars(cn);
767
140675
        for (const Node& ch : cnChars)
768
        {
769
84206
          count_const[j][ch] += ncp.second;
770
84206
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
771
          {
772
61224
            chars.push_back(ch);
773
          }
774
        }
775
      }
776
    }
777
104486
    Trace("strings-entail-ms-ss")
778
52243
        << "For " << a << " and " << b << " : " << std::endl;
779
113452
    for (const Node& ch : chars)
780
    {
781
61224
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
782
61224
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
783
122448
      Trace("strings-entail-ms-ss")
784
61224
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
785
61224
      if (count_const[0][ch] < count_const[1][ch])
786
      {
787
15
        return true;
788
      }
789
    }
790
791
    // TODO (#1180): count the number of 2,3,4,.. character substrings
792
    // for example:
793
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
794
    // since the second argument contains more occurrences of "bb".
795
    // note this is orthogonal reasoning to inductive reasoning
796
    // via regular membership reduction in Liang et al CAV 2015.
797
  }
798
75927
  return false;
799
}
800
801
99810
Node StringsEntail::checkHomogeneousString(Node a)
802
{
803
199620
  std::vector<Node> avec;
804
99810
  utils::getConcat(getMultisetApproximation(a), avec);
805
806
99810
  bool cValid = false;
807
199620
  Node c;
808
150100
  for (const Node& ac : avec)
809
  {
810
102587
    if (ac.isConst())
811
    {
812
111816
      std::vector<Node> acv = Word::getChars(ac);
813
98310
      for (const Node& cc : acv)
814
      {
815
48020
        if (!cValid)
816
        {
817
33385
          cValid = true;
818
33385
          c = cc;
819
        }
820
14635
        else if (c != cc)
821
        {
822
          // Found a different character
823
11236
          return Node::null();
824
        }
825
      }
826
    }
827
    else
828
    {
829
      // Could produce a different character
830
41061
      return Node::null();
831
    }
832
  }
833
834
47513
  if (!cValid)
835
  {
836
27846
    return Word::mkEmptyWord(a.getType());
837
  }
838
839
19667
  return c;
840
}
841
842
188807
Node StringsEntail::getMultisetApproximation(Node a)
843
{
844
188807
  NodeManager* nm = NodeManager::currentNM();
845
188807
  if (a.getKind() == STRING_SUBSTR)
846
  {
847
64920
    return a[0];
848
  }
849
123887
  else if (a.getKind() == STRING_REPLACE)
850
  {
851
2400
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
852
  }
853
121487
  else if (a.getKind() == STRING_CONCAT)
854
  {
855
9120
    NodeBuilder nb(STRING_CONCAT);
856
15215
    for (const Node& ac : a)
857
    {
858
10655
      nb << getMultisetApproximation(ac);
859
    }
860
4560
    return nb.constructNode();
861
  }
862
  else
863
  {
864
116927
    return a;
865
  }
866
}
867
868
127566
Node StringsEntail::getStringOrEmpty(Node n)
869
{
870
127566
  Node res;
871
382734
  while (res.isNull())
872
  {
873
127584
    switch (n.getKind())
874
    {
875
530
      case STRING_REPLACE:
876
      {
877
530
        if (Word::isEmpty(n[0]))
878
        {
879
          // (str.replace "" x y) --> y
880
18
          n = n[2];
881
18
          break;
882
        }
883
512
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
884
        {
885
          // (str.replace "A" x "") --> "A"
886
30
          res = n[0];
887
30
          break;
888
        }
889
890
482
        res = n;
891
482
        break;
892
      }
893
8475
      case STRING_SUBSTR:
894
      {
895
8475
        if (checkLengthOne(n[0]))
896
        {
897
          // (str.substr "A" x y) --> "A"
898
173
          res = n[0];
899
173
          break;
900
        }
901
8302
        res = n;
902
8302
        break;
903
      }
904
118579
      default:
905
      {
906
118579
        res = n;
907
118579
        break;
908
      }
909
    }
910
  }
911
127566
  return res;
912
}
913
914
687
Node StringsEntail::inferEqsFromContains(Node x, Node y)
915
{
916
687
  NodeManager* nm = NodeManager::currentNM();
917
1374
  Node emp = Word::mkEmptyWord(x.getType());
918
687
  Assert(x.getType() == y.getType());
919
1374
  TypeNode stype = x.getType();
920
921
1374
  Node xLen = nm->mkNode(STRING_LENGTH, x);
922
1374
  std::vector<Node> yLens;
923
687
  if (y.getKind() != STRING_CONCAT)
924
  {
925
66
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
926
  }
927
  else
928
  {
929
2384
    for (const Node& yi : y)
930
    {
931
1763
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
932
    }
933
  }
934
935
1374
  std::vector<Node> zeroLens;
936
687
  if (x == emp)
937
  {
938
    // If x is the empty string, then all ys must be empty, too, and we can
939
    // skip the expensive checks. Note that this is just a performance
940
    // optimization.
941
43
    zeroLens.swap(yLens);
942
  }
943
  else
944
  {
945
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
946
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
947
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
948
    // true. The terms that can have length zero without making the inequality
949
    // false must be all be empty if (str.contains x y) is true.
950
644
    if (!d_arithEntail.inferZerosInSumGeq(xLen, yLens, zeroLens))
951
    {
952
      // We could not prove that the inequality holds
953
566
      return Node::null();
954
    }
955
78
    else if (yLens.size() == y.getNumChildren())
956
    {
957
      // We could only prove that the inequality holds but not that any of the
958
      // ys must be empty
959
40
      return nm->mkNode(EQUAL, x, y);
960
    }
961
  }
962
963
81
  if (y.getKind() != STRING_CONCAT)
964
  {
965
16
    if (zeroLens.size() == 1)
966
    {
967
      // y is not a concatenation and we found that it must be empty, so just
968
      // return (= y "")
969
      Assert(zeroLens[0][0] == y);
970
      return nm->mkNode(EQUAL, y, emp);
971
    }
972
    else
973
    {
974
16
      Assert(yLens.size() == 1 && yLens[0][0] == y);
975
16
      return nm->mkNode(EQUAL, x, y);
976
    }
977
  }
978
979
130
  std::vector<Node> cs;
980
92
  for (const Node& yiLen : yLens)
981
  {
982
27
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
983
27
    cs.push_back(yiLen[0]);
984
  }
985
986
130
  NodeBuilder nb(AND);
987
  // (= x (str.++ y1' ... ym'))
988
65
  if (!cs.empty())
989
  {
990
22
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
991
  }
992
  // (= y1'' "") ... (= yk'' "")
993
189
  for (const Node& zeroLen : zeroLens)
994
  {
995
124
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
996
124
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
997
  }
998
999
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
1000
65
  return nb.constructNode();
1001
}
1002
1003
}  // namespace strings
1004
}  // namespace theory
1005
22746
}  // namespace cvc5