GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 420 443 94.8 %
Date: 2021-09-18 Branches: 942 2027 46.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
10262
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
34
{
35
10262
}
36
37
735
bool StringsEntail::canConstantContainConcat(Node c,
38
                                             Node n,
39
                                             int& firstc,
40
                                             int& lastc)
41
{
42
735
  Assert(c.isConst());
43
735
  Assert(n.getKind() == STRING_CONCAT);
44
  // must find constant components in order
45
735
  size_t pos = 0;
46
735
  firstc = -1;
47
735
  lastc = -1;
48
2704
  for (unsigned i = 0; i < n.getNumChildren(); i++)
49
  {
50
1996
    if (n[i].isConst())
51
    {
52
405
      firstc = firstc == -1 ? i : firstc;
53
405
      lastc = i;
54
405
      size_t new_pos = Word::find(c, n[i], pos);
55
405
      if (new_pos == std::string::npos)
56
      {
57
19
        return false;
58
      }
59
      else
60
      {
61
386
        pos = new_pos + Word::getLength(n[i]);
62
      }
63
    }
64
1591
    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
65
    {
66
20
      Assert(c.getType().isString());  // string-only
67
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
68
      // find the first occurrence of a digit starting at pos
69
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
70
      {
71
48
        pos++;
72
      }
73
20
      if (pos == tvec.size())
74
      {
75
8
        return false;
76
      }
77
      // must consume at least one digit here
78
12
      pos++;
79
    }
80
  }
81
708
  return true;
82
}
83
84
49046
bool StringsEntail::canConstantContainList(Node c,
85
                                           std::vector<Node>& l,
86
                                           int& firstc,
87
                                           int& lastc)
88
{
89
49046
  Assert(c.isConst());
90
  // must find constant components in order
91
49046
  size_t pos = 0;
92
49046
  firstc = -1;
93
49046
  lastc = -1;
94
141911
  for (unsigned i = 0; i < l.size(); i++)
95
  {
96
93386
    if (l[i].isConst())
97
    {
98
82315
      firstc = firstc == -1 ? i : firstc;
99
82315
      lastc = i;
100
82315
      size_t new_pos = Word::find(c, l[i], pos);
101
82315
      if (new_pos == std::string::npos)
102
      {
103
521
        return false;
104
      }
105
      else
106
      {
107
81794
        pos = new_pos + Word::getLength(l[i]);
108
      }
109
    }
110
  }
111
48525
  return true;
112
}
113
114
99507
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
115
                                        std::vector<Node>& nr,
116
                                        int dir,
117
                                        Node& curr,
118
                                        bool strict)
119
{
120
99507
  Assert(dir == 1 || dir == -1);
121
99507
  Assert(nr.empty());
122
99507
  NodeManager* nm = NodeManager::currentNM();
123
199014
  Node zero = nm->mkConst(cvc5::Rational(0));
124
99507
  bool ret = false;
125
99507
  bool success = true;
126
99507
  unsigned sindex = 0;
127
278301
  while (success && curr != zero && sindex < n1.size())
128
  {
129
89397
    Assert(!curr.isNull());
130
89397
    success = false;
131
89397
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
132
89397
    if (n1[sindex_use].isConst())
133
    {
134
      // could strip part of a constant
135
36554
      Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr));
136
18277
      if (!lowerBound.isNull())
137
      {
138
7556
        Assert(lowerBound.isConst());
139
15112
        Rational lbr = lowerBound.getConst<Rational>();
140
7556
        if (lbr.sgn() > 0)
141
        {
142
5130
          Assert(ArithEntail::check(curr, true));
143
10260
          Node s = n1[sindex_use];
144
5130
          size_t slen = Word::getLength(s);
145
10260
          Node ncl = nm->mkConst(cvc5::Rational(slen));
146
10260
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
147
5130
          next_s = Rewriter::rewrite(next_s);
148
5130
          Assert(next_s.isConst());
149
          // we can remove the entire constant
150
5130
          if (next_s.getConst<Rational>().sgn() >= 0)
151
          {
152
3309
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
153
3309
            success = true;
154
3309
            sindex++;
155
          }
156
          else
157
          {
158
            // we can remove part of the constant
159
            // lower bound minus the length of a concrete string is negative,
160
            // hence lowerBound cannot be larger than long max
161
1821
            Assert(lbr < Rational(String::maxSize()));
162
1821
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
163
1821
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
164
1821
            Assert(lbsize < slen);
165
1821
            if (dir == 1)
166
            {
167
              // strip partially from the front
168
1629
              nr.push_back(Word::prefix(s, lbsize));
169
1629
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
170
            }
171
            else
172
            {
173
              // strip partially from the back
174
192
              nr.push_back(Word::suffix(s, lbsize));
175
192
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
176
            }
177
1821
            ret = true;
178
          }
179
5130
          Assert(ArithEntail::check(curr));
180
        }
181
        else
182
        {
183
          // we cannot remove the constant
184
        }
185
      }
186
    }
187
    else
188
    {
189
      Node next_s = NodeManager::currentNM()->mkNode(
190
          MINUS,
191
          curr,
192
142240
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
193
71120
      next_s = Rewriter::rewrite(next_s);
194
71120
      if (ArithEntail::check(next_s))
195
      {
196
1522
        success = true;
197
1522
        curr = next_s;
198
1522
        sindex++;
199
      }
200
    }
201
  }
202
99507
  if (sindex > 0 && (!strict || curr == zero))
203
  {
204
3993
    if (dir == 1)
205
    {
206
3646
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
207
3646
      n1.erase(n1.begin(), n1.begin() + sindex);
208
    }
209
    else
210
    {
211
347
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
212
347
      n1.erase(n1.end() - sindex, n1.end());
213
    }
214
3993
    ret = true;
215
  }
216
199014
  return ret;
217
}
218
219
102975
int StringsEntail::componentContains(std::vector<Node>& n1,
220
                                     std::vector<Node>& n2,
221
                                     std::vector<Node>& nb,
222
                                     std::vector<Node>& ne,
223
                                     bool computeRemainder,
224
                                     int remainderDir)
225
{
226
102975
  Assert(nb.empty());
227
102975
  Assert(ne.empty());
228
102975
  Trace("strings-entail") << "Component contains: " << std::endl;
229
102975
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
230
102975
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
231
  // if n2 is a singleton, we can do optimized version here
232
102975
  if (n2.size() == 1)
233
  {
234
202329
    for (unsigned i = 0; i < n1.size(); i++)
235
    {
236
206609
      Node n1rb;
237
206609
      Node n1re;
238
104870
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
239
      {
240
3131
        if (computeRemainder)
241
        {
242
556
          n1[i] = n2[0];
243
556
          if (remainderDir != -1)
244
          {
245
556
            if (!n1re.isNull())
246
            {
247
53
              ne.push_back(n1re);
248
            }
249
556
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
250
556
            n1.erase(n1.begin() + i + 1, n1.end());
251
          }
252
          else if (!n1re.isNull())
253
          {
254
            n1[i] = Rewriter::rewrite(
255
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
256
          }
257
556
          if (remainderDir != 1)
258
          {
259
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
260
            n1.erase(n1.begin(), n1.begin() + i);
261
            if (!n1rb.isNull())
262
            {
263
              nb.push_back(n1rb);
264
            }
265
          }
266
556
          else if (!n1rb.isNull())
267
          {
268
106
            n1[i] = Rewriter::rewrite(
269
212
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
270
          }
271
        }
272
3131
        return i;
273
      }
274
    }
275
  }
276
2385
  else if (n1.size() >= n2.size())
277
  {
278
786
    unsigned diff = n1.size() - n2.size();
279
1528
    for (unsigned i = 0; i <= diff; i++)
280
    {
281
1826
      Node n1rb_first;
282
1826
      Node n1re_first;
283
      // first component of n2 must be a suffix
284
2168
      if (componentContainsBase(n1[i],
285
1084
                                n2[0],
286
                                n1rb_first,
287
                                n1re_first,
288
                                1,
289
1084
                                computeRemainder && remainderDir != 1))
290
      {
291
365
        Assert(n1re_first.isNull());
292
610
        for (unsigned j = 1; j < n2.size(); j++)
293
        {
294
          // are we in the last component?
295
610
          if (j + 1 == n2.size())
296
          {
297
355
            Node n1rb_last;
298
355
            Node n1re_last;
299
            // last component of n2 must be a prefix
300
710
            if (componentContainsBase(n1[i + j],
301
355
                                      n2[j],
302
                                      n1rb_last,
303
                                      n1re_last,
304
                                      -1,
305
355
                                      computeRemainder && remainderDir != -1))
306
            {
307
684
              Trace("strings-entail-debug")
308
342
                  << "Last remainder begin is " << n1rb_last << std::endl;
309
684
              Trace("strings-entail-debug")
310
342
                  << "Last remainder end is " << n1re_last << std::endl;
311
342
              Assert(n1rb_last.isNull());
312
342
              if (computeRemainder)
313
              {
314
172
                if (remainderDir != -1)
315
                {
316
172
                  if (!n1re_last.isNull())
317
                  {
318
2
                    ne.push_back(n1re_last);
319
                  }
320
172
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
321
172
                  n1.erase(n1.begin() + i + j + 1, n1.end());
322
172
                  n1[i + j] = n2[j];
323
                }
324
172
                if (remainderDir != 1)
325
                {
326
                  n1[i] = n2[0];
327
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
328
                  n1.erase(n1.begin(), n1.begin() + i);
329
                  if (!n1rb_first.isNull())
330
                  {
331
                    nb.push_back(n1rb_first);
332
                  }
333
                }
334
              }
335
342
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
336
342
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
337
342
              Trace("strings-entail-debug") << "...return " << i << std::endl;
338
342
              return i;
339
            }
340
            else
341
            {
342
13
              break;
343
            }
344
          }
345
255
          else if (n1[i + j] != n2[j])
346
          {
347
10
            break;
348
          }
349
        }
350
      }
351
    }
352
  }
353
99502
  return -1;
354
}
355
356
106309
bool StringsEntail::componentContainsBase(
357
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
358
{
359
106309
  Assert(n1rb.isNull());
360
106309
  Assert(n1re.isNull());
361
362
106309
  NodeManager* nm = NodeManager::currentNM();
363
364
106309
  if (n1 == n2)
365
  {
366
1525
    return true;
367
  }
368
  else
369
  {
370
104784
    if (n1.isConst() && n2.isConst())
371
    {
372
634
      size_t len1 = Word::getLength(n1);
373
634
      size_t len2 = Word::getLength(n2);
374
634
      if (len2 < len1)
375
      {
376
277
        if (dir == 1)
377
        {
378
20
          if (Word::suffix(n1, len2) == n2)
379
          {
380
20
            if (computeRemainder)
381
            {
382
              n1rb = Word::prefix(n1, len1 - len2);
383
            }
384
20
            return true;
385
          }
386
        }
387
257
        else if (dir == -1)
388
        {
389
4
          if (Word::prefix(n1, len2) == n2)
390
          {
391
4
            if (computeRemainder)
392
            {
393
              n1re = Word::suffix(n1, len1 - len2);
394
            }
395
4
            return true;
396
          }
397
        }
398
        else
399
        {
400
253
          size_t f = Word::find(n1, n2);
401
253
          if (f != std::string::npos)
402
          {
403
239
            if (computeRemainder)
404
            {
405
139
              if (f > 0)
406
              {
407
102
                n1rb = Word::prefix(n1, f);
408
              }
409
139
              if (len1 > f + len2)
410
              {
411
49
                n1re = Word::suffix(n1, len1 - (f + len2));
412
              }
413
            }
414
239
            return true;
415
          }
416
        }
417
      }
418
    }
419
    else
420
    {
421
      // cases for:
422
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
423
104150
      if (n2.getKind() == STRING_SUBSTR)
424
      {
425
53077
        if (n2[0] == n1)
426
        {
427
826
          bool success = true;
428
826
          Node start_pos = n2[1];
429
826
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
430
826
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
431
826
          if (dir == 1)
432
          {
433
            // To be a suffix, start + length must be greater than
434
            // or equal to the length of the string.
435
4
            success = ArithEntail::check(end_pos, len_n2s);
436
          }
437
822
          else if (dir == -1)
438
          {
439
            // To be a prefix, must literally start at 0, since
440
            //   if we knew it started at <0, it should be rewritten to "",
441
            //   if we knew it started at 0, then n2[1] should be rewritten to
442
            //   0.
443
16
            success = start_pos.isConst()
444
8
                      && start_pos.getConst<Rational>().sgn() == 0;
445
          }
446
826
          if (success)
447
          {
448
826
            if (computeRemainder)
449
            {
450
              // we can only compute the remainder if start_pos and end_pos
451
              // are known to be non-negative.
452
54
              if (!ArithEntail::check(start_pos)
453
54
                  || !ArithEntail::check(end_pos))
454
              {
455
12
                return false;
456
              }
457
6
              if (dir != -1)
458
              {
459
12
                n1rb = nm->mkNode(
460
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
461
              }
462
6
              if (dir != 1)
463
              {
464
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
465
              }
466
            }
467
814
            return true;
468
          }
469
        }
470
      }
471
472
103324
      if (!computeRemainder && dir == 0)
473
      {
474
101537
        if (n1.getKind() == STRING_REPLACE)
475
        {
476
          // (str.contains (str.replace x y z) w) ---> true
477
          // if (str.contains x w) --> true and (str.contains z w) ---> true
478
3376
          Node xCtnW = checkContains(n1[0], n2);
479
2306
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
480
          {
481
1782
            Node zCtnW = checkContains(n1[2], n2);
482
1509
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
483
            {
484
1236
              return true;
485
            }
486
          }
487
        }
488
      }
489
    }
490
  }
491
102459
  return false;
492
}
493
494
103415
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
495
                                           std::vector<Node>& n2,
496
                                           std::vector<Node>& nb,
497
                                           std::vector<Node>& ne,
498
                                           int dir)
499
{
500
103415
  Assert(nb.empty());
501
103415
  Assert(ne.empty());
502
103415
  bool changed = false;
503
  // for ( forwards, backwards ) direction
504
239163
  for (unsigned r = 0; r < 2; r++)
505
  {
506
171302
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
507
    {
508
167776
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
509
167776
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
510
167776
      bool removeComponent = false;
511
299998
      Node n1cmp = n1[index0];
512
513
167776
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
514
      {
515
35484
        return false;
516
      }
517
518
264514
      std::vector<Node> sss;
519
264514
      std::vector<Node> sls;
520
132292
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
521
264584
      Trace("strings-rewrite-debug2")
522
132292
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
523
132292
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
524
132292
      if (n1cmp.isConst())
525
      {
526
122588
        Node s = n1cmp;
527
61294
        size_t slen = Word::getLength(s);
528
        // overlap is an overapproximation of the number of characters
529
        // n2[index1] can match in s
530
61294
        unsigned overlap = Word::getLength(s);
531
61294
        if (n2[index1].isConst())
532
        {
533
20048
          Node t = n2[index1];
534
10024
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
535
10024
          if (ret == std::string::npos)
536
          {
537
397
            if (n1.size() == 1)
538
            {
539
              // can remove everything
540
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
541
              //   str.contains( "", str.++( "ba", x ) )
542
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
543
              //   str.contains( "", "d" )
544
16
              removeComponent = true;
545
            }
546
381
            else if (sss.empty())  // only if not substr
547
            {
548
              // check how much overlap there is
549
              // This is used to partially strip off the endpoint
550
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
551
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
552
369
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
553
            }
554
            // note that we cannot process substring here, since t may
555
            // match only part of s. Consider:
556
            // (str.++ "C" (str.substr "AB" x y)), "CB"
557
            // where "AB" and "CB" have no overlap, but "C" is not part of what
558
            // is matched with "AB".
559
          }
560
9627
          else if (sss.empty())  // only if not substr
561
          {
562
659
            Assert(ret < slen);
563
            // can strip off up to the find position, e.g.
564
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
565
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
566
            // and
567
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
568
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
569
659
            overlap = slen - ret;
570
          }
571
        }
572
        else
573
        {
574
          // inconclusive
575
        }
576
61294
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
577
        // process the overlap
578
61294
        if (overlap < slen)
579
        {
580
153
          changed = true;
581
153
          if (overlap == 0)
582
          {
583
122
            removeComponent = true;
584
          }
585
          else
586
          {
587
            // can drop the prefix (resp. suffix) from the first (resp. last)
588
            // component
589
31
            if (r == 0)
590
            {
591
15
              nb.push_back(Word::prefix(s, slen - overlap));
592
15
              n1[index0] = Word::suffix(s, overlap);
593
            }
594
            else
595
            {
596
16
              ne.push_back(Word::suffix(s, slen - overlap));
597
16
              n1[index0] = Word::prefix(s, overlap);
598
            }
599
          }
600
        }
601
      }
602
70998
      else if (n1cmp.getKind() == STRING_ITOS)
603
      {
604
300
        if (n2[index1].isConst())
605
        {
606
228
          Assert(n2[index1].getType().isString());  // string-only
607
456
          cvc5::String t = n2[index1].getConst<String>();
608
228
          if (n1.size() == 1)
609
          {
610
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
611
            // the entire component
612
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
613
218
            if (!t.isNumber())
614
            {
615
54
              removeComponent = true;
616
            }
617
          }
618
          else
619
          {
620
10
            const std::vector<unsigned>& tvec = t.getVec();
621
10
            Assert(tvec.size() > 0);
622
623
            // if n1.size()>1, then if the first (resp. last) character of
624
            // n2[index1]
625
            //  is not a digit, we can drop the entire component, e.g.:
626
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
627
            //    str.contains( y, "a12" )
628
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
629
            //    str.contains( y, "a0b" )
630
10
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
631
10
            if (!String::isDigit(tvec[i]))
632
            {
633
8
              removeComponent = true;
634
            }
635
          }
636
        }
637
      }
638
132292
      if (removeComponent)
639
      {
640
200
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
641
        // can drop entire first (resp. last) component
642
200
        if (r == 0)
643
        {
644
139
          nb.push_back(n1[index0]);
645
139
          n1.erase(n1.begin(), n1.begin() + 1);
646
        }
647
        else
648
        {
649
61
          ne.push_back(n1[index0]);
650
61
          n1.pop_back();
651
        }
652
200
        if (n1.empty())
653
        {
654
          // if we've removed everything, just return (we will rewrite to false)
655
70
          return true;
656
        }
657
        else
658
        {
659
130
          changed = true;
660
        }
661
      }
662
    }
663
  }
664
  // TODO (#1180) : computing the maximal overlap in this function may be
665
  // important.
666
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
667
  // false
668
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
669
  //   leaving 4 characters
670
  //      which is larger that the upper bound for length of str.substr(y,0,3),
671
  //      which is 3.
672
67861
  return changed;
673
}
674
675
15218
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
676
{
677
15218
  NodeManager* nm = NodeManager::currentNM();
678
30436
  Node ctn = nm->mkNode(STRING_CONTAINS, a, b);
679
680
15218
  if (fullRewriter)
681
  {
682
15218
    ctn = Rewriter::rewrite(ctn);
683
  }
684
  else
685
  {
686
    Node prev;
687
    do
688
    {
689
      prev = ctn;
690
      ctn = d_rewriter.rewriteContains(ctn);
691
      if (ctn != prev)
692
      {
693
        ctn = d_rewriter.postProcessRewrite(prev, ctn);
694
      }
695
    } while (prev != ctn && ctn.getKind() == STRING_CONTAINS);
696
  }
697
698
15218
  Assert(ctn.getType().isBoolean());
699
30436
  return ctn.isConst() ? ctn : Node::null();
700
}
701
702
35647
bool StringsEntail::checkNonEmpty(Node a)
703
{
704
71294
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
705
35647
  len = Rewriter::rewrite(len);
706
71294
  return ArithEntail::check(len, true);
707
}
708
709
59358
bool StringsEntail::checkLengthOne(Node s, bool strict)
710
{
711
59358
  NodeManager* nm = NodeManager::currentNM();
712
118716
  Node one = nm->mkConst(Rational(1));
713
118716
  Node len = nm->mkNode(STRING_LENGTH, s);
714
59358
  len = Rewriter::rewrite(len);
715
237432
  return ArithEntail::check(one, len)
716
296790
         && (!strict || ArithEntail::check(len, true));
717
}
718
719
98173
bool StringsEntail::checkMultisetSubset(Node a, Node b)
720
{
721
196346
  std::vector<Node> avec;
722
98173
  utils::getConcat(getMultisetApproximation(a), avec);
723
196346
  std::vector<Node> bvec;
724
98173
  utils::getConcat(b, bvec);
725
726
196346
  std::map<Node, unsigned> num_nconst[2];
727
196346
  std::map<Node, unsigned> num_const[2];
728
294519
  for (unsigned j = 0; j < 2; j++)
729
  {
730
196346
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
731
411474
    for (const Node& cc : jvec)
732
    {
733
215128
      if (cc.isConst())
734
      {
735
107677
        num_const[j][cc]++;
736
      }
737
      else
738
      {
739
107451
        num_nconst[j][cc]++;
740
      }
741
    }
742
  }
743
98173
  bool ms_success = true;
744
99272
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
745
  {
746
34194
    if (nncp.second > num_nconst[1][nncp.first])
747
    {
748
33095
      ms_success = false;
749
33095
      break;
750
    }
751
  }
752
98173
  if (ms_success)
753
  {
754
    // count the number of constant characters in the first argument
755
195234
    std::map<Node, unsigned> count_const[2];
756
130136
    std::vector<Node> chars;
757
195234
    for (unsigned j = 0; j < 2; j++)
758
    {
759
200234
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
760
      {
761
140156
        Node cn = ncp.first;
762
70078
        Assert(cn.isConst());
763
140156
        std::vector<Node> cnChars = Word::getChars(cn);
764
167850
        for (const Node& ch : cnChars)
765
        {
766
97772
          count_const[j][ch] += ncp.second;
767
97772
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
768
          {
769
70714
            chars.push_back(ch);
770
          }
771
        }
772
      }
773
    }
774
130156
    Trace("strings-entail-ms-ss")
775
65078
        << "For " << a << " and " << b << " : " << std::endl;
776
135772
    for (const Node& ch : chars)
777
    {
778
70714
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
779
70714
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
780
141428
      Trace("strings-entail-ms-ss")
781
70714
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
782
70714
      if (count_const[0][ch] < count_const[1][ch])
783
      {
784
20
        return true;
785
      }
786
    }
787
788
    // TODO (#1180): count the number of 2,3,4,.. character substrings
789
    // for example:
790
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
791
    // since the second argument contains more occurrences of "bb".
792
    // note this is orthogonal reasoning to inductive reasoning
793
    // via regular membership reduction in Liang et al CAV 2015.
794
  }
795
98153
  return false;
796
}
797
798
128812
Node StringsEntail::checkHomogeneousString(Node a)
799
{
800
257624
  std::vector<Node> avec;
801
128812
  utils::getConcat(getMultisetApproximation(a), avec);
802
803
128812
  bool cValid = false;
804
257624
  Node c;
805
194972
  for (const Node& ac : avec)
806
  {
807
134102
    if (ac.isConst())
808
    {
809
146933
      std::vector<Node> acv = Word::getChars(ac);
810
130181
      for (const Node& cc : acv)
811
      {
812
64021
        if (!cValid)
813
        {
814
44609
          cValid = true;
815
44609
          c = cc;
816
        }
817
19412
        else if (c != cc)
818
        {
819
          // Found a different character
820
14613
          return Node::null();
821
        }
822
      }
823
    }
824
    else
825
    {
826
      // Could produce a different character
827
53329
      return Node::null();
828
    }
829
  }
830
831
60870
  if (!cValid)
832
  {
833
35279
    return Word::mkEmptyWord(a.getType());
834
  }
835
836
25591
  return c;
837
}
838
839
251878
Node StringsEntail::getMultisetApproximation(Node a)
840
{
841
251878
  NodeManager* nm = NodeManager::currentNM();
842
251878
  if (a.getKind() == STRING_SUBSTR)
843
  {
844
83106
    return a[0];
845
  }
846
168772
  else if (a.getKind() == STRING_REPLACE)
847
  {
848
5163
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
849
  }
850
163609
  else if (a.getKind() == STRING_CONCAT)
851
  {
852
17710
    NodeBuilder nb(STRING_CONCAT);
853
28585
    for (const Node& ac : a)
854
    {
855
19730
      nb << getMultisetApproximation(ac);
856
    }
857
8855
    return nb.constructNode();
858
  }
859
  else
860
  {
861
154754
    return a;
862
  }
863
}
864
865
186471
Node StringsEntail::getStringOrEmpty(Node n)
866
{
867
186471
  Node res;
868
559449
  while (res.isNull())
869
  {
870
186489
    switch (n.getKind())
871
    {
872
1778
      case STRING_REPLACE:
873
      {
874
1778
        if (Word::isEmpty(n[0]))
875
        {
876
          // (str.replace "" x y) --> y
877
18
          n = n[2];
878
18
          break;
879
        }
880
881
1760
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
882
        {
883
          // (str.replace "A" x "") --> "A"
884
60
          res = n[0];
885
60
          break;
886
        }
887
888
1700
        res = n;
889
1700
        break;
890
      }
891
13039
      case STRING_SUBSTR:
892
      {
893
13039
        if (checkLengthOne(n[0]))
894
        {
895
          // (str.substr "A" x y) --> "A"
896
611
          res = n[0];
897
611
          break;
898
        }
899
12428
        res = n;
900
12428
        break;
901
      }
902
171672
      default:
903
      {
904
171672
        res = n;
905
171672
        break;
906
      }
907
    }
908
  }
909
186471
  return res;
910
}
911
912
1019
Node StringsEntail::inferEqsFromContains(Node x, Node y)
913
{
914
1019
  NodeManager* nm = NodeManager::currentNM();
915
2038
  Node emp = Word::mkEmptyWord(x.getType());
916
1019
  Assert(x.getType() == y.getType());
917
2038
  TypeNode stype = x.getType();
918
919
2038
  Node xLen = nm->mkNode(STRING_LENGTH, x);
920
2038
  std::vector<Node> yLens;
921
1019
  if (y.getKind() != STRING_CONCAT)
922
  {
923
124
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
924
  }
925
  else
926
  {
927
3329
    for (const Node& yi : y)
928
    {
929
2434
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
930
    }
931
  }
932
933
2038
  std::vector<Node> zeroLens;
934
1019
  if (x == emp)
935
  {
936
    // If x is the empty string, then all ys must be empty, too, and we can
937
    // skip the expensive checks. Note that this is just a performance
938
    // optimization.
939
93
    zeroLens.swap(yLens);
940
  }
941
  else
942
  {
943
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
944
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
945
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
946
    // true. The terms that can have length zero without making the inequality
947
    // false must be all be empty if (str.contains x y) is true.
948
926
    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
949
    {
950
      // We could not prove that the inequality holds
951
824
      return Node::null();
952
    }
953
102
    else if (yLens.size() == y.getNumChildren())
954
    {
955
      // We could only prove that the inequality holds but not that any of the
956
      // ys must be empty
957
49
      return nm->mkNode(EQUAL, x, y);
958
    }
959
  }
960
961
146
  if (y.getKind() != STRING_CONCAT)
962
  {
963
20
    if (zeroLens.size() == 1)
964
    {
965
      // y is not a concatenation and we found that it must be empty, so just
966
      // return (= y "")
967
      Assert(zeroLens[0][0] == y);
968
      return nm->mkNode(EQUAL, y, emp);
969
    }
970
    else
971
    {
972
20
      Assert(yLens.size() == 1 && yLens[0][0] == y);
973
20
      return nm->mkNode(EQUAL, x, y);
974
    }
975
  }
976
977
252
  std::vector<Node> cs;
978
169
  for (const Node& yiLen : yLens)
979
  {
980
43
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
981
43
    cs.push_back(yiLen[0]);
982
  }
983
984
252
  NodeBuilder nb(AND);
985
  // (= x (str.++ y1' ... ym'))
986
126
  if (!cs.empty())
987
  {
988
33
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
989
  }
990
  // (= y1'' "") ... (= yk'' "")
991
384
  for (const Node& zeroLen : zeroLens)
992
  {
993
258
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
994
258
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
995
  }
996
997
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
998
126
  return nb.constructNode();
999
}
1000
1001
}  // namespace strings
1002
}  // namespace theory
1003
29574
}  // namespace cvc5