GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 425 441 96.4 %
Date: 2021-08-01 Branches: 957 2017 47.4 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
10027
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
34
{
35
10027
}
36
37
2823
bool StringsEntail::canConstantContainConcat(Node c,
38
                                             Node n,
39
                                             int& firstc,
40
                                             int& lastc)
41
{
42
2823
  Assert(c.isConst());
43
2823
  Assert(n.getKind() == STRING_CONCAT);
44
  // must find constant components in order
45
2823
  size_t pos = 0;
46
2823
  firstc = -1;
47
2823
  lastc = -1;
48
10537
  for (unsigned i = 0; i < n.getNumChildren(); i++)
49
  {
50
7842
    if (n[i].isConst())
51
    {
52
1838
      firstc = firstc == -1 ? i : firstc;
53
1838
      lastc = i;
54
1838
      size_t new_pos = Word::find(c, n[i], pos);
55
1838
      if (new_pos == std::string::npos)
56
      {
57
120
        return false;
58
      }
59
      else
60
      {
61
1718
        pos = new_pos + Word::getLength(n[i]);
62
      }
63
    }
64
6004
    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
65
    {
66
20
      Assert(c.getType().isString());  // string-only
67
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
68
      // find the first occurrence of a digit starting at pos
69
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
70
      {
71
48
        pos++;
72
      }
73
20
      if (pos == tvec.size())
74
      {
75
8
        return false;
76
      }
77
      // must consume at least one digit here
78
12
      pos++;
79
    }
80
  }
81
2695
  return true;
82
}
83
84
46088
bool StringsEntail::canConstantContainList(Node c,
85
                                           std::vector<Node>& l,
86
                                           int& firstc,
87
                                           int& lastc)
88
{
89
46088
  Assert(c.isConst());
90
  // must find constant components in order
91
46088
  size_t pos = 0;
92
46088
  firstc = -1;
93
46088
  lastc = -1;
94
133878
  for (unsigned i = 0; i < l.size(); i++)
95
  {
96
88284
    if (l[i].isConst())
97
    {
98
77269
      firstc = firstc == -1 ? i : firstc;
99
77269
      lastc = i;
100
77269
      size_t new_pos = Word::find(c, l[i], pos);
101
77269
      if (new_pos == std::string::npos)
102
      {
103
494
        return false;
104
      }
105
      else
106
      {
107
76775
        pos = new_pos + Word::getLength(l[i]);
108
      }
109
    }
110
  }
111
45594
  return true;
112
}
113
114
78990
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
115
                                        std::vector<Node>& nr,
116
                                        int dir,
117
                                        Node& curr,
118
                                        bool strict)
119
{
120
78990
  Assert(dir == 1 || dir == -1);
121
78990
  Assert(nr.empty());
122
78990
  NodeManager* nm = NodeManager::currentNM();
123
157980
  Node zero = nm->mkConst(cvc5::Rational(0));
124
78990
  bool ret = false;
125
78990
  bool success = true;
126
78990
  unsigned sindex = 0;
127
211816
  while (success && curr != zero && sindex < n1.size())
128
  {
129
66413
    Assert(!curr.isNull());
130
66413
    success = false;
131
66413
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
132
66413
    if (n1[sindex_use].isConst())
133
    {
134
      // could strip part of a constant
135
25028
      Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr));
136
12514
      if (!lowerBound.isNull())
137
      {
138
3942
        Assert(lowerBound.isConst());
139
7884
        Rational lbr = lowerBound.getConst<Rational>();
140
3942
        if (lbr.sgn() > 0)
141
        {
142
2319
          Assert(ArithEntail::check(curr, true));
143
4638
          Node s = n1[sindex_use];
144
2319
          size_t slen = Word::getLength(s);
145
4638
          Node ncl = nm->mkConst(cvc5::Rational(slen));
146
4638
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
147
2319
          next_s = Rewriter::rewrite(next_s);
148
2319
          Assert(next_s.isConst());
149
          // we can remove the entire constant
150
2319
          if (next_s.getConst<Rational>().sgn() >= 0)
151
          {
152
1037
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
153
1037
            success = true;
154
1037
            sindex++;
155
          }
156
          else
157
          {
158
            // we can remove part of the constant
159
            // lower bound minus the length of a concrete string is negative,
160
            // hence lowerBound cannot be larger than long max
161
1282
            Assert(lbr < Rational(String::maxSize()));
162
1282
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
163
1282
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
164
1282
            Assert(lbsize < slen);
165
1282
            if (dir == 1)
166
            {
167
              // strip partially from the front
168
1128
              nr.push_back(Word::prefix(s, lbsize));
169
1128
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
170
            }
171
            else
172
            {
173
              // strip partially from the back
174
154
              nr.push_back(Word::suffix(s, lbsize));
175
154
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
176
            }
177
1282
            ret = true;
178
          }
179
2319
          Assert(ArithEntail::check(curr));
180
        }
181
        else
182
        {
183
          // we cannot remove the constant
184
        }
185
      }
186
    }
187
    else
188
    {
189
      Node next_s = NodeManager::currentNM()->mkNode(
190
          MINUS,
191
          curr,
192
107798
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
193
53899
      next_s = Rewriter::rewrite(next_s);
194
53899
      if (ArithEntail::check(next_s))
195
      {
196
362
        success = true;
197
362
        curr = next_s;
198
362
        sindex++;
199
      }
200
    }
201
  }
202
78990
  if (sindex > 0 && (!strict || curr == zero))
203
  {
204
1299
    if (dir == 1)
205
    {
206
1205
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
207
1205
      n1.erase(n1.begin(), n1.begin() + sindex);
208
    }
209
    else
210
    {
211
94
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
212
94
      n1.erase(n1.end() - sindex, n1.end());
213
    }
214
1299
    ret = true;
215
  }
216
157980
  return ret;
217
}
218
219
210455
int StringsEntail::componentContains(std::vector<Node>& n1,
220
                                     std::vector<Node>& n2,
221
                                     std::vector<Node>& nb,
222
                                     std::vector<Node>& ne,
223
                                     bool computeRemainder,
224
                                     int remainderDir)
225
{
226
210455
  Assert(nb.empty());
227
210455
  Assert(ne.empty());
228
210455
  Trace("strings-entail") << "Component contains: " << std::endl;
229
210455
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
230
210455
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
231
  // if n2 is a singleton, we can do optimized version here
232
210455
  if (n2.size() == 1)
233
  {
234
348820
    for (unsigned i = 0; i < n1.size(); i++)
235
    {
236
399284
      Node n1rb;
237
399284
      Node n1re;
238
201512
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
239
      {
240
3740
        if (computeRemainder)
241
        {
242
266
          n1[i] = n2[0];
243
266
          if (remainderDir != -1)
244
          {
245
266
            if (!n1re.isNull())
246
            {
247
41
              ne.push_back(n1re);
248
            }
249
266
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
250
266
            n1.erase(n1.begin() + i + 1, n1.end());
251
          }
252
          else if (!n1re.isNull())
253
          {
254
            n1[i] = Rewriter::rewrite(
255
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
256
          }
257
266
          if (remainderDir != 1)
258
          {
259
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
260
            n1.erase(n1.begin(), n1.begin() + i);
261
            if (!n1rb.isNull())
262
            {
263
              nb.push_back(n1rb);
264
            }
265
          }
266
266
          else if (!n1rb.isNull())
267
          {
268
90
            n1[i] = Rewriter::rewrite(
269
180
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
270
          }
271
        }
272
3740
        return i;
273
      }
274
    }
275
  }
276
59407
  else if (n1.size() >= n2.size())
277
  {
278
17004
    unsigned diff = n1.size() - n2.size();
279
44033
    for (unsigned i = 0; i <= diff; i++)
280
    {
281
54503
      Node n1rb_first;
282
54503
      Node n1re_first;
283
      // first component of n2 must be a suffix
284
54948
      if (componentContainsBase(n1[i],
285
27474
                                n2[0],
286
                                n1rb_first,
287
                                n1re_first,
288
                                1,
289
27474
                                computeRemainder && remainderDir != 1))
290
      {
291
6880
        Assert(n1re_first.isNull());
292
10893
        for (unsigned j = 1; j < n2.size(); j++)
293
        {
294
          // are we in the last component?
295
10893
          if (j + 1 == n2.size())
296
          {
297
1927
            Node n1rb_last;
298
1927
            Node n1re_last;
299
            // last component of n2 must be a prefix
300
3854
            if (componentContainsBase(n1[i + j],
301
1927
                                      n2[j],
302
                                      n1rb_last,
303
                                      n1re_last,
304
                                      -1,
305
1927
                                      computeRemainder && remainderDir != -1))
306
            {
307
890
              Trace("strings-entail-debug")
308
445
                  << "Last remainder begin is " << n1rb_last << std::endl;
309
890
              Trace("strings-entail-debug")
310
445
                  << "Last remainder end is " << n1re_last << std::endl;
311
445
              Assert(n1rb_last.isNull());
312
445
              if (computeRemainder)
313
              {
314
12
                if (remainderDir != -1)
315
                {
316
12
                  if (!n1re_last.isNull())
317
                  {
318
2
                    ne.push_back(n1re_last);
319
                  }
320
12
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
321
12
                  n1.erase(n1.begin() + i + j + 1, n1.end());
322
12
                  n1[i + j] = n2[j];
323
                }
324
12
                if (remainderDir != 1)
325
                {
326
                  n1[i] = n2[0];
327
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
328
                  n1.erase(n1.begin(), n1.begin() + i);
329
                  if (!n1rb_first.isNull())
330
                  {
331
                    nb.push_back(n1rb_first);
332
                  }
333
                }
334
              }
335
445
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
336
445
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
337
445
              Trace("strings-entail-debug") << "...return " << i << std::endl;
338
445
              return i;
339
            }
340
            else
341
            {
342
1482
              break;
343
            }
344
          }
345
8966
          else if (n1[i + j] != n2[j])
346
          {
347
4953
            break;
348
          }
349
        }
350
      }
351
    }
352
  }
353
206270
  return -1;
354
}
355
356
230913
bool StringsEntail::componentContainsBase(
357
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
358
{
359
230913
  Assert(n1rb.isNull());
360
230913
  Assert(n1re.isNull());
361
362
230913
  NodeManager* nm = NodeManager::currentNM();
363
364
230913
  if (n1 == n2)
365
  {
366
10095
    return true;
367
  }
368
  else
369
  {
370
220818
    if (n1.isConst() && n2.isConst())
371
    {
372
3457
      size_t len1 = Word::getLength(n1);
373
3457
      size_t len2 = Word::getLength(n2);
374
3457
      if (len2 < len1)
375
      {
376
586
        if (dir == 1)
377
        {
378
358
          if (Word::suffix(n1, len2) == n2)
379
          {
380
64
            if (computeRemainder)
381
            {
382
              n1rb = Word::prefix(n1, len1 - len2);
383
            }
384
64
            return true;
385
          }
386
        }
387
228
        else if (dir == -1)
388
        {
389
4
          if (Word::prefix(n1, len2) == n2)
390
          {
391
4
            if (computeRemainder)
392
            {
393
              n1re = Word::suffix(n1, len1 - len2);
394
            }
395
4
            return true;
396
          }
397
        }
398
        else
399
        {
400
224
          size_t f = Word::find(n1, n2);
401
224
          if (f != std::string::npos)
402
          {
403
220
            if (computeRemainder)
404
            {
405
117
              if (f > 0)
406
              {
407
86
                n1rb = Word::prefix(n1, f);
408
              }
409
117
              if (len1 > f + len2)
410
              {
411
37
                n1re = Word::suffix(n1, len1 - (f + len2));
412
              }
413
            }
414
220
            return true;
415
          }
416
        }
417
      }
418
    }
419
    else
420
    {
421
      // cases for:
422
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
423
217361
      if (n2.getKind() == STRING_SUBSTR)
424
      {
425
33584
        if (n2[0] == n1)
426
        {
427
464
          bool success = true;
428
464
          Node start_pos = n2[1];
429
464
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
430
464
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
431
464
          if (dir == 1)
432
          {
433
            // To be a suffix, start + length must be greater than
434
            // or equal to the length of the string.
435
4
            success = ArithEntail::check(end_pos, len_n2s);
436
          }
437
460
          else if (dir == -1)
438
          {
439
            // To be a prefix, must literally start at 0, since
440
            //   if we knew it started at <0, it should be rewritten to "",
441
            //   if we knew it started at 0, then n2[1] should be rewritten to
442
            //   0.
443
24
            success = start_pos.isConst()
444
12
                      && start_pos.getConst<Rational>().sgn() == 0;
445
          }
446
464
          if (success)
447
          {
448
464
            if (computeRemainder)
449
            {
450
              // we can only compute the remainder if start_pos and end_pos
451
              // are known to be non-negative.
452
54
              if (!ArithEntail::check(start_pos)
453
54
                  || !ArithEntail::check(end_pos))
454
              {
455
12
                return false;
456
              }
457
6
              if (dir != -1)
458
              {
459
12
                n1rb = nm->mkNode(
460
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
461
              }
462
6
              if (dir != 1)
463
              {
464
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
465
              }
466
            }
467
452
            return true;
468
          }
469
        }
470
      }
471
472
216897
      if (!computeRemainder && dir == 0)
473
      {
474
195591
        if (n1.getKind() == STRING_REPLACE)
475
        {
476
          // (str.contains (str.replace x y z) w) ---> true
477
          // if (str.contains x w) --> true and (str.contains z w) ---> true
478
4118
          Node xCtnW = checkContains(n1[0], n2);
479
2174
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
480
          {
481
832
            Node zCtnW = checkContains(n1[2], n2);
482
531
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
483
            {
484
230
              return true;
485
            }
486
          }
487
        }
488
      }
489
    }
490
  }
491
219836
  return false;
492
}
493
494
210479
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
495
                                           std::vector<Node>& n2,
496
                                           std::vector<Node>& nb,
497
                                           std::vector<Node>& ne,
498
                                           int dir)
499
{
500
210479
  Assert(nb.empty());
501
210479
  Assert(ne.empty());
502
210479
  bool changed = false;
503
  // for ( forwards, backwards ) direction
504
566447
  for (unsigned r = 0; r < 2; r++)
505
  {
506
388475
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
507
    {
508
384481
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
509
384481
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
510
384481
      bool removeComponent = false;
511
736455
      Node n1cmp = n1[index0];
512
513
384481
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
514
      {
515
32458
        return false;
516
      }
517
518
703997
      std::vector<Node> sss;
519
703997
      std::vector<Node> sls;
520
352023
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
521
704046
      Trace("strings-rewrite-debug2")
522
352023
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
523
352023
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
524
352023
      if (n1cmp.isConst())
525
      {
526
180086
        Node s = n1cmp;
527
90043
        size_t slen = Word::getLength(s);
528
        // overlap is an overapproximation of the number of characters
529
        // n2[index1] can match in s
530
90043
        unsigned overlap = Word::getLength(s);
531
90043
        if (n2[index1].isConst())
532
        {
533
26642
          Node t = n2[index1];
534
13321
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
535
13321
          if (ret == std::string::npos)
536
          {
537
3362
            if (n1.size() == 1)
538
            {
539
              // can remove everything
540
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
541
              //   str.contains( "", str.++( "ba", x ) )
542
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
543
              //   str.contains( "", "d" )
544
36
              removeComponent = true;
545
            }
546
3326
            else if (sss.empty())  // only if not substr
547
            {
548
              // check how much overlap there is
549
              // This is used to partially strip off the endpoint
550
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
551
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
552
3281
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
553
            }
554
            // note that we cannot process substring here, since t may
555
            // match only part of s. Consider:
556
            // (str.++ "C" (str.substr "AB" x y)), "CB"
557
            // where "AB" and "CB" have no overlap, but "C" is not part of what
558
            // is matched with "AB".
559
          }
560
9959
          else if (sss.empty())  // only if not substr
561
          {
562
6749
            Assert(ret < slen);
563
            // can strip off up to the find position, e.g.
564
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
565
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
566
            // and
567
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
568
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
569
6749
            overlap = slen - ret;
570
          }
571
        }
572
        else
573
        {
574
          // inconclusive
575
        }
576
90043
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
577
        // process the overlap
578
90043
        if (overlap < slen)
579
        {
580
298
          changed = true;
581
298
          if (overlap == 0)
582
          {
583
173
            removeComponent = true;
584
          }
585
          else
586
          {
587
            // can drop the prefix (resp. suffix) from the first (resp. last)
588
            // component
589
125
            if (r == 0)
590
            {
591
82
              nb.push_back(Word::prefix(s, slen - overlap));
592
82
              n1[index0] = Word::suffix(s, overlap);
593
            }
594
            else
595
            {
596
43
              ne.push_back(Word::suffix(s, slen - overlap));
597
43
              n1[index0] = Word::prefix(s, overlap);
598
            }
599
          }
600
        }
601
      }
602
261980
      else if (n1cmp.getKind() == STRING_ITOS)
603
      {
604
437
        if (n2[index1].isConst())
605
        {
606
41
          Assert(n2[index1].getType().isString());  // string-only
607
82
          cvc5::String t = n2[index1].getConst<String>();
608
41
          if (n1.size() == 1)
609
          {
610
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
611
            // the entire component
612
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
613
21
            if (!t.isNumber())
614
            {
615
13
              removeComponent = true;
616
            }
617
          }
618
          else
619
          {
620
20
            const std::vector<unsigned>& tvec = t.getVec();
621
20
            Assert(tvec.size() > 0);
622
623
            // if n1.size()>1, then if the first (resp. last) character of
624
            // n2[index1]
625
            //  is not a digit, we can drop the entire component, e.g.:
626
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
627
            //    str.contains( y, "a12" )
628
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
629
            //    str.contains( y, "a0b" )
630
20
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
631
20
            if (!String::isDigit(tvec[i]))
632
            {
633
16
              removeComponent = true;
634
            }
635
          }
636
        }
637
      }
638
352023
      if (removeComponent)
639
      {
640
238
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
641
        // can drop entire first (resp. last) component
642
238
        if (r == 0)
643
        {
644
158
          nb.push_back(n1[index0]);
645
158
          n1.erase(n1.begin(), n1.begin() + 1);
646
        }
647
        else
648
        {
649
80
          ne.push_back(n1[index0]);
650
80
          n1.pop_back();
651
        }
652
238
        if (n1.empty())
653
        {
654
          // if we've removed everything, just return (we will rewrite to false)
655
49
          return true;
656
        }
657
        else
658
        {
659
189
          changed = true;
660
        }
661
      }
662
    }
663
  }
664
  // TODO (#1180) : computing the maximal overlap in this function may be
665
  // important.
666
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
667
  // false
668
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
669
  //   leaving 4 characters
670
  //      which is larger that the upper bound for length of str.substr(y,0,3),
671
  //      which is 3.
672
177972
  return changed;
673
}
674
675
239451
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
676
{
677
239451
  NodeManager* nm = NodeManager::currentNM();
678
478902
  Node ctn = nm->mkNode(STRING_CONTAINS, a, b);
679
680
239451
  if (fullRewriter)
681
  {
682
13166
    ctn = Rewriter::rewrite(ctn);
683
  }
684
  else
685
  {
686
452570
    Node prev;
687
255
    do
688
    {
689
226540
      prev = ctn;
690
226540
      ctn = d_rewriter.rewriteContains(ctn);
691
226540
    } while (prev != ctn && ctn.getKind() == STRING_CONTAINS);
692
  }
693
694
239451
  Assert(ctn.getType().isBoolean());
695
478902
  return ctn.isConst() ? ctn : Node::null();
696
}
697
698
26384
bool StringsEntail::checkNonEmpty(Node a)
699
{
700
52768
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
701
26384
  len = Rewriter::rewrite(len);
702
52768
  return ArithEntail::check(len, true);
703
}
704
705
48877
bool StringsEntail::checkLengthOne(Node s, bool strict)
706
{
707
48877
  NodeManager* nm = NodeManager::currentNM();
708
97754
  Node one = nm->mkConst(Rational(1));
709
97754
  Node len = nm->mkNode(STRING_LENGTH, s);
710
48877
  len = Rewriter::rewrite(len);
711
195508
  return ArithEntail::check(one, len)
712
244385
         && (!strict || ArithEntail::check(len, true));
713
}
714
715
205506
bool StringsEntail::checkMultisetSubset(Node a, Node b)
716
{
717
411012
  std::vector<Node> avec;
718
205506
  utils::getConcat(getMultisetApproximation(a), avec);
719
411012
  std::vector<Node> bvec;
720
205506
  utils::getConcat(b, bvec);
721
722
411012
  std::map<Node, unsigned> num_nconst[2];
723
411012
  std::map<Node, unsigned> num_const[2];
724
616518
  for (unsigned j = 0; j < 2; j++)
725
  {
726
411012
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
727
1046536
    for (const Node& cc : jvec)
728
    {
729
635524
      if (cc.isConst())
730
      {
731
171211
        num_const[j][cc]++;
732
      }
733
      else
734
      {
735
464313
        num_nconst[j][cc]++;
736
      }
737
    }
738
  }
739
205506
  bool ms_success = true;
740
219416
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
741
  {
742
150258
    if (nncp.second > num_nconst[1][nncp.first])
743
    {
744
136348
      ms_success = false;
745
136348
      break;
746
    }
747
  }
748
205506
  if (ms_success)
749
  {
750
    // count the number of constant characters in the first argument
751
207474
    std::map<Node, unsigned> count_const[2];
752
138268
    std::vector<Node> chars;
753
207474
    for (unsigned j = 0; j < 2; j++)
754
    {
755
210191
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
756
      {
757
143750
        Node cn = ncp.first;
758
71875
        Assert(cn.isConst());
759
143750
        std::vector<Node> cnChars = Word::getChars(cn);
760
218166
        for (const Node& ch : cnChars)
761
        {
762
146291
          count_const[j][ch] += ncp.second;
763
146291
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
764
          {
765
96625
            chars.push_back(ch);
766
          }
767
        }
768
      }
769
    }
770
138316
    Trace("strings-entail-ms-ss")
771
69158
        << "For " << a << " and " << b << " : " << std::endl;
772
165715
    for (const Node& ch : chars)
773
    {
774
96605
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
775
96605
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
776
193210
      Trace("strings-entail-ms-ss")
777
96605
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
778
96605
      if (count_const[0][ch] < count_const[1][ch])
779
      {
780
48
        return true;
781
      }
782
    }
783
784
    // TODO (#1180): count the number of 2,3,4,.. character substrings
785
    // for example:
786
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
787
    // since the second argument contains more occurrences of "bb".
788
    // note this is orthogonal reasoning to inductive reasoning
789
    // via regular membership reduction in Liang et al CAV 2015.
790
  }
791
205458
  return false;
792
}
793
794
177950
Node StringsEntail::checkHomogeneousString(Node a)
795
{
796
355900
  std::vector<Node> avec;
797
177950
  utils::getConcat(getMultisetApproximation(a), avec);
798
799
177950
  bool cValid = false;
800
355900
  Node c;
801
269130
  for (const Node& ac : avec)
802
  {
803
183923
    if (ac.isConst())
804
    {
805
199601
      std::vector<Node> acv = Word::getChars(ac);
806
161652
      for (const Node& cc : acv)
807
      {
808
70472
        if (!cValid)
809
        {
810
48271
          cValid = true;
811
48271
          c = cc;
812
        }
813
22201
        else if (c != cc)
814
        {
815
          // Found a different character
816
17241
          return Node::null();
817
        }
818
      }
819
    }
820
    else
821
    {
822
      // Could produce a different character
823
75502
      return Node::null();
824
    }
825
  }
826
827
85207
  if (!cValid)
828
  {
829
58929
    return Word::mkEmptyWord(a.getType());
830
  }
831
832
26278
  return c;
833
}
834
835
580290
Node StringsEntail::getMultisetApproximation(Node a)
836
{
837
580290
  NodeManager* nm = NodeManager::currentNM();
838
580290
  if (a.getKind() == STRING_SUBSTR)
839
  {
840
77704
    return a[0];
841
  }
842
502586
  else if (a.getKind() == STRING_REPLACE)
843
  {
844
8197
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
845
  }
846
494389
  else if (a.getKind() == STRING_CONCAT)
847
  {
848
136202
    NodeBuilder nb(STRING_CONCAT);
849
256738
    for (const Node& ac : a)
850
    {
851
188637
      nb << getMultisetApproximation(ac);
852
    }
853
68101
    return nb.constructNode();
854
  }
855
  else
856
  {
857
426288
    return a;
858
  }
859
}
860
861
141074
Node StringsEntail::getStringOrEmpty(Node n)
862
{
863
141074
  Node res;
864
423262
  while (res.isNull())
865
  {
866
141094
    switch (n.getKind())
867
    {
868
622
      case STRING_REPLACE:
869
      {
870
622
        if (Word::isEmpty(n[0]))
871
        {
872
          // (str.replace "" x y) --> y
873
20
          n = n[2];
874
20
          break;
875
        }
876
877
602
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
878
        {
879
          // (str.replace "A" x "") --> "A"
880
36
          res = n[0];
881
36
          break;
882
        }
883
884
566
        res = n;
885
566
        break;
886
      }
887
7985
      case STRING_SUBSTR:
888
      {
889
7985
        if (checkLengthOne(n[0]))
890
        {
891
          // (str.substr "A" x y) --> "A"
892
563
          res = n[0];
893
563
          break;
894
        }
895
7422
        res = n;
896
7422
        break;
897
      }
898
132487
      default:
899
      {
900
132487
        res = n;
901
132487
        break;
902
      }
903
    }
904
  }
905
141074
  return res;
906
}
907
908
2265
Node StringsEntail::inferEqsFromContains(Node x, Node y)
909
{
910
2265
  NodeManager* nm = NodeManager::currentNM();
911
4530
  Node emp = Word::mkEmptyWord(x.getType());
912
2265
  Assert(x.getType() == y.getType());
913
4530
  TypeNode stype = x.getType();
914
915
4530
  Node xLen = nm->mkNode(STRING_LENGTH, x);
916
4530
  std::vector<Node> yLens;
917
2265
  if (y.getKind() != STRING_CONCAT)
918
  {
919
92
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
920
  }
921
  else
922
  {
923
7946
    for (const Node& yi : y)
924
    {
925
5773
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
926
    }
927
  }
928
929
4530
  std::vector<Node> zeroLens;
930
2265
  if (x == emp)
931
  {
932
    // If x is the empty string, then all ys must be empty, too, and we can
933
    // skip the expensive checks. Note that this is just a performance
934
    // optimization.
935
426
    zeroLens.swap(yLens);
936
  }
937
  else
938
  {
939
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
940
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
941
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
942
    // true. The terms that can have length zero without making the inequality
943
    // false must be all be empty if (str.contains x y) is true.
944
1839
    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
945
    {
946
      // We could not prove that the inequality holds
947
532
      return Node::null();
948
    }
949
1307
    else if (yLens.size() == y.getNumChildren())
950
    {
951
      // We could only prove that the inequality holds but not that any of the
952
      // ys must be empty
953
563
      return nm->mkNode(EQUAL, x, y);
954
    }
955
  }
956
957
1170
  if (y.getKind() != STRING_CONCAT)
958
  {
959
8
    if (zeroLens.size() == 1)
960
    {
961
      // y is not a concatenation and we found that it must be empty, so just
962
      // return (= y "")
963
      Assert(zeroLens[0][0] == y);
964
      return nm->mkNode(EQUAL, y, emp);
965
    }
966
    else
967
    {
968
8
      Assert(yLens.size() == 1 && yLens[0][0] == y);
969
8
      return nm->mkNode(EQUAL, x, y);
970
    }
971
  }
972
973
2324
  std::vector<Node> cs;
974
2039
  for (const Node& yiLen : yLens)
975
  {
976
877
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
977
877
    cs.push_back(yiLen[0]);
978
  }
979
980
2324
  NodeBuilder nb(AND);
981
  // (= x (str.++ y1' ... ym'))
982
1162
  if (!cs.empty())
983
  {
984
736
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
985
  }
986
  // (= y1'' "") ... (= yk'' "")
987
3527
  for (const Node& zeroLen : zeroLens)
988
  {
989
2365
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
990
2365
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
991
  }
992
993
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
994
1162
  return nb.constructNode();
995
}
996
997
}  // namespace strings
998
}  // namespace theory
999
29280
}  // namespace cvc5