GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 422 445 94.8 %
Date: 2021-11-07 Branches: 942 2027 46.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
15667
StringsEntail::StringsEntail(Rewriter* r,
34
                             ArithEntail& aent,
35
15667
                             SequencesRewriter& rewriter)
36
15667
    : d_rr(r), d_arithEntail(aent), d_rewriter(rewriter)
37
{
38
15667
}
39
40
791
bool StringsEntail::canConstantContainConcat(Node c,
41
                                             Node n,
42
                                             int& firstc,
43
                                             int& lastc)
44
{
45
791
  Assert(c.isConst());
46
791
  Assert(n.getKind() == STRING_CONCAT);
47
  // must find constant components in order
48
791
  size_t pos = 0;
49
791
  firstc = -1;
50
791
  lastc = -1;
51
2823
  for (unsigned i = 0; i < n.getNumChildren(); i++)
52
  {
53
2137
    if (n[i].isConst())
54
    {
55
477
      firstc = firstc == -1 ? i : firstc;
56
477
      lastc = i;
57
477
      size_t new_pos = Word::find(c, n[i], pos);
58
477
      if (new_pos == std::string::npos)
59
      {
60
97
        return false;
61
      }
62
      else
63
      {
64
380
        pos = new_pos + Word::getLength(n[i]);
65
      }
66
    }
67
1660
    else if (n[i].getKind() == STRING_ITOS && d_arithEntail.check(n[i][0]))
68
    {
69
20
      Assert(c.getType().isString());  // string-only
70
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
71
      // find the first occurrence of a digit starting at pos
72
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
73
      {
74
48
        pos++;
75
      }
76
20
      if (pos == tvec.size())
77
      {
78
8
        return false;
79
      }
80
      // must consume at least one digit here
81
12
      pos++;
82
    }
83
  }
84
686
  return true;
85
}
86
87
48906
bool StringsEntail::canConstantContainList(Node c,
88
                                           std::vector<Node>& l,
89
                                           int& firstc,
90
                                           int& lastc)
91
{
92
48906
  Assert(c.isConst());
93
  // must find constant components in order
94
48906
  size_t pos = 0;
95
48906
  firstc = -1;
96
48906
  lastc = -1;
97
141552
  for (unsigned i = 0; i < l.size(); i++)
98
  {
99
93167
    if (l[i].isConst())
100
    {
101
82107
      firstc = firstc == -1 ? i : firstc;
102
82107
      lastc = i;
103
82107
      size_t new_pos = Word::find(c, l[i], pos);
104
82107
      if (new_pos == std::string::npos)
105
      {
106
521
        return false;
107
      }
108
      else
109
      {
110
81586
        pos = new_pos + Word::getLength(l[i]);
111
      }
112
    }
113
  }
114
48385
  return true;
115
}
116
117
83614
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
118
                                        std::vector<Node>& nr,
119
                                        int dir,
120
                                        Node& curr,
121
                                        bool strict)
122
{
123
83614
  Assert(dir == 1 || dir == -1);
124
83614
  Assert(nr.empty());
125
83614
  NodeManager* nm = NodeManager::currentNM();
126
167228
  Node zero = nm->mkConst(cvc5::Rational(0));
127
83614
  bool ret = false;
128
83614
  bool success = true;
129
83614
  unsigned sindex = 0;
130
235318
  while (success && curr != zero && sindex < n1.size())
131
  {
132
75852
    Assert(!curr.isNull());
133
75852
    success = false;
134
75852
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
135
75852
    if (n1[sindex_use].isConst())
136
    {
137
      // could strip part of a constant
138
41998
      Node lowerBound = d_arithEntail.getConstantBound(d_rr->rewrite(curr));
139
20999
      if (!lowerBound.isNull())
140
      {
141
7209
        Assert(lowerBound.isConst());
142
14418
        Rational lbr = lowerBound.getConst<Rational>();
143
7209
        if (lbr.sgn() > 0)
144
        {
145
4956
          Assert(d_arithEntail.check(curr, true));
146
9912
          Node s = n1[sindex_use];
147
4956
          size_t slen = Word::getLength(s);
148
9912
          Node ncl = nm->mkConst(cvc5::Rational(slen));
149
9912
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
150
4956
          next_s = d_rr->rewrite(next_s);
151
4956
          Assert(next_s.isConst());
152
          // we can remove the entire constant
153
4956
          if (next_s.getConst<Rational>().sgn() >= 0)
154
          {
155
2575
            curr = d_rr->rewrite(nm->mkNode(MINUS, curr, ncl));
156
2575
            success = true;
157
2575
            sindex++;
158
          }
159
          else
160
          {
161
            // we can remove part of the constant
162
            // lower bound minus the length of a concrete string is negative,
163
            // hence lowerBound cannot be larger than long max
164
2381
            Assert(lbr < Rational(String::maxSize()));
165
2381
            curr = d_rr->rewrite(nm->mkNode(MINUS, curr, lowerBound));
166
2381
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
167
2381
            Assert(lbsize < slen);
168
2381
            if (dir == 1)
169
            {
170
              // strip partially from the front
171
2211
              nr.push_back(Word::prefix(s, lbsize));
172
2211
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
173
            }
174
            else
175
            {
176
              // strip partially from the back
177
170
              nr.push_back(Word::suffix(s, lbsize));
178
170
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
179
            }
180
2381
            ret = true;
181
          }
182
4956
          Assert(d_arithEntail.check(curr));
183
        }
184
        else
185
        {
186
          // we cannot remove the constant
187
        }
188
      }
189
    }
190
    else
191
    {
192
      Node next_s = NodeManager::currentNM()->mkNode(
193
          MINUS,
194
          curr,
195
109706
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
196
54853
      next_s = d_rr->rewrite(next_s);
197
54853
      if (d_arithEntail.check(next_s))
198
      {
199
1267
        success = true;
200
1267
        curr = next_s;
201
1267
        sindex++;
202
      }
203
    }
204
  }
205
83614
  if (sindex > 0 && (!strict || curr == zero))
206
  {
207
3136
    if (dir == 1)
208
    {
209
2839
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
210
2839
      n1.erase(n1.begin(), n1.begin() + sindex);
211
    }
212
    else
213
    {
214
297
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
215
297
      n1.erase(n1.end() - sindex, n1.end());
216
    }
217
3136
    ret = true;
218
  }
219
167228
  return ret;
220
}
221
222
109281
int StringsEntail::componentContains(std::vector<Node>& n1,
223
                                     std::vector<Node>& n2,
224
                                     std::vector<Node>& nb,
225
                                     std::vector<Node>& ne,
226
                                     bool computeRemainder,
227
                                     int remainderDir)
228
{
229
109281
  Assert(nb.empty());
230
109281
  Assert(ne.empty());
231
109281
  Trace("strings-entail") << "Component contains: " << std::endl;
232
109281
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
233
109281
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
234
  // if n2 is a singleton, we can do optimized version here
235
109281
  if (n2.size() == 1)
236
  {
237
215152
    for (unsigned i = 0; i < n1.size(); i++)
238
    {
239
219372
      Node n1rb;
240
219372
      Node n1re;
241
111200
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
242
      {
243
3028
        if (computeRemainder)
244
        {
245
507
          n1[i] = n2[0];
246
507
          if (remainderDir != -1)
247
          {
248
507
            if (!n1re.isNull())
249
            {
250
53
              ne.push_back(n1re);
251
            }
252
507
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
253
507
            n1.erase(n1.begin() + i + 1, n1.end());
254
          }
255
          else if (!n1re.isNull())
256
          {
257
            n1[i] = d_rr->rewrite(
258
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
259
          }
260
507
          if (remainderDir != 1)
261
          {
262
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
263
            n1.erase(n1.begin(), n1.begin() + i);
264
            if (!n1rb.isNull())
265
            {
266
              nb.push_back(n1rb);
267
            }
268
          }
269
507
          else if (!n1rb.isNull())
270
          {
271
100
            n1[i] = d_rr->rewrite(
272
200
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
273
          }
274
        }
275
3028
        return i;
276
      }
277
    }
278
  }
279
2301
  else if (n1.size() >= n2.size())
280
  {
281
748
    unsigned diff = n1.size() - n2.size();
282
1480
    for (unsigned i = 0; i <= diff; i++)
283
    {
284
1746
      Node n1rb_first;
285
1746
      Node n1re_first;
286
      // first component of n2 must be a suffix
287
2028
      if (componentContainsBase(n1[i],
288
1014
                                n2[0],
289
                                n1rb_first,
290
                                n1re_first,
291
                                1,
292
1014
                                computeRemainder && remainderDir != 1))
293
      {
294
313
        Assert(n1re_first.isNull());
295
512
        for (unsigned j = 1; j < n2.size(); j++)
296
        {
297
          // are we in the last component?
298
512
          if (j + 1 == n2.size())
299
          {
300
303
            Node n1rb_last;
301
303
            Node n1re_last;
302
            // last component of n2 must be a prefix
303
606
            if (componentContainsBase(n1[i + j],
304
303
                                      n2[j],
305
                                      n1rb_last,
306
                                      n1re_last,
307
                                      -1,
308
303
                                      computeRemainder && remainderDir != -1))
309
            {
310
564
              Trace("strings-entail-debug")
311
282
                  << "Last remainder begin is " << n1rb_last << std::endl;
312
564
              Trace("strings-entail-debug")
313
282
                  << "Last remainder end is " << n1re_last << std::endl;
314
282
              Assert(n1rb_last.isNull());
315
282
              if (computeRemainder)
316
              {
317
144
                if (remainderDir != -1)
318
                {
319
144
                  if (!n1re_last.isNull())
320
                  {
321
2
                    ne.push_back(n1re_last);
322
                  }
323
144
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
324
144
                  n1.erase(n1.begin() + i + j + 1, n1.end());
325
144
                  n1[i + j] = n2[j];
326
                }
327
144
                if (remainderDir != 1)
328
                {
329
                  n1[i] = n2[0];
330
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
331
                  n1.erase(n1.begin(), n1.begin() + i);
332
                  if (!n1rb_first.isNull())
333
                  {
334
                    nb.push_back(n1rb_first);
335
                  }
336
                }
337
              }
338
282
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
339
282
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
340
282
              Trace("strings-entail-debug") << "...return " << i << std::endl;
341
282
              return i;
342
            }
343
            else
344
            {
345
21
              break;
346
            }
347
          }
348
209
          else if (n1[i + j] != n2[j])
349
          {
350
10
            break;
351
          }
352
        }
353
      }
354
    }
355
  }
356
105971
  return -1;
357
}
358
359
112517
bool StringsEntail::componentContainsBase(
360
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
361
{
362
112517
  Assert(n1rb.isNull());
363
112517
  Assert(n1re.isNull());
364
365
112517
  NodeManager* nm = NodeManager::currentNM();
366
367
112517
  if (n1 == n2)
368
  {
369
1324
    return true;
370
  }
371
  else
372
  {
373
111193
    if (n1.isConst() && n2.isConst())
374
    {
375
674
      size_t len1 = Word::getLength(n1);
376
674
      size_t len2 = Word::getLength(n2);
377
674
      if (len2 < len1)
378
      {
379
262
        if (dir == 1)
380
        {
381
20
          if (Word::suffix(n1, len2) == n2)
382
          {
383
20
            if (computeRemainder)
384
            {
385
              n1rb = Word::prefix(n1, len1 - len2);
386
            }
387
20
            return true;
388
          }
389
        }
390
242
        else if (dir == -1)
391
        {
392
4
          if (Word::prefix(n1, len2) == n2)
393
          {
394
4
            if (computeRemainder)
395
            {
396
              n1re = Word::suffix(n1, len1 - len2);
397
            }
398
4
            return true;
399
          }
400
        }
401
        else
402
        {
403
238
          size_t f = Word::find(n1, n2);
404
238
          if (f != std::string::npos)
405
          {
406
222
            if (computeRemainder)
407
            {
408
133
              if (f > 0)
409
              {
410
96
                n1rb = Word::prefix(n1, f);
411
              }
412
133
              if (len1 > f + len2)
413
              {
414
49
                n1re = Word::suffix(n1, len1 - (f + len2));
415
              }
416
            }
417
222
            return true;
418
          }
419
        }
420
      }
421
    }
422
    else
423
    {
424
      // cases for:
425
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
426
110519
      if (n2.getKind() == STRING_SUBSTR)
427
      {
428
62149
        if (n2[0] == n1)
429
        {
430
835
          bool success = true;
431
835
          Node start_pos = n2[1];
432
835
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
433
835
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
434
835
          if (dir == 1)
435
          {
436
            // To be a suffix, start + length must be greater than
437
            // or equal to the length of the string.
438
4
            success = d_arithEntail.check(end_pos, len_n2s);
439
          }
440
831
          else if (dir == -1)
441
          {
442
            // To be a prefix, must literally start at 0, since
443
            //   if we knew it started at <0, it should be rewritten to "",
444
            //   if we knew it started at 0, then n2[1] should be rewritten to
445
            //   0.
446
16
            success = start_pos.isConst()
447
8
                      && start_pos.getConst<Rational>().sgn() == 0;
448
          }
449
835
          if (success)
450
          {
451
835
            if (computeRemainder)
452
            {
453
              // we can only compute the remainder if start_pos and end_pos
454
              // are known to be non-negative.
455
54
              if (!d_arithEntail.check(start_pos)
456
54
                  || !d_arithEntail.check(end_pos))
457
              {
458
12
                return false;
459
              }
460
6
              if (dir != -1)
461
              {
462
12
                n1rb = nm->mkNode(
463
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
464
              }
465
6
              if (dir != 1)
466
              {
467
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
468
              }
469
            }
470
823
            return true;
471
          }
472
        }
473
      }
474
475
109684
      if (!computeRemainder && dir == 0)
476
      {
477
108038
        if (n1.getKind() == STRING_REPLACE)
478
        {
479
          // (str.contains (str.replace x y z) w) ---> true
480
          // if (str.contains x w) --> true and (str.contains z w) ---> true
481
3400
          Node xCtnW = checkContains(n1[0], n2);
482
2315
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
483
          {
484
1780
            Node zCtnW = checkContains(n1[2], n2);
485
1505
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
486
            {
487
1230
              return true;
488
            }
489
          }
490
        }
491
      }
492
    }
493
  }
494
108882
  return false;
495
}
496
497
107548
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
498
                                           std::vector<Node>& n2,
499
                                           std::vector<Node>& nb,
500
                                           std::vector<Node>& ne,
501
                                           int dir)
502
{
503
107548
  Assert(nb.empty());
504
107548
  Assert(ne.empty());
505
107548
  bool changed = false;
506
  // for ( forwards, backwards ) direction
507
252054
  for (unsigned r = 0; r < 2; r++)
508
  {
509
179814
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
510
    {
511
178532
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
512
178532
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
513
178532
      bool removeComponent = false;
514
321756
      Node n1cmp = n1[index0];
515
516
178532
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
517
      {
518
35236
        return false;
519
      }
520
521
286520
      std::vector<Node> sss;
522
286520
      std::vector<Node> sls;
523
143296
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
524
286592
      Trace("strings-rewrite-debug2")
525
143296
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
526
143296
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
527
143296
      if (n1cmp.isConst())
528
      {
529
156556
        Node s = n1cmp;
530
78278
        size_t slen = Word::getLength(s);
531
        // overlap is an overapproximation of the number of characters
532
        // n2[index1] can match in s
533
78278
        unsigned overlap = Word::getLength(s);
534
78278
        if (n2[index1].isConst())
535
        {
536
23414
          Node t = n2[index1];
537
11707
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
538
11707
          if (ret == std::string::npos)
539
          {
540
404
            if (n1.size() == 1)
541
            {
542
              // can remove everything
543
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
544
              //   str.contains( "", str.++( "ba", x ) )
545
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
546
              //   str.contains( "", "d" )
547
18
              removeComponent = true;
548
            }
549
386
            else if (sss.empty())  // only if not substr
550
            {
551
              // check how much overlap there is
552
              // This is used to partially strip off the endpoint
553
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
554
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
555
374
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
556
            }
557
            // note that we cannot process substring here, since t may
558
            // match only part of s. Consider:
559
            // (str.++ "C" (str.substr "AB" x y)), "CB"
560
            // where "AB" and "CB" have no overlap, but "C" is not part of what
561
            // is matched with "AB".
562
          }
563
11303
          else if (sss.empty())  // only if not substr
564
          {
565
659
            Assert(ret < slen);
566
            // can strip off up to the find position, e.g.
567
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
568
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
569
            // and
570
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
571
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
572
659
            overlap = slen - ret;
573
          }
574
        }
575
        else
576
        {
577
          // inconclusive
578
        }
579
78278
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
580
        // process the overlap
581
78278
        if (overlap < slen)
582
        {
583
184
          changed = true;
584
184
          if (overlap == 0)
585
          {
586
134
            removeComponent = true;
587
          }
588
          else
589
          {
590
            // can drop the prefix (resp. suffix) from the first (resp. last)
591
            // component
592
50
            if (r == 0)
593
            {
594
18
              nb.push_back(Word::prefix(s, slen - overlap));
595
18
              n1[index0] = Word::suffix(s, overlap);
596
            }
597
            else
598
            {
599
32
              ne.push_back(Word::suffix(s, slen - overlap));
600
32
              n1[index0] = Word::prefix(s, overlap);
601
            }
602
          }
603
        }
604
      }
605
65018
      else if (n1cmp.getKind() == STRING_ITOS)
606
      {
607
322
        if (n2[index1].isConst())
608
        {
609
228
          Assert(n2[index1].getType().isString());  // string-only
610
456
          cvc5::String t = n2[index1].getConst<String>();
611
228
          if (n1.size() == 1)
612
          {
613
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
614
            // the entire component
615
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
616
218
            if (!t.isNumber())
617
            {
618
54
              removeComponent = true;
619
            }
620
          }
621
          else
622
          {
623
10
            const std::vector<unsigned>& tvec = t.getVec();
624
10
            Assert(tvec.size() > 0);
625
626
            // if n1.size()>1, then if the first (resp. last) character of
627
            // n2[index1]
628
            //  is not a digit, we can drop the entire component, e.g.:
629
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
630
            //    str.contains( y, "a12" )
631
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
632
            //    str.contains( y, "a0b" )
633
10
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
634
10
            if (!String::isDigit(tvec[i]))
635
            {
636
8
              removeComponent = true;
637
            }
638
          }
639
        }
640
      }
641
143296
      if (removeComponent)
642
      {
643
214
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
644
        // can drop entire first (resp. last) component
645
214
        if (r == 0)
646
        {
647
147
          nb.push_back(n1[index0]);
648
147
          n1.erase(n1.begin(), n1.begin() + 1);
649
        }
650
        else
651
        {
652
67
          ne.push_back(n1[index0]);
653
67
          n1.pop_back();
654
        }
655
214
        if (n1.empty())
656
        {
657
          // if we've removed everything, just return (we will rewrite to false)
658
72
          return true;
659
        }
660
        else
661
        {
662
142
          changed = true;
663
        }
664
      }
665
    }
666
  }
667
  // TODO (#1180) : computing the maximal overlap in this function may be
668
  // important.
669
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
670
  // false
671
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
672
  //   leaving 4 characters
673
  //      which is larger that the upper bound for length of str.substr(y,0,3),
674
  //      which is 3.
675
72240
  return changed;
676
}
677
678
12931
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
679
{
680
12931
  NodeManager* nm = NodeManager::currentNM();
681
25862
  Node ctn = nm->mkNode(STRING_CONTAINS, a, b);
682
683
12931
  if (fullRewriter)
684
  {
685
12931
    ctn = d_rr->rewrite(ctn);
686
  }
687
  else
688
  {
689
    Node prev;
690
    do
691
    {
692
      prev = ctn;
693
      ctn = d_rewriter.rewriteContains(ctn);
694
      if (ctn != prev)
695
      {
696
        ctn = d_rewriter.postProcessRewrite(prev, ctn);
697
      }
698
    } while (prev != ctn && ctn.getKind() == STRING_CONTAINS);
699
  }
700
701
12931
  Assert(ctn.getType().isBoolean());
702
25862
  return ctn.isConst() ? ctn : Node::null();
703
}
704
705
35811
bool StringsEntail::checkNonEmpty(Node a)
706
{
707
71622
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
708
35811
  len = d_rr->rewrite(len);
709
71622
  return d_arithEntail.check(len, true);
710
}
711
712
68171
bool StringsEntail::checkLengthOne(Node s, bool strict)
713
{
714
68171
  NodeManager* nm = NodeManager::currentNM();
715
136342
  Node one = nm->mkConst(Rational(1));
716
136342
  Node len = nm->mkNode(STRING_LENGTH, s);
717
68171
  len = d_rr->rewrite(len);
718
272684
  return d_arithEntail.check(one, len)
719
340855
         && (!strict || d_arithEntail.check(len, true));
720
}
721
722
104636
bool StringsEntail::checkMultisetSubset(Node a, Node b)
723
{
724
209272
  std::vector<Node> avec;
725
104636
  utils::getConcat(getMultisetApproximation(a), avec);
726
209272
  std::vector<Node> bvec;
727
104636
  utils::getConcat(b, bvec);
728
729
209272
  std::map<Node, unsigned> num_nconst[2];
730
209272
  std::map<Node, unsigned> num_const[2];
731
313908
  for (unsigned j = 0; j < 2; j++)
732
  {
733
209272
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
734
437176
    for (const Node& cc : jvec)
735
    {
736
227904
      if (cc.isConst())
737
      {
738
115316
        num_const[j][cc]++;
739
      }
740
      else
741
      {
742
112588
        num_nconst[j][cc]++;
743
      }
744
    }
745
  }
746
104636
  bool ms_success = true;
747
105705
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
748
  {
749
32382
    if (nncp.second > num_nconst[1][nncp.first])
750
    {
751
31313
      ms_success = false;
752
31313
      break;
753
    }
754
  }
755
104636
  if (ms_success)
756
  {
757
    // count the number of constant characters in the first argument
758
219969
    std::map<Node, unsigned> count_const[2];
759
146626
    std::vector<Node> chars;
760
219969
    for (unsigned j = 0; j < 2; j++)
761
    {
762
225773
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
763
      {
764
158254
        Node cn = ncp.first;
765
79127
        Assert(cn.isConst());
766
158254
        std::vector<Node> cnChars = Word::getChars(cn);
767
208069
        for (const Node& ch : cnChars)
768
        {
769
128942
          count_const[j][ch] += ncp.second;
770
128942
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
771
          {
772
96729
            chars.push_back(ch);
773
          }
774
        }
775
      }
776
    }
777
146646
    Trace("strings-entail-ms-ss")
778
73323
        << "For " << a << " and " << b << " : " << std::endl;
779
170032
    for (const Node& ch : chars)
780
    {
781
96729
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
782
96729
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
783
193458
      Trace("strings-entail-ms-ss")
784
96729
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
785
96729
      if (count_const[0][ch] < count_const[1][ch])
786
      {
787
20
        return true;
788
      }
789
    }
790
791
    // TODO (#1180): count the number of 2,3,4,.. character substrings
792
    // for example:
793
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
794
    // since the second argument contains more occurrences of "bb".
795
    // note this is orthogonal reasoning to inductive reasoning
796
    // via regular membership reduction in Liang et al CAV 2015.
797
  }
798
104616
  return false;
799
}
800
801
129506
Node StringsEntail::checkHomogeneousString(Node a)
802
{
803
259012
  std::vector<Node> avec;
804
129506
  utils::getConcat(getMultisetApproximation(a), avec);
805
806
129506
  bool cValid = false;
807
259012
  Node c;
808
196680
  for (const Node& ac : avec)
809
  {
810
134793
    if (ac.isConst())
811
    {
812
149652
      std::vector<Node> acv = Word::getChars(ac);
813
133223
      for (const Node& cc : acv)
814
      {
815
66049
        if (!cValid)
816
        {
817
46556
          cValid = true;
818
46556
          c = cc;
819
        }
820
19493
        else if (c != cc)
821
        {
822
          // Found a different character
823
15304
          return Node::null();
824
        }
825
      }
826
    }
827
    else
828
    {
829
      // Could produce a different character
830
52315
      return Node::null();
831
    }
832
  }
833
834
61887
  if (!cValid)
835
  {
836
35042
    return Word::mkEmptyWord(a.getType());
837
  }
838
839
26845
  return c;
840
}
841
842
259426
Node StringsEntail::getMultisetApproximation(Node a)
843
{
844
259426
  NodeManager* nm = NodeManager::currentNM();
845
259426
  if (a.getKind() == STRING_SUBSTR)
846
  {
847
86051
    return a[0];
848
  }
849
173375
  else if (a.getKind() == STRING_REPLACE)
850
  {
851
5266
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
852
  }
853
168109
  else if (a.getKind() == STRING_CONCAT)
854
  {
855
17946
    NodeBuilder nb(STRING_CONCAT);
856
28991
    for (const Node& ac : a)
857
    {
858
20018
      nb << getMultisetApproximation(ac);
859
    }
860
8973
    return nb.constructNode();
861
  }
862
  else
863
  {
864
159136
    return a;
865
  }
866
}
867
868
175306
Node StringsEntail::getStringOrEmpty(Node n)
869
{
870
175306
  Node res;
871
525954
  while (res.isNull())
872
  {
873
175324
    switch (n.getKind())
874
    {
875
1681
      case STRING_REPLACE:
876
      {
877
1681
        if (Word::isEmpty(n[0]))
878
        {
879
          // (str.replace "" x y) --> y
880
18
          n = n[2];
881
18
          break;
882
        }
883
1663
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
884
        {
885
          // (str.replace "A" x "") --> "A"
886
60
          res = n[0];
887
60
          break;
888
        }
889
890
1603
        res = n;
891
1603
        break;
892
      }
893
11945
      case STRING_SUBSTR:
894
      {
895
11945
        if (checkLengthOne(n[0]))
896
        {
897
          // (str.substr "A" x y) --> "A"
898
619
          res = n[0];
899
619
          break;
900
        }
901
11326
        res = n;
902
11326
        break;
903
      }
904
161698
      default:
905
      {
906
161698
        res = n;
907
161698
        break;
908
      }
909
    }
910
  }
911
175306
  return res;
912
}
913
914
1035
Node StringsEntail::inferEqsFromContains(Node x, Node y)
915
{
916
1035
  NodeManager* nm = NodeManager::currentNM();
917
2070
  Node emp = Word::mkEmptyWord(x.getType());
918
1035
  Assert(x.getType() == y.getType());
919
2070
  TypeNode stype = x.getType();
920
921
2070
  Node xLen = nm->mkNode(STRING_LENGTH, x);
922
2070
  std::vector<Node> yLens;
923
1035
  if (y.getKind() != STRING_CONCAT)
924
  {
925
134
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
926
  }
927
  else
928
  {
929
3355
    for (const Node& yi : y)
930
    {
931
2454
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
932
    }
933
  }
934
935
2070
  std::vector<Node> zeroLens;
936
1035
  if (x == emp)
937
  {
938
    // If x is the empty string, then all ys must be empty, too, and we can
939
    // skip the expensive checks. Note that this is just a performance
940
    // optimization.
941
101
    zeroLens.swap(yLens);
942
  }
943
  else
944
  {
945
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
946
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
947
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
948
    // true. The terms that can have length zero without making the inequality
949
    // false must be all be empty if (str.contains x y) is true.
950
934
    if (!d_arithEntail.inferZerosInSumGeq(xLen, yLens, zeroLens))
951
    {
952
      // We could not prove that the inequality holds
953
830
      return Node::null();
954
    }
955
104
    else if (yLens.size() == y.getNumChildren())
956
    {
957
      // We could only prove that the inequality holds but not that any of the
958
      // ys must be empty
959
49
      return nm->mkNode(EQUAL, x, y);
960
    }
961
  }
962
963
156
  if (y.getKind() != STRING_CONCAT)
964
  {
965
24
    if (zeroLens.size() == 1)
966
    {
967
      // y is not a concatenation and we found that it must be empty, so just
968
      // return (= y "")
969
      Assert(zeroLens[0][0] == y);
970
      return nm->mkNode(EQUAL, y, emp);
971
    }
972
    else
973
    {
974
24
      Assert(yLens.size() == 1 && yLens[0][0] == y);
975
24
      return nm->mkNode(EQUAL, x, y);
976
    }
977
  }
978
979
264
  std::vector<Node> cs;
980
173
  for (const Node& yiLen : yLens)
981
  {
982
41
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
983
41
    cs.push_back(yiLen[0]);
984
  }
985
986
264
  NodeBuilder nb(AND);
987
  // (= x (str.++ y1' ... ym'))
988
132
  if (!cs.empty())
989
  {
990
31
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
991
  }
992
  // (= y1'' "") ... (= yk'' "")
993
406
  for (const Node& zeroLen : zeroLens)
994
  {
995
274
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
996
274
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
997
  }
998
999
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
1000
132
  return nb.constructNode();
1001
}
1002
1003
}  // namespace strings
1004
}  // namespace theory
1005
31137
}  // namespace cvc5