GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 420 443 94.8 %
Date: 2021-08-14 Branches: 942 2027 46.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Aina Niemetz
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of entailment tests involving strings.
14
 */
15
16
#include "theory/strings/strings_entail.h"
17
18
#include "expr/node_builder.h"
19
#include "theory/rewriter.h"
20
#include "theory/strings/arith_entail.h"
21
#include "theory/strings/sequences_rewriter.h"
22
#include "theory/strings/theory_strings_utils.h"
23
#include "theory/strings/word.h"
24
#include "util/rational.h"
25
#include "util/string.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
10068
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
34
{
35
10068
}
36
37
724
bool StringsEntail::canConstantContainConcat(Node c,
38
                                             Node n,
39
                                             int& firstc,
40
                                             int& lastc)
41
{
42
724
  Assert(c.isConst());
43
724
  Assert(n.getKind() == STRING_CONCAT);
44
  // must find constant components in order
45
724
  size_t pos = 0;
46
724
  firstc = -1;
47
724
  lastc = -1;
48
2647
  for (unsigned i = 0; i < n.getNumChildren(); i++)
49
  {
50
1948
    if (n[i].isConst())
51
    {
52
386
      firstc = firstc == -1 ? i : firstc;
53
386
      lastc = i;
54
386
      size_t new_pos = Word::find(c, n[i], pos);
55
386
      if (new_pos == std::string::npos)
56
      {
57
17
        return false;
58
      }
59
      else
60
      {
61
369
        pos = new_pos + Word::getLength(n[i]);
62
      }
63
    }
64
1562
    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
65
    {
66
20
      Assert(c.getType().isString());  // string-only
67
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
68
      // find the first occurrence of a digit starting at pos
69
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
70
      {
71
48
        pos++;
72
      }
73
20
      if (pos == tvec.size())
74
      {
75
8
        return false;
76
      }
77
      // must consume at least one digit here
78
12
      pos++;
79
    }
80
  }
81
699
  return true;
82
}
83
84
46598
bool StringsEntail::canConstantContainList(Node c,
85
                                           std::vector<Node>& l,
86
                                           int& firstc,
87
                                           int& lastc)
88
{
89
46598
  Assert(c.isConst());
90
  // must find constant components in order
91
46598
  size_t pos = 0;
92
46598
  firstc = -1;
93
46598
  lastc = -1;
94
135324
  for (unsigned i = 0; i < l.size(); i++)
95
  {
96
89225
    if (l[i].isConst())
97
    {
98
78226
      firstc = firstc == -1 ? i : firstc;
99
78226
      lastc = i;
100
78226
      size_t new_pos = Word::find(c, l[i], pos);
101
78226
      if (new_pos == std::string::npos)
102
      {
103
499
        return false;
104
      }
105
      else
106
      {
107
77727
        pos = new_pos + Word::getLength(l[i]);
108
      }
109
    }
110
  }
111
46099
  return true;
112
}
113
114
81162
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
115
                                        std::vector<Node>& nr,
116
                                        int dir,
117
                                        Node& curr,
118
                                        bool strict)
119
{
120
81162
  Assert(dir == 1 || dir == -1);
121
81162
  Assert(nr.empty());
122
81162
  NodeManager* nm = NodeManager::currentNM();
123
162324
  Node zero = nm->mkConst(cvc5::Rational(0));
124
81162
  bool ret = false;
125
81162
  bool success = true;
126
81162
  unsigned sindex = 0;
127
217852
  while (success && curr != zero && sindex < n1.size())
128
  {
129
68345
    Assert(!curr.isNull());
130
68345
    success = false;
131
68345
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
132
68345
    if (n1[sindex_use].isConst())
133
    {
134
      // could strip part of a constant
135
24974
      Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr));
136
12487
      if (!lowerBound.isNull())
137
      {
138
3925
        Assert(lowerBound.isConst());
139
7850
        Rational lbr = lowerBound.getConst<Rational>();
140
3925
        if (lbr.sgn() > 0)
141
        {
142
2228
          Assert(ArithEntail::check(curr, true));
143
4456
          Node s = n1[sindex_use];
144
2228
          size_t slen = Word::getLength(s);
145
4456
          Node ncl = nm->mkConst(cvc5::Rational(slen));
146
4456
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
147
2228
          next_s = Rewriter::rewrite(next_s);
148
2228
          Assert(next_s.isConst());
149
          // we can remove the entire constant
150
2228
          if (next_s.getConst<Rational>().sgn() >= 0)
151
          {
152
985
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
153
985
            success = true;
154
985
            sindex++;
155
          }
156
          else
157
          {
158
            // we can remove part of the constant
159
            // lower bound minus the length of a concrete string is negative,
160
            // hence lowerBound cannot be larger than long max
161
1243
            Assert(lbr < Rational(String::maxSize()));
162
1243
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
163
1243
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
164
1243
            Assert(lbsize < slen);
165
1243
            if (dir == 1)
166
            {
167
              // strip partially from the front
168
1102
              nr.push_back(Word::prefix(s, lbsize));
169
1102
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
170
            }
171
            else
172
            {
173
              // strip partially from the back
174
141
              nr.push_back(Word::suffix(s, lbsize));
175
141
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
176
            }
177
1243
            ret = true;
178
          }
179
2228
          Assert(ArithEntail::check(curr));
180
        }
181
        else
182
        {
183
          // we cannot remove the constant
184
        }
185
      }
186
    }
187
    else
188
    {
189
      Node next_s = NodeManager::currentNM()->mkNode(
190
          MINUS,
191
          curr,
192
111716
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
193
55858
      next_s = Rewriter::rewrite(next_s);
194
55858
      if (ArithEntail::check(next_s))
195
      {
196
383
        success = true;
197
383
        curr = next_s;
198
383
        sindex++;
199
      }
200
    }
201
  }
202
81162
  if (sindex > 0 && (!strict || curr == zero))
203
  {
204
1250
    if (dir == 1)
205
    {
206
1161
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
207
1161
      n1.erase(n1.begin(), n1.begin() + sindex);
208
    }
209
    else
210
    {
211
89
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
212
89
      n1.erase(n1.end() - sindex, n1.end());
213
    }
214
1250
    ret = true;
215
  }
216
162324
  return ret;
217
}
218
219
78484
int StringsEntail::componentContains(std::vector<Node>& n1,
220
                                     std::vector<Node>& n2,
221
                                     std::vector<Node>& nb,
222
                                     std::vector<Node>& ne,
223
                                     bool computeRemainder,
224
                                     int remainderDir)
225
{
226
78484
  Assert(nb.empty());
227
78484
  Assert(ne.empty());
228
78484
  Trace("strings-entail") << "Component contains: " << std::endl;
229
78484
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
230
78484
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
231
  // if n2 is a singleton, we can do optimized version here
232
78484
  if (n2.size() == 1)
233
  {
234
153423
    for (unsigned i = 0; i < n1.size(); i++)
235
    {
236
157471
      Node n1rb;
237
157471
      Node n1re;
238
80209
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
239
      {
240
2947
        if (computeRemainder)
241
        {
242
457
          n1[i] = n2[0];
243
457
          if (remainderDir != -1)
244
          {
245
457
            if (!n1re.isNull())
246
            {
247
41
              ne.push_back(n1re);
248
            }
249
457
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
250
457
            n1.erase(n1.begin() + i + 1, n1.end());
251
          }
252
          else if (!n1re.isNull())
253
          {
254
            n1[i] = Rewriter::rewrite(
255
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
256
          }
257
457
          if (remainderDir != 1)
258
          {
259
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
260
            n1.erase(n1.begin(), n1.begin() + i);
261
            if (!n1rb.isNull())
262
            {
263
              nb.push_back(n1rb);
264
            }
265
          }
266
457
          else if (!n1rb.isNull())
267
          {
268
84
            n1[i] = Rewriter::rewrite(
269
168
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
270
          }
271
        }
272
2947
        return i;
273
      }
274
    }
275
  }
276
2323
  else if (n1.size() >= n2.size())
277
  {
278
704
    unsigned diff = n1.size() - n2.size();
279
1425
    for (unsigned i = 0; i <= diff; i++)
280
    {
281
1700
      Node n1rb_first;
282
1700
      Node n1re_first;
283
      // first component of n2 must be a suffix
284
1958
      if (componentContainsBase(n1[i],
285
979
                                n2[0],
286
                                n1rb_first,
287
                                n1re_first,
288
                                1,
289
979
                                computeRemainder && remainderDir != 1))
290
      {
291
276
        Assert(n1re_first.isNull());
292
455
        for (unsigned j = 1; j < n2.size(); j++)
293
        {
294
          // are we in the last component?
295
455
          if (j + 1 == n2.size())
296
          {
297
270
            Node n1rb_last;
298
270
            Node n1re_last;
299
            // last component of n2 must be a prefix
300
540
            if (componentContainsBase(n1[i + j],
301
270
                                      n2[j],
302
                                      n1rb_last,
303
                                      n1re_last,
304
                                      -1,
305
270
                                      computeRemainder && remainderDir != -1))
306
            {
307
516
              Trace("strings-entail-debug")
308
258
                  << "Last remainder begin is " << n1rb_last << std::endl;
309
516
              Trace("strings-entail-debug")
310
258
                  << "Last remainder end is " << n1re_last << std::endl;
311
258
              Assert(n1rb_last.isNull());
312
258
              if (computeRemainder)
313
              {
314
128
                if (remainderDir != -1)
315
                {
316
128
                  if (!n1re_last.isNull())
317
                  {
318
2
                    ne.push_back(n1re_last);
319
                  }
320
128
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
321
128
                  n1.erase(n1.begin() + i + j + 1, n1.end());
322
128
                  n1[i + j] = n2[j];
323
                }
324
128
                if (remainderDir != 1)
325
                {
326
                  n1[i] = n2[0];
327
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
328
                  n1.erase(n1.begin(), n1.begin() + i);
329
                  if (!n1rb_first.isNull())
330
                  {
331
                    nb.push_back(n1rb_first);
332
                  }
333
                }
334
              }
335
258
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
336
258
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
337
258
              Trace("strings-entail-debug") << "...return " << i << std::endl;
338
258
              return i;
339
            }
340
            else
341
            {
342
12
              break;
343
            }
344
          }
345
185
          else if (n1[i + j] != n2[j])
346
          {
347
6
            break;
348
          }
349
        }
350
      }
351
    }
352
  }
353
75279
  return -1;
354
}
355
356
81458
bool StringsEntail::componentContainsBase(
357
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
358
{
359
81458
  Assert(n1rb.isNull());
360
81458
  Assert(n1re.isNull());
361
362
81458
  NodeManager* nm = NodeManager::currentNM();
363
364
81458
  if (n1 == n2)
365
  {
366
1241
    return true;
367
  }
368
  else
369
  {
370
80217
    if (n1.isConst() && n2.isConst())
371
    {
372
573
      size_t len1 = Word::getLength(n1);
373
573
      size_t len2 = Word::getLength(n2);
374
573
      if (len2 < len1)
375
      {
376
236
        if (dir == 1)
377
        {
378
20
          if (Word::suffix(n1, len2) == n2)
379
          {
380
20
            if (computeRemainder)
381
            {
382
              n1rb = Word::prefix(n1, len1 - len2);
383
            }
384
20
            return true;
385
          }
386
        }
387
216
        else if (dir == -1)
388
        {
389
4
          if (Word::prefix(n1, len2) == n2)
390
          {
391
4
            if (computeRemainder)
392
            {
393
              n1re = Word::suffix(n1, len1 - len2);
394
            }
395
4
            return true;
396
          }
397
        }
398
        else
399
        {
400
212
          size_t f = Word::find(n1, n2);
401
212
          if (f != std::string::npos)
402
          {
403
208
            if (computeRemainder)
404
            {
405
111
              if (f > 0)
406
              {
407
80
                n1rb = Word::prefix(n1, f);
408
              }
409
111
              if (len1 > f + len2)
410
              {
411
37
                n1re = Word::suffix(n1, len1 - (f + len2));
412
              }
413
            }
414
208
            return true;
415
          }
416
        }
417
      }
418
    }
419
    else
420
    {
421
      // cases for:
422
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
423
79644
      if (n2.getKind() == STRING_SUBSTR)
424
      {
425
34791
        if (n2[0] == n1)
426
        {
427
788
          bool success = true;
428
788
          Node start_pos = n2[1];
429
788
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
430
788
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
431
788
          if (dir == 1)
432
          {
433
            // To be a suffix, start + length must be greater than
434
            // or equal to the length of the string.
435
4
            success = ArithEntail::check(end_pos, len_n2s);
436
          }
437
784
          else if (dir == -1)
438
          {
439
            // To be a prefix, must literally start at 0, since
440
            //   if we knew it started at <0, it should be rewritten to "",
441
            //   if we knew it started at 0, then n2[1] should be rewritten to
442
            //   0.
443
16
            success = start_pos.isConst()
444
8
                      && start_pos.getConst<Rational>().sgn() == 0;
445
          }
446
788
          if (success)
447
          {
448
788
            if (computeRemainder)
449
            {
450
              // we can only compute the remainder if start_pos and end_pos
451
              // are known to be non-negative.
452
54
              if (!ArithEntail::check(start_pos)
453
54
                  || !ArithEntail::check(end_pos))
454
              {
455
12
                return false;
456
              }
457
6
              if (dir != -1)
458
              {
459
12
                n1rb = nm->mkNode(
460
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
461
              }
462
6
              if (dir != 1)
463
              {
464
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
465
              }
466
            }
467
776
            return true;
468
          }
469
        }
470
      }
471
472
78856
      if (!computeRemainder && dir == 0)
473
      {
474
77262
        if (n1.getKind() == STRING_REPLACE)
475
        {
476
          // (str.contains (str.replace x y z) w) ---> true
477
          // if (str.contains x w) --> true and (str.contains z w) ---> true
478
3268
          Node xCtnW = checkContains(n1[0], n2);
479
2250
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
480
          {
481
1786
            Node zCtnW = checkContains(n1[2], n2);
482
1509
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
483
            {
484
1232
              return true;
485
            }
486
          }
487
        }
488
      }
489
    }
490
  }
491
77965
  return false;
492
}
493
494
79694
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
495
                                           std::vector<Node>& n2,
496
                                           std::vector<Node>& nb,
497
                                           std::vector<Node>& ne,
498
                                           int dir)
499
{
500
79694
  Assert(nb.empty());
501
79694
  Assert(ne.empty());
502
79694
  bool changed = false;
503
  // for ( forwards, backwards ) direction
504
201122
  for (unsigned r = 0; r < 2; r++)
505
  {
506
140420
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
507
    {
508
136352
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
509
136352
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
510
136352
      bool removeComponent = false;
511
253712
      Node n1cmp = n1[index0];
512
513
136352
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
514
      {
515
18924
        return false;
516
      }
517
518
234788
      std::vector<Node> sss;
519
234788
      std::vector<Node> sls;
520
117428
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
521
234856
      Trace("strings-rewrite-debug2")
522
117428
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
523
117428
          << ", dir = " << r << ", sss/sls=" << sss << "/" << sls << std::endl;
524
117428
      if (n1cmp.isConst())
525
      {
526
112536
        Node s = n1cmp;
527
56268
        size_t slen = Word::getLength(s);
528
        // overlap is an overapproximation of the number of characters
529
        // n2[index1] can match in s
530
56268
        unsigned overlap = Word::getLength(s);
531
56268
        if (n2[index1].isConst())
532
        {
533
17212
          Node t = n2[index1];
534
8606
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
535
8606
          if (ret == std::string::npos)
536
          {
537
389
            if (n1.size() == 1)
538
            {
539
              // can remove everything
540
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
541
              //   str.contains( "", str.++( "ba", x ) )
542
              //   or std.contains( str.substr( "abc", x, y ), "d" ) --->
543
              //   str.contains( "", "d" )
544
14
              removeComponent = true;
545
            }
546
375
            else if (sss.empty())  // only if not substr
547
            {
548
              // check how much overlap there is
549
              // This is used to partially strip off the endpoint
550
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
551
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
552
363
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
553
            }
554
            // note that we cannot process substring here, since t may
555
            // match only part of s. Consider:
556
            // (str.++ "C" (str.substr "AB" x y)), "CB"
557
            // where "AB" and "CB" have no overlap, but "C" is not part of what
558
            // is matched with "AB".
559
          }
560
8217
          else if (sss.empty())  // only if not substr
561
          {
562
661
            Assert(ret < slen);
563
            // can strip off up to the find position, e.g.
564
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
565
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
566
            // and
567
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
568
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
569
661
            overlap = slen - ret;
570
          }
571
        }
572
        else
573
        {
574
          // inconclusive
575
        }
576
56268
        Trace("strings-rewrite-debug2") << "rem = " << removeComponent << ", overlap = " << overlap << std::endl;
577
        // process the overlap
578
56268
        if (overlap < slen)
579
        {
580
121
          changed = true;
581
121
          if (overlap == 0)
582
          {
583
92
            removeComponent = true;
584
          }
585
          else
586
          {
587
            // can drop the prefix (resp. suffix) from the first (resp. last)
588
            // component
589
29
            if (r == 0)
590
            {
591
13
              nb.push_back(Word::prefix(s, slen - overlap));
592
13
              n1[index0] = Word::suffix(s, overlap);
593
            }
594
            else
595
            {
596
16
              ne.push_back(Word::suffix(s, slen - overlap));
597
16
              n1[index0] = Word::prefix(s, overlap);
598
            }
599
          }
600
        }
601
      }
602
61160
      else if (n1cmp.getKind() == STRING_ITOS)
603
      {
604
300
        if (n2[index1].isConst())
605
        {
606
228
          Assert(n2[index1].getType().isString());  // string-only
607
456
          cvc5::String t = n2[index1].getConst<String>();
608
228
          if (n1.size() == 1)
609
          {
610
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
611
            // the entire component
612
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
613
218
            if (!t.isNumber())
614
            {
615
54
              removeComponent = true;
616
            }
617
          }
618
          else
619
          {
620
10
            const std::vector<unsigned>& tvec = t.getVec();
621
10
            Assert(tvec.size() > 0);
622
623
            // if n1.size()>1, then if the first (resp. last) character of
624
            // n2[index1]
625
            //  is not a digit, we can drop the entire component, e.g.:
626
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
627
            //    str.contains( y, "a12" )
628
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
629
            //    str.contains( y, "a0b" )
630
10
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
631
10
            if (!String::isDigit(tvec[i]))
632
            {
633
8
              removeComponent = true;
634
            }
635
          }
636
        }
637
      }
638
117428
      if (removeComponent)
639
      {
640
168
        Trace("strings-rewrite-debug2") << "...remove component" << std::endl;
641
        // can drop entire first (resp. last) component
642
168
        if (r == 0)
643
        {
644
113
          nb.push_back(n1[index0]);
645
113
          n1.erase(n1.begin(), n1.begin() + 1);
646
        }
647
        else
648
        {
649
55
          ne.push_back(n1[index0]);
650
55
          n1.pop_back();
651
        }
652
168
        if (n1.empty())
653
        {
654
          // if we've removed everything, just return (we will rewrite to false)
655
68
          return true;
656
        }
657
        else
658
        {
659
100
          changed = true;
660
        }
661
      }
662
    }
663
  }
664
  // TODO (#1180) : computing the maximal overlap in this function may be
665
  // important.
666
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
667
  // false
668
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
669
  //   leaving 4 characters
670
  //      which is larger that the upper bound for length of str.substr(y,0,3),
671
  //      which is 3.
672
60702
  return changed;
673
}
674
675
15499
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
676
{
677
15499
  NodeManager* nm = NodeManager::currentNM();
678
30998
  Node ctn = nm->mkNode(STRING_CONTAINS, a, b);
679
680
15499
  if (fullRewriter)
681
  {
682
15499
    ctn = Rewriter::rewrite(ctn);
683
  }
684
  else
685
  {
686
    Node prev;
687
    do
688
    {
689
      prev = ctn;
690
      ctn = d_rewriter.rewriteContains(ctn);
691
      if (ctn != prev)
692
      {
693
        ctn = d_rewriter.postProcessRewrite(prev, ctn);
694
      }
695
    } while (prev != ctn && ctn.getKind() == STRING_CONTAINS);
696
  }
697
698
15499
  Assert(ctn.getType().isBoolean());
699
30998
  return ctn.isConst() ? ctn : Node::null();
700
}
701
702
19419
bool StringsEntail::checkNonEmpty(Node a)
703
{
704
38838
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
705
19419
  len = Rewriter::rewrite(len);
706
38838
  return ArithEntail::check(len, true);
707
}
708
709
48021
bool StringsEntail::checkLengthOne(Node s, bool strict)
710
{
711
48021
  NodeManager* nm = NodeManager::currentNM();
712
96042
  Node one = nm->mkConst(Rational(1));
713
96042
  Node len = nm->mkNode(STRING_LENGTH, s);
714
48021
  len = Rewriter::rewrite(len);
715
192084
  return ArithEntail::check(one, len)
716
240105
         && (!strict || ArithEntail::check(len, true));
717
}
718
719
74020
bool StringsEntail::checkMultisetSubset(Node a, Node b)
720
{
721
148040
  std::vector<Node> avec;
722
74020
  utils::getConcat(getMultisetApproximation(a), avec);
723
148040
  std::vector<Node> bvec;
724
74020
  utils::getConcat(b, bvec);
725
726
148040
  std::map<Node, unsigned> num_nconst[2];
727
148040
  std::map<Node, unsigned> num_const[2];
728
222060
  for (unsigned j = 0; j < 2; j++)
729
  {
730
148040
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
731
311975
    for (const Node& cc : jvec)
732
    {
733
163935
      if (cc.isConst())
734
      {
735
81189
        num_const[j][cc]++;
736
      }
737
      else
738
      {
739
82746
        num_nconst[j][cc]++;
740
      }
741
    }
742
  }
743
74020
  bool ms_success = true;
744
75136
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
745
  {
746
29169
    if (nncp.second > num_nconst[1][nncp.first])
747
    {
748
28053
      ms_success = false;
749
28053
      break;
750
    }
751
  }
752
74020
  if (ms_success)
753
  {
754
    // count the number of constant characters in the first argument
755
137901
    std::map<Node, unsigned> count_const[2];
756
91916
    std::vector<Node> chars;
757
137901
    for (unsigned j = 0; j < 2; j++)
758
    {
759
142181
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
760
      {
761
100494
        Node cn = ncp.first;
762
50247
        Assert(cn.isConst());
763
100494
        std::vector<Node> cnChars = Word::getChars(cn);
764
144232
        for (const Node& ch : cnChars)
765
        {
766
93985
          count_const[j][ch] += ncp.second;
767
93985
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
768
          {
769
67849
            chars.push_back(ch);
770
          }
771
        }
772
      }
773
    }
774
91934
    Trace("strings-entail-ms-ss")
775
45967
        << "For " << a << " and " << b << " : " << std::endl;
776
113798
    for (const Node& ch : chars)
777
    {
778
67849
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
779
67849
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
780
135698
      Trace("strings-entail-ms-ss")
781
67849
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
782
67849
      if (count_const[0][ch] < count_const[1][ch])
783
      {
784
18
        return true;
785
      }
786
    }
787
788
    // TODO (#1180): count the number of 2,3,4,.. character substrings
789
    // for example:
790
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
791
    // since the second argument contains more occurrences of "bb".
792
    // note this is orthogonal reasoning to inductive reasoning
793
    // via regular membership reduction in Liang et al CAV 2015.
794
  }
795
74002
  return false;
796
}
797
798
84299
Node StringsEntail::checkHomogeneousString(Node a)
799
{
800
168598
  std::vector<Node> avec;
801
84299
  utils::getConcat(getMultisetApproximation(a), avec);
802
803
84299
  bool cValid = false;
804
168598
  Node c;
805
126334
  for (const Node& ac : avec)
806
  {
807
88831
    if (ac.isConst())
808
    {
809
97837
      std::vector<Node> acv = Word::getChars(ac);
810
96516
      for (const Node& cc : acv)
811
      {
812
54481
        if (!cValid)
813
        {
814
36195
          cValid = true;
815
36195
          c = cc;
816
        }
817
18286
        else if (c != cc)
818
        {
819
          // Found a different character
820
13767
          return Node::null();
821
        }
822
      }
823
    }
824
    else
825
    {
826
      // Could produce a different character
827
33029
      return Node::null();
828
    }
829
  }
830
831
37503
  if (!cValid)
832
  {
833
18716
    return Word::mkEmptyWord(a.getType());
834
  }
835
836
18787
  return c;
837
}
838
839
183283
Node StringsEntail::getMultisetApproximation(Node a)
840
{
841
183283
  NodeManager* nm = NodeManager::currentNM();
842
183283
  if (a.getKind() == STRING_SUBSTR)
843
  {
844
55468
    return a[0];
845
  }
846
127815
  else if (a.getKind() == STRING_REPLACE)
847
  {
848
5072
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
849
  }
850
122743
  else if (a.getKind() == STRING_CONCAT)
851
  {
852
17834
    NodeBuilder nb(STRING_CONCAT);
853
28809
    for (const Node& ac : a)
854
    {
855
19892
      nb << getMultisetApproximation(ac);
856
    }
857
8917
    return nb.constructNode();
858
  }
859
  else
860
  {
861
113826
    return a;
862
  }
863
}
864
865
156205
Node StringsEntail::getStringOrEmpty(Node n)
866
{
867
156205
  Node res;
868
468655
  while (res.isNull())
869
  {
870
156225
    switch (n.getKind())
871
    {
872
1439
      case STRING_REPLACE:
873
      {
874
1439
        if (Word::isEmpty(n[0]))
875
        {
876
          // (str.replace "" x y) --> y
877
20
          n = n[2];
878
20
          break;
879
        }
880
881
1419
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
882
        {
883
          // (str.replace "A" x "") --> "A"
884
36
          res = n[0];
885
36
          break;
886
        }
887
888
1383
        res = n;
889
1383
        break;
890
      }
891
8846
      case STRING_SUBSTR:
892
      {
893
8846
        if (checkLengthOne(n[0]))
894
        {
895
          // (str.substr "A" x y) --> "A"
896
575
          res = n[0];
897
575
          break;
898
        }
899
8271
        res = n;
900
8271
        break;
901
      }
902
145940
      default:
903
      {
904
145940
        res = n;
905
145940
        break;
906
      }
907
    }
908
  }
909
156205
  return res;
910
}
911
912
1064
Node StringsEntail::inferEqsFromContains(Node x, Node y)
913
{
914
1064
  NodeManager* nm = NodeManager::currentNM();
915
2128
  Node emp = Word::mkEmptyWord(x.getType());
916
1064
  Assert(x.getType() == y.getType());
917
2128
  TypeNode stype = x.getType();
918
919
2128
  Node xLen = nm->mkNode(STRING_LENGTH, x);
920
2128
  std::vector<Node> yLens;
921
1064
  if (y.getKind() != STRING_CONCAT)
922
  {
923
108
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
924
  }
925
  else
926
  {
927
3524
    for (const Node& yi : y)
928
    {
929
2568
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
930
    }
931
  }
932
933
2128
  std::vector<Node> zeroLens;
934
1064
  if (x == emp)
935
  {
936
    // If x is the empty string, then all ys must be empty, too, and we can
937
    // skip the expensive checks. Note that this is just a performance
938
    // optimization.
939
92
    zeroLens.swap(yLens);
940
  }
941
  else
942
  {
943
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
944
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
945
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
946
    // true. The terms that can have length zero without making the inequality
947
    // false must be all be empty if (str.contains x y) is true.
948
972
    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
949
    {
950
      // We could not prove that the inequality holds
951
882
      return Node::null();
952
    }
953
90
    else if (yLens.size() == y.getNumChildren())
954
    {
955
      // We could only prove that the inequality holds but not that any of the
956
      // ys must be empty
957
49
      return nm->mkNode(EQUAL, x, y);
958
    }
959
  }
960
961
133
  if (y.getKind() != STRING_CONCAT)
962
  {
963
8
    if (zeroLens.size() == 1)
964
    {
965
      // y is not a concatenation and we found that it must be empty, so just
966
      // return (= y "")
967
      Assert(zeroLens[0][0] == y);
968
      return nm->mkNode(EQUAL, y, emp);
969
    }
970
    else
971
    {
972
8
      Assert(yLens.size() == 1 && yLens[0][0] == y);
973
8
      return nm->mkNode(EQUAL, x, y);
974
    }
975
  }
976
977
250
  std::vector<Node> cs;
978
166
  for (const Node& yiLen : yLens)
979
  {
980
41
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
981
41
    cs.push_back(yiLen[0]);
982
  }
983
984
250
  NodeBuilder nb(AND);
985
  // (= x (str.++ y1' ... ym'))
986
125
  if (!cs.empty())
987
  {
988
33
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
989
  }
990
  // (= y1'' "") ... (= yk'' "")
991
379
  for (const Node& zeroLen : zeroLens)
992
  {
993
254
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
994
254
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
995
  }
996
997
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
998
125
  return nb.constructNode();
999
}
1000
1001
}  // namespace strings
1002
}  // namespace theory
1003
29340
}  // namespace cvc5