GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/strings_entail.cpp Lines: 425 441 96.4 %
Date: 2021-03-23 Branches: 958 2017 47.5 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file strings_entail.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Andrew Reynolds, Andres Noetzli
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Implementation of entailment tests involving strings.
13
 **/
14
15
#include "theory/strings/strings_entail.h"
16
17
#include "expr/node_builder.h"
18
#include "theory/rewriter.h"
19
#include "theory/strings/arith_entail.h"
20
#include "theory/strings/sequences_rewriter.h"
21
#include "theory/strings/theory_strings_utils.h"
22
#include "theory/strings/word.h"
23
24
using namespace CVC4::kind;
25
26
namespace CVC4 {
27
namespace theory {
28
namespace strings {
29
30
9279
StringsEntail::StringsEntail(SequencesRewriter& rewriter) : d_rewriter(rewriter)
31
{
32
9279
}
33
34
2308
bool StringsEntail::canConstantContainConcat(Node c,
35
                                             Node n,
36
                                             int& firstc,
37
                                             int& lastc)
38
{
39
2308
  Assert(c.isConst());
40
2308
  Assert(n.getKind() == STRING_CONCAT);
41
  // must find constant components in order
42
2308
  size_t pos = 0;
43
2308
  firstc = -1;
44
2308
  lastc = -1;
45
8779
  for (unsigned i = 0; i < n.getNumChildren(); i++)
46
  {
47
6589
    if (n[i].isConst())
48
    {
49
1704
      firstc = firstc == -1 ? i : firstc;
50
1704
      lastc = i;
51
1704
      size_t new_pos = Word::find(c, n[i], pos);
52
1704
      if (new_pos == std::string::npos)
53
      {
54
110
        return false;
55
      }
56
      else
57
      {
58
1594
        pos = new_pos + Word::getLength(n[i]);
59
      }
60
    }
61
4885
    else if (n[i].getKind() == STRING_ITOS && ArithEntail::check(n[i][0]))
62
    {
63
20
      Assert(c.getType().isString());  // string-only
64
20
      const std::vector<unsigned>& tvec = c.getConst<String>().getVec();
65
      // find the first occurrence of a digit starting at pos
66
116
      while (pos < tvec.size() && !String::isDigit(tvec[pos]))
67
      {
68
48
        pos++;
69
      }
70
20
      if (pos == tvec.size())
71
      {
72
8
        return false;
73
      }
74
      // must consume at least one digit here
75
12
      pos++;
76
    }
77
  }
78
2190
  return true;
79
}
80
81
44454
bool StringsEntail::canConstantContainList(Node c,
82
                                           std::vector<Node>& l,
83
                                           int& firstc,
84
                                           int& lastc)
85
{
86
44454
  Assert(c.isConst());
87
  // must find constant components in order
88
44454
  size_t pos = 0;
89
44454
  firstc = -1;
90
44454
  lastc = -1;
91
127719
  for (unsigned i = 0; i < l.size(); i++)
92
  {
93
83821
    if (l[i].isConst())
94
    {
95
72967
      firstc = firstc == -1 ? i : firstc;
96
72967
      lastc = i;
97
72967
      size_t new_pos = Word::find(c, l[i], pos);
98
72967
      if (new_pos == std::string::npos)
99
      {
100
556
        return false;
101
      }
102
      else
103
      {
104
72411
        pos = new_pos + Word::getLength(l[i]);
105
      }
106
    }
107
  }
108
43898
  return true;
109
}
110
111
59963
bool StringsEntail::stripSymbolicLength(std::vector<Node>& n1,
112
                                        std::vector<Node>& nr,
113
                                        int dir,
114
                                        Node& curr,
115
                                        bool strict)
116
{
117
59963
  Assert(dir == 1 || dir == -1);
118
59963
  Assert(nr.empty());
119
59963
  NodeManager* nm = NodeManager::currentNM();
120
119926
  Node zero = nm->mkConst(CVC4::Rational(0));
121
59963
  bool ret = false;
122
59963
  bool success = true;
123
59963
  unsigned sindex = 0;
124
161529
  while (success && curr != zero && sindex < n1.size())
125
  {
126
50783
    Assert(!curr.isNull());
127
50783
    success = false;
128
50783
    unsigned sindex_use = dir == 1 ? sindex : ((n1.size() - 1) - sindex);
129
50783
    if (n1[sindex_use].isConst())
130
    {
131
      // could strip part of a constant
132
28972
      Node lowerBound = ArithEntail::getConstantBound(Rewriter::rewrite(curr));
133
14486
      if (!lowerBound.isNull())
134
      {
135
3968
        Assert(lowerBound.isConst());
136
7936
        Rational lbr = lowerBound.getConst<Rational>();
137
3968
        if (lbr.sgn() > 0)
138
        {
139
2680
          Assert(ArithEntail::check(curr, true));
140
5360
          Node s = n1[sindex_use];
141
2680
          size_t slen = Word::getLength(s);
142
5360
          Node ncl = nm->mkConst(CVC4::Rational(slen));
143
5360
          Node next_s = nm->mkNode(MINUS, lowerBound, ncl);
144
2680
          next_s = Rewriter::rewrite(next_s);
145
2680
          Assert(next_s.isConst());
146
          // we can remove the entire constant
147
2680
          if (next_s.getConst<Rational>().sgn() >= 0)
148
          {
149
737
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, ncl));
150
737
            success = true;
151
737
            sindex++;
152
          }
153
          else
154
          {
155
            // we can remove part of the constant
156
            // lower bound minus the length of a concrete string is negative,
157
            // hence lowerBound cannot be larger than long max
158
1943
            Assert(lbr < Rational(String::maxSize()));
159
1943
            curr = Rewriter::rewrite(nm->mkNode(MINUS, curr, lowerBound));
160
1943
            uint32_t lbsize = lbr.getNumerator().toUnsignedInt();
161
1943
            Assert(lbsize < slen);
162
1943
            if (dir == 1)
163
            {
164
              // strip partially from the front
165
1894
              nr.push_back(Word::prefix(s, lbsize));
166
1894
              n1[sindex_use] = Word::suffix(s, slen - lbsize);
167
            }
168
            else
169
            {
170
              // strip partially from the back
171
49
              nr.push_back(Word::suffix(s, lbsize));
172
49
              n1[sindex_use] = Word::prefix(s, slen - lbsize);
173
            }
174
1943
            ret = true;
175
          }
176
2680
          Assert(ArithEntail::check(curr));
177
        }
178
        else
179
        {
180
          // we cannot remove the constant
181
        }
182
      }
183
    }
184
    else
185
    {
186
      Node next_s = NodeManager::currentNM()->mkNode(
187
          MINUS,
188
          curr,
189
72594
          NodeManager::currentNM()->mkNode(STRING_LENGTH, n1[sindex_use]));
190
36297
      next_s = Rewriter::rewrite(next_s);
191
36297
      if (ArithEntail::check(next_s))
192
      {
193
442
        success = true;
194
442
        curr = next_s;
195
442
        sindex++;
196
      }
197
    }
198
  }
199
59963
  if (sindex > 0 && (!strict || curr == zero))
200
  {
201
1053
    if (dir == 1)
202
    {
203
1031
      nr.insert(nr.begin(), n1.begin(), n1.begin() + sindex);
204
1031
      n1.erase(n1.begin(), n1.begin() + sindex);
205
    }
206
    else
207
    {
208
22
      nr.insert(nr.end(), n1.end() - sindex, n1.end());
209
22
      n1.erase(n1.end() - sindex, n1.end());
210
    }
211
1053
    ret = true;
212
  }
213
119926
  return ret;
214
}
215
216
173797
int StringsEntail::componentContains(std::vector<Node>& n1,
217
                                     std::vector<Node>& n2,
218
                                     std::vector<Node>& nb,
219
                                     std::vector<Node>& ne,
220
                                     bool computeRemainder,
221
                                     int remainderDir)
222
{
223
173797
  Assert(nb.empty());
224
173797
  Assert(ne.empty());
225
173797
  Trace("strings-entail") << "Component contains: " << std::endl;
226
173797
  Trace("strings-entail") << "n1 = " << n1 << std::endl;
227
173797
  Trace("strings-entail") << "n2 = " << n2 << std::endl;
228
  // if n2 is a singleton, we can do optimized version here
229
173797
  if (n2.size() == 1)
230
  {
231
297151
    for (unsigned i = 0; i < n1.size(); i++)
232
    {
233
333004
      Node n1rb;
234
333004
      Node n1re;
235
167753
      if (componentContainsBase(n1[i], n2[0], n1rb, n1re, 0, computeRemainder))
236
      {
237
2502
        if (computeRemainder)
238
        {
239
272
          n1[i] = n2[0];
240
272
          if (remainderDir != -1)
241
          {
242
272
            if (!n1re.isNull())
243
            {
244
37
              ne.push_back(n1re);
245
            }
246
272
            ne.insert(ne.end(), n1.begin() + i + 1, n1.end());
247
272
            n1.erase(n1.begin() + i + 1, n1.end());
248
          }
249
          else if (!n1re.isNull())
250
          {
251
            n1[i] = Rewriter::rewrite(
252
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1[i], n1re));
253
          }
254
272
          if (remainderDir != 1)
255
          {
256
            nb.insert(nb.end(), n1.begin(), n1.begin() + i);
257
            n1.erase(n1.begin(), n1.begin() + i);
258
            if (!n1rb.isNull())
259
            {
260
              nb.push_back(n1rb);
261
            }
262
          }
263
272
          else if (!n1rb.isNull())
264
          {
265
94
            n1[i] = Rewriter::rewrite(
266
188
                NodeManager::currentNM()->mkNode(STRING_CONCAT, n1rb, n1[i]));
267
          }
268
        }
269
2502
        return i;
270
      }
271
    }
272
  }
273
41897
  else if (n1.size() >= n2.size())
274
  {
275
11850
    unsigned diff = n1.size() - n2.size();
276
31119
    for (unsigned i = 0; i <= diff; i++)
277
    {
278
38667
      Node n1rb_first;
279
38667
      Node n1re_first;
280
      // first component of n2 must be a suffix
281
38796
      if (componentContainsBase(n1[i],
282
19398
                                n2[0],
283
                                n1rb_first,
284
                                n1re_first,
285
                                1,
286
19398
                                computeRemainder && remainderDir != 1))
287
      {
288
4180
        Assert(n1re_first.isNull());
289
7090
        for (unsigned j = 1; j < n2.size(); j++)
290
        {
291
          // are we in the last component?
292
7090
          if (j + 1 == n2.size())
293
          {
294
886
            Node n1rb_last;
295
886
            Node n1re_last;
296
            // last component of n2 must be a prefix
297
1772
            if (componentContainsBase(n1[i + j],
298
886
                                      n2[j],
299
                                      n1rb_last,
300
                                      n1re_last,
301
                                      -1,
302
886
                                      computeRemainder && remainderDir != -1))
303
            {
304
258
              Trace("strings-entail-debug")
305
129
                  << "Last remainder begin is " << n1rb_last << std::endl;
306
258
              Trace("strings-entail-debug")
307
129
                  << "Last remainder end is " << n1re_last << std::endl;
308
129
              Assert(n1rb_last.isNull());
309
129
              if (computeRemainder)
310
              {
311
12
                if (remainderDir != -1)
312
                {
313
12
                  if (!n1re_last.isNull())
314
                  {
315
2
                    ne.push_back(n1re_last);
316
                  }
317
12
                  ne.insert(ne.end(), n1.begin() + i + j + 1, n1.end());
318
12
                  n1.erase(n1.begin() + i + j + 1, n1.end());
319
12
                  n1[i + j] = n2[j];
320
                }
321
12
                if (remainderDir != 1)
322
                {
323
                  n1[i] = n2[0];
324
                  nb.insert(nb.end(), n1.begin(), n1.begin() + i);
325
                  n1.erase(n1.begin(), n1.begin() + i);
326
                  if (!n1rb_first.isNull())
327
                  {
328
                    nb.push_back(n1rb_first);
329
                  }
330
                }
331
              }
332
129
              Trace("strings-entail-debug") << "ne = " << ne << std::endl;
333
129
              Trace("strings-entail-debug") << "nb = " << nb << std::endl;
334
129
              Trace("strings-entail-debug") << "...return " << i << std::endl;
335
129
              return i;
336
            }
337
            else
338
            {
339
757
              break;
340
            }
341
          }
342
6204
          else if (n1[i + j] != n2[j])
343
          {
344
3294
            break;
345
          }
346
        }
347
      }
348
    }
349
  }
350
171166
  return -1;
351
}
352
353
188037
bool StringsEntail::componentContainsBase(
354
    Node n1, Node n2, Node& n1rb, Node& n1re, int dir, bool computeRemainder)
355
{
356
188037
  Assert(n1rb.isNull());
357
188037
  Assert(n1re.isNull());
358
359
188037
  NodeManager* nm = NodeManager::currentNM();
360
361
188037
  if (n1 == n2)
362
  {
363
6290
    return true;
364
  }
365
  else
366
  {
367
181747
    if (n1.isConst() && n2.isConst())
368
    {
369
3158
      size_t len1 = Word::getLength(n1);
370
3158
      size_t len2 = Word::getLength(n2);
371
3158
      if (len2 < len1)
372
      {
373
539
        if (dir == 1)
374
        {
375
364
          if (Word::suffix(n1, len2) == n2)
376
          {
377
64
            if (computeRemainder)
378
            {
379
              n1rb = Word::prefix(n1, len1 - len2);
380
            }
381
64
            return true;
382
          }
383
        }
384
175
        else if (dir == -1)
385
        {
386
4
          if (Word::prefix(n1, len2) == n2)
387
          {
388
4
            if (computeRemainder)
389
            {
390
              n1re = Word::suffix(n1, len1 - len2);
391
            }
392
4
            return true;
393
          }
394
        }
395
        else
396
        {
397
171
          size_t f = Word::find(n1, n2);
398
171
          if (f != std::string::npos)
399
          {
400
163
            if (computeRemainder)
401
            {
402
119
              if (f > 0)
403
              {
404
90
                n1rb = Word::prefix(n1, f);
405
              }
406
119
              if (len1 > f + len2)
407
              {
408
33
                n1re = Word::suffix(n1, len1 - (f + len2));
409
              }
410
            }
411
163
            return true;
412
          }
413
        }
414
      }
415
    }
416
    else
417
    {
418
      // cases for:
419
      //   n1 = x   containing   n2 = substr( x, n2[1], n2[2] )
420
178589
      if (n2.getKind() == STRING_SUBSTR)
421
      {
422
32642
        if (n2[0] == n1)
423
        {
424
203
          bool success = true;
425
203
          Node start_pos = n2[1];
426
203
          Node end_pos = nm->mkNode(PLUS, n2[1], n2[2]);
427
203
          Node len_n2s = nm->mkNode(STRING_LENGTH, n2[0]);
428
203
          if (dir == 1)
429
          {
430
            // To be a suffix, start + length must be greater than
431
            // or equal to the length of the string.
432
4
            success = ArithEntail::check(end_pos, len_n2s);
433
          }
434
199
          else if (dir == -1)
435
          {
436
            // To be a prefix, must literally start at 0, since
437
            //   if we knew it started at <0, it should be rewritten to "",
438
            //   if we knew it started at 0, then n2[1] should be rewritten to
439
            //   0.
440
24
            success = start_pos.isConst()
441
12
                      && start_pos.getConst<Rational>().sgn() == 0;
442
          }
443
203
          if (success)
444
          {
445
203
            if (computeRemainder)
446
            {
447
              // we can only compute the remainder if start_pos and end_pos
448
              // are known to be non-negative.
449
54
              if (!ArithEntail::check(start_pos)
450
54
                  || !ArithEntail::check(end_pos))
451
              {
452
12
                return false;
453
              }
454
6
              if (dir != -1)
455
              {
456
12
                n1rb = nm->mkNode(
457
8
                    STRING_SUBSTR, n2[0], nm->mkConst(Rational(0)), start_pos);
458
              }
459
6
              if (dir != 1)
460
              {
461
6
                n1re = nm->mkNode(STRING_SUBSTR, n2[0], end_pos, len_n2s);
462
              }
463
            }
464
191
            return true;
465
          }
466
        }
467
      }
468
469
178386
      if (!computeRemainder && dir == 0)
470
      {
471
163149
        if (n1.getKind() == STRING_STRREPL)
472
        {
473
          // (str.contains (str.replace x y z) w) ---> true
474
          // if (str.contains x w) --> true and (str.contains z w) ---> true
475
2841
          Node xCtnW = checkContains(n1[0], n2);
476
1470
          if (!xCtnW.isNull() && xCtnW.getConst<bool>())
477
          {
478
477
            Node zCtnW = checkContains(n1[2], n2);
479
288
            if (!zCtnW.isNull() && zCtnW.getConst<bool>())
480
            {
481
99
              return true;
482
            }
483
          }
484
        }
485
      }
486
    }
487
  }
488
181214
  return false;
489
}
490
491
174379
bool StringsEntail::stripConstantEndpoints(std::vector<Node>& n1,
492
                                           std::vector<Node>& n2,
493
                                           std::vector<Node>& nb,
494
                                           std::vector<Node>& ne,
495
                                           int dir)
496
{
497
174379
  Assert(nb.empty());
498
174379
  Assert(ne.empty());
499
174379
  bool changed = false;
500
  // for ( forwards, backwards ) direction
501
474977
  for (unsigned r = 0; r < 2; r++)
502
  {
503
324686
    if (dir == 0 || (r == 0 && dir == 1) || (r == 1 && dir == -1))
504
    {
505
321666
      unsigned index0 = r == 0 ? 0 : n1.size() - 1;
506
321666
      unsigned index1 = r == 0 ? 0 : n2.size() - 1;
507
321666
      bool removeComponent = false;
508
619244
      Node n1cmp = n1[index0];
509
510
321666
      if (n1cmp.isConst() && Word::isEmpty(n1cmp))
511
      {
512
24066
        return false;
513
      }
514
515
595178
      std::vector<Node> sss;
516
595178
      std::vector<Node> sls;
517
297600
      n1cmp = utils::decomposeSubstrChain(n1cmp, sss, sls);
518
595200
      Trace("strings-rewrite-debug2")
519
297600
          << "stripConstantEndpoints : Compare " << n1cmp << " " << n2[index1]
520
297600
          << ", dir = " << dir << std::endl;
521
297600
      if (n1cmp.isConst())
522
      {
523
199432
        Node s = n1cmp;
524
99716
        size_t slen = Word::getLength(s);
525
        // overlap is an overapproximation of the number of characters
526
        // n2[index1] can match in s
527
99716
        unsigned overlap = Word::getLength(s);
528
99716
        if (n2[index1].isConst())
529
        {
530
26612
          Node t = n2[index1];
531
13306
          std::size_t ret = r == 0 ? Word::find(s, t) : Word::rfind(s, t);
532
13306
          if (ret == std::string::npos)
533
          {
534
3032
            if (n1.size() == 1)
535
            {
536
              // can remove everything
537
              //   e.g. str.contains( "abc", str.++( "ba", x ) ) -->
538
              //   str.contains( "", str.++( "ba", x ) )
539
20
              removeComponent = true;
540
            }
541
3012
            else if (sss.empty())  // only if not substr
542
            {
543
              // check how much overlap there is
544
              // This is used to partially strip off the endpoint
545
              // e.g. str.contains( str.++( "abc", x ), str.++( "cd", y ) ) -->
546
              // str.contains( str.++( "c", x ), str.++( "cd", y ) )
547
2994
              overlap = r == 0 ? Word::overlap(s, t) : Word::overlap(t, s);
548
            }
549
            else
550
            {
551
              // if we are looking at a substring, we can remove the component
552
              // if there is no overlap
553
              //   e.g. str.contains( str.++( str.substr( "c", i, j ), x), "a" )
554
              //        --> str.contains( x, "a" )
555
18
              removeComponent =
556
36
                  ((r == 0 ? Word::overlap(s, t) : Word::overlap(t, s)) == 0);
557
            }
558
          }
559
10274
          else if (sss.empty())  // only if not substr
560
          {
561
6536
            Assert(ret < slen);
562
            // can strip off up to the find position, e.g.
563
            // str.contains( str.++( "abc", x ), str.++( "b", y ) ) -->
564
            // str.contains( str.++( "bc", x ), str.++( "b", y ) ),
565
            // and
566
            // str.contains( str.++( x, "abbd" ), str.++( y, "b" ) ) -->
567
            // str.contains( str.++( x, "abb" ), str.++( y, "b" ) )
568
6536
            overlap = slen - ret;
569
          }
570
        }
571
        else
572
        {
573
          // inconclusive
574
        }
575
        // process the overlap
576
99716
        if (overlap < slen)
577
        {
578
212
          changed = true;
579
212
          if (overlap == 0)
580
          {
581
166
            removeComponent = true;
582
          }
583
          else
584
          {
585
            // can drop the prefix (resp. suffix) from the first (resp. last)
586
            // component
587
46
            if (r == 0)
588
            {
589
25
              nb.push_back(Word::prefix(s, slen - overlap));
590
25
              n1[index0] = Word::suffix(s, overlap);
591
            }
592
            else
593
            {
594
21
              ne.push_back(Word::suffix(s, slen - overlap));
595
21
              n1[index0] = Word::prefix(s, overlap);
596
            }
597
          }
598
        }
599
      }
600
197884
      else if (n1cmp.getKind() == STRING_ITOS)
601
      {
602
280
        if (n2[index1].isConst())
603
        {
604
26
          Assert(n2[index1].getType().isString());  // string-only
605
52
          CVC4::String t = n2[index1].getConst<String>();
606
26
          if (n1.size() == 1)
607
          {
608
            // if n1.size()==1, then if n2[index1] is not a number, we can drop
609
            // the entire component
610
            //    e.g. str.contains( int.to.str(x), "123a45") --> false
611
10
            if (!t.isNumber())
612
            {
613
2
              removeComponent = true;
614
            }
615
          }
616
          else
617
          {
618
16
            const std::vector<unsigned>& tvec = t.getVec();
619
16
            Assert(tvec.size() > 0);
620
621
            // if n1.size()>1, then if the first (resp. last) character of
622
            // n2[index1]
623
            //  is not a digit, we can drop the entire component, e.g.:
624
            //    str.contains( str.++( int.to.str(x), y ), "a12") -->
625
            //    str.contains( y, "a12" )
626
            //    str.contains( str.++( y, int.to.str(x) ), "a0b") -->
627
            //    str.contains( y, "a0b" )
628
16
            unsigned i = r == 0 ? 0 : (tvec.size() - 1);
629
16
            if (!String::isDigit(tvec[i]))
630
            {
631
12
              removeComponent = true;
632
            }
633
          }
634
        }
635
      }
636
297600
      if (removeComponent)
637
      {
638
        // can drop entire first (resp. last) component
639
204
        if (r == 0)
640
        {
641
134
          nb.push_back(n1[index0]);
642
134
          n1.erase(n1.begin(), n1.begin() + 1);
643
        }
644
        else
645
        {
646
70
          ne.push_back(n1[index0]);
647
70
          n1.pop_back();
648
        }
649
204
        if (n1.empty())
650
        {
651
          // if we've removed everything, just return (we will rewrite to false)
652
22
          return true;
653
        }
654
        else
655
        {
656
182
          changed = true;
657
        }
658
      }
659
    }
660
  }
661
  // TODO (#1180) : computing the maximal overlap in this function may be
662
  // important.
663
  // str.contains( str.++( str.to.int(x), str.substr(y,0,3) ), "2aaaa" ) --->
664
  // false
665
  //   ...since str.to.int(x) can contain at most 1 character from "2aaaa",
666
  //   leaving 4 characters
667
  //      which is larger that the upper bound for length of str.substr(y,0,3),
668
  //      which is 3.
669
150291
  return changed;
670
}
671
672
183473
Node StringsEntail::checkContains(Node a, Node b, bool fullRewriter)
673
{
674
183473
  NodeManager* nm = NodeManager::currentNM();
675
366946
  Node ctn = nm->mkNode(STRING_STRCTN, a, b);
676
677
183473
  if (fullRewriter)
678
  {
679
8633
    ctn = Rewriter::rewrite(ctn);
680
  }
681
  else
682
  {
683
349680
    Node prev;
684
144
    do
685
    {
686
174984
      prev = ctn;
687
174984
      ctn = d_rewriter.rewriteContains(ctn);
688
174984
    } while (prev != ctn && ctn.getKind() == STRING_STRCTN);
689
  }
690
691
183473
  Assert(ctn.getType().isBoolean());
692
366946
  return ctn.isConst() ? ctn : Node::null();
693
}
694
695
19667
bool StringsEntail::checkNonEmpty(Node a)
696
{
697
39334
  Node len = NodeManager::currentNM()->mkNode(STRING_LENGTH, a);
698
19667
  len = Rewriter::rewrite(len);
699
39334
  return ArithEntail::check(len, true);
700
}
701
702
52558
bool StringsEntail::checkLengthOne(Node s, bool strict)
703
{
704
52558
  NodeManager* nm = NodeManager::currentNM();
705
105116
  Node one = nm->mkConst(Rational(1));
706
105116
  Node len = nm->mkNode(STRING_LENGTH, s);
707
52558
  len = Rewriter::rewrite(len);
708
210232
  return ArithEntail::check(one, len)
709
262790
         && (!strict || ArithEntail::check(len, true));
710
}
711
712
170731
bool StringsEntail::checkMultisetSubset(Node a, Node b)
713
{
714
341462
  std::vector<Node> avec;
715
170731
  utils::getConcat(getMultisetApproximation(a), avec);
716
341462
  std::vector<Node> bvec;
717
170731
  utils::getConcat(b, bvec);
718
719
341462
  std::map<Node, unsigned> num_nconst[2];
720
341462
  std::map<Node, unsigned> num_const[2];
721
512193
  for (unsigned j = 0; j < 2; j++)
722
  {
723
341462
    std::vector<Node>& jvec = j == 0 ? avec : bvec;
724
844449
    for (const Node& cc : jvec)
725
    {
726
502987
      if (cc.isConst())
727
      {
728
146828
        num_const[j][cc]++;
729
      }
730
      else
731
      {
732
356159
        num_nconst[j][cc]++;
733
      }
734
    }
735
  }
736
170731
  bool ms_success = true;
737
179247
  for (std::pair<const Node, unsigned>& nncp : num_nconst[0])
738
  {
739
112855
    if (nncp.second > num_nconst[1][nncp.first])
740
    {
741
104339
      ms_success = false;
742
104339
      break;
743
    }
744
  }
745
170731
  if (ms_success)
746
  {
747
    // count the number of constant characters in the first argument
748
199176
    std::map<Node, unsigned> count_const[2];
749
132740
    std::vector<Node> chars;
750
199176
    for (unsigned j = 0; j < 2; j++)
751
    {
752
202072
      for (std::pair<const Node, unsigned>& ncp : num_const[j])
753
      {
754
138576
        Node cn = ncp.first;
755
69288
        Assert(cn.isConst());
756
138576
        std::vector<Node> cnChars = Word::getChars(cn);
757
253688
        for (const Node& ch : cnChars)
758
        {
759
184400
          count_const[j][ch] += ncp.second;
760
184400
          if (std::find(chars.begin(), chars.end(), ch) == chars.end())
761
          {
762
126398
            chars.push_back(ch);
763
          }
764
        }
765
      }
766
    }
767
132784
    Trace("strings-entail-ms-ss")
768
66392
        << "For " << a << " and " << b << " : " << std::endl;
769
192726
    for (const Node& ch : chars)
770
    {
771
126378
      Trace("strings-entail-ms-ss") << "  # occurrences of substring ";
772
126378
      Trace("strings-entail-ms-ss") << ch << " in arguments is ";
773
252756
      Trace("strings-entail-ms-ss")
774
126378
          << count_const[0][ch] << " / " << count_const[1][ch] << std::endl;
775
126378
      if (count_const[0][ch] < count_const[1][ch])
776
      {
777
44
        return true;
778
      }
779
    }
780
781
    // TODO (#1180): count the number of 2,3,4,.. character substrings
782
    // for example:
783
    // str.contains( str.++( x, "cbabc" ), str.++( "cabbc", x ) ) ---> false
784
    // since the second argument contains more occurrences of "bb".
785
    // note this is orthogonal reasoning to inductive reasoning
786
    // via regular membership reduction in Liang et al CAV 2015.
787
  }
788
170687
  return false;
789
}
790
791
149822
Node StringsEntail::checkHomogeneousString(Node a)
792
{
793
299644
  std::vector<Node> avec;
794
149822
  utils::getConcat(getMultisetApproximation(a), avec);
795
796
149822
  bool cValid = false;
797
299644
  Node c;
798
221766
  for (const Node& ac : avec)
799
  {
800
152296
    if (ac.isConst())
801
    {
802
166873
      std::vector<Node> acv = Word::getChars(ac);
803
148088
      for (const Node& cc : acv)
804
      {
805
76144
        if (!cValid)
806
        {
807
49129
          cValid = true;
808
49129
          c = cc;
809
        }
810
27015
        else if (c != cc)
811
        {
812
          // Found a different character
813
22985
          return Node::null();
814
        }
815
      }
816
    }
817
    else
818
    {
819
      // Could produce a different character
820
57367
      return Node::null();
821
    }
822
  }
823
824
69470
  if (!cValid)
825
  {
826
45294
    return Word::mkEmptyWord(a.getType());
827
  }
828
829
24176
  return c;
830
}
831
832
457075
Node StringsEntail::getMultisetApproximation(Node a)
833
{
834
457075
  NodeManager* nm = NodeManager::currentNM();
835
457075
  if (a.getKind() == STRING_SUBSTR)
836
  {
837
65751
    return a[0];
838
  }
839
391324
  else if (a.getKind() == STRING_STRREPL)
840
  {
841
4431
    return getMultisetApproximation(nm->mkNode(STRING_CONCAT, a[0], a[2]));
842
  }
843
386893
  else if (a.getKind() == STRING_CONCAT)
844
  {
845
93304
    NodeBuilder<> nb(STRING_CONCAT);
846
178743
    for (const Node& ac : a)
847
    {
848
132091
      nb << getMultisetApproximation(ac);
849
    }
850
46652
    return nb.constructNode();
851
  }
852
  else
853
  {
854
340241
    return a;
855
  }
856
}
857
858
111330
Node StringsEntail::getStringOrEmpty(Node n)
859
{
860
111330
  Node res;
861
334030
  while (res.isNull())
862
  {
863
111350
    switch (n.getKind())
864
    {
865
344
      case STRING_STRREPL:
866
      {
867
344
        if (Word::isEmpty(n[0]))
868
        {
869
          // (str.replace "" x y) --> y
870
20
          n = n[2];
871
20
          break;
872
        }
873
874
324
        if (checkLengthOne(n[0]) && Word::isEmpty(n[2]))
875
        {
876
          // (str.replace "A" x "") --> "A"
877
36
          res = n[0];
878
36
          break;
879
        }
880
881
288
        res = n;
882
288
        break;
883
      }
884
5687
      case STRING_SUBSTR:
885
      {
886
5687
        if (checkLengthOne(n[0]))
887
        {
888
          // (str.substr "A" x y) --> "A"
889
270
          res = n[0];
890
270
          break;
891
        }
892
5417
        res = n;
893
5417
        break;
894
      }
895
105319
      default:
896
      {
897
105319
        res = n;
898
105319
        break;
899
      }
900
    }
901
  }
902
111330
  return res;
903
}
904
905
1610
Node StringsEntail::inferEqsFromContains(Node x, Node y)
906
{
907
1610
  NodeManager* nm = NodeManager::currentNM();
908
3220
  Node emp = Word::mkEmptyWord(x.getType());
909
1610
  Assert(x.getType() == y.getType());
910
3220
  TypeNode stype = x.getType();
911
912
3220
  Node xLen = nm->mkNode(STRING_LENGTH, x);
913
3220
  std::vector<Node> yLens;
914
1610
  if (y.getKind() != STRING_CONCAT)
915
  {
916
86
    yLens.push_back(nm->mkNode(STRING_LENGTH, y));
917
  }
918
  else
919
  {
920
5509
    for (const Node& yi : y)
921
    {
922
3985
      yLens.push_back(nm->mkNode(STRING_LENGTH, yi));
923
    }
924
  }
925
926
3220
  std::vector<Node> zeroLens;
927
1610
  if (x == emp)
928
  {
929
    // If x is the empty string, then all ys must be empty, too, and we can
930
    // skip the expensive checks. Note that this is just a performance
931
    // optimization.
932
309
    zeroLens.swap(yLens);
933
  }
934
  else
935
  {
936
    // Check if we can infer that str.len(x) <= str.len(y). If that is the
937
    // case, try to minimize the sum in str.len(x) <= str.len(y1) + ... +
938
    // str.len(yn) (where y = y1 ++ ... ++ yn) while keeping the inequality
939
    // true. The terms that can have length zero without making the inequality
940
    // false must be all be empty if (str.contains x y) is true.
941
1301
    if (!ArithEntail::inferZerosInSumGeq(xLen, yLens, zeroLens))
942
    {
943
      // We could not prove that the inequality holds
944
433
      return Node::null();
945
    }
946
868
    else if (yLens.size() == y.getNumChildren())
947
    {
948
      // We could only prove that the inequality holds but not that any of the
949
      // ys must be empty
950
428
      return nm->mkNode(EQUAL, x, y);
951
    }
952
  }
953
954
749
  if (y.getKind() != STRING_CONCAT)
955
  {
956
8
    if (zeroLens.size() == 1)
957
    {
958
      // y is not a concatenation and we found that it must be empty, so just
959
      // return (= y "")
960
      Assert(zeroLens[0][0] == y);
961
      return nm->mkNode(EQUAL, y, emp);
962
    }
963
    else
964
    {
965
8
      Assert(yLens.size() == 1 && yLens[0][0] == y);
966
8
      return nm->mkNode(EQUAL, x, y);
967
    }
968
  }
969
970
1482
  std::vector<Node> cs;
971
1236
  for (const Node& yiLen : yLens)
972
  {
973
495
    Assert(std::find(y.begin(), y.end(), yiLen[0]) != y.end());
974
495
    cs.push_back(yiLen[0]);
975
  }
976
977
1482
  NodeBuilder<> nb(AND);
978
  // (= x (str.++ y1' ... ym'))
979
741
  if (!cs.empty())
980
  {
981
432
    nb << nm->mkNode(EQUAL, x, utils::mkConcat(cs, stype));
982
  }
983
  // (= y1'' "") ... (= yk'' "")
984
2199
  for (const Node& zeroLen : zeroLens)
985
  {
986
1458
    Assert(std::find(y.begin(), y.end(), zeroLen[0]) != y.end());
987
1458
    nb << nm->mkNode(EQUAL, zeroLen[0], emp);
988
  }
989
990
  // (and (= x (str.++ y1' ... ym')) (= y1'' "") ... (= yk'' ""))
991
741
  return nb.constructNode();
992
}
993
994
}  // namespace strings
995
}  // namespace theory
996
26685
}  // namespace CVC4