GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/theory_strings.cpp Lines: 451 556 81.1 %
Date: 2021-05-22 Branches: 953 2311 41.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Tianyi Liang, Andres Noetzli
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the theory of strings.
14
 */
15
16
#include "theory/strings/theory_strings.h"
17
18
#include "expr/kind.h"
19
#include "options/smt_options.h"
20
#include "options/strings_options.h"
21
#include "options/theory_options.h"
22
#include "smt/logic_exception.h"
23
#include "theory/decision_manager.h"
24
#include "theory/ext_theory.h"
25
#include "theory/rewriter.h"
26
#include "theory/strings/theory_strings_utils.h"
27
#include "theory/strings/type_enumerator.h"
28
#include "theory/strings/word.h"
29
#include "theory/theory_model.h"
30
#include "theory/valuation.h"
31
32
using namespace std;
33
using namespace cvc5::context;
34
using namespace cvc5::kind;
35
36
namespace cvc5 {
37
namespace theory {
38
namespace strings {
39
40
9460
TheoryStrings::TheoryStrings(context::Context* c,
41
                             context::UserContext* u,
42
                             OutputChannel& out,
43
                             Valuation valuation,
44
                             const LogicInfo& logicInfo,
45
9460
                             ProofNodeManager* pnm)
46
    : Theory(THEORY_STRINGS, c, u, out, valuation, logicInfo, pnm),
47
      d_notify(*this),
48
      d_statistics(),
49
      d_state(c, u, d_valuation),
50
      d_eagerSolver(d_state),
51
      d_termReg(d_state, d_statistics, pnm),
52
      d_extTheoryCb(),
53
      d_extTheory(d_extTheoryCb, c, u, out),
54
      d_im(*this, d_state, d_termReg, d_extTheory, d_statistics, pnm),
55
      d_rewriter(&d_statistics.d_rewrites),
56
      d_bsolver(d_state, d_im),
57
      d_csolver(d_state, d_im, d_termReg, d_bsolver),
58
      d_esolver(d_state,
59
                d_im,
60
                d_termReg,
61
                d_rewriter,
62
                d_bsolver,
63
                d_csolver,
64
                d_extTheory,
65
                d_statistics),
66
      d_rsolver(d_state,
67
                d_im,
68
                d_termReg.getSkolemCache(),
69
                d_csolver,
70
                d_esolver,
71
                d_statistics),
72
18929
      d_regexp_elim(options::regExpElimAgg(), pnm, u),
73
18920
      d_stringsFmf(c, u, valuation, d_termReg)
74
{
75
9460
  d_termReg.finishInit(&d_im);
76
77
9460
  d_zero = NodeManager::currentNM()->mkConst( Rational( 0 ) );
78
9460
  d_one = NodeManager::currentNM()->mkConst( Rational( 1 ) );
79
9460
  d_neg_one = NodeManager::currentNM()->mkConst(Rational(-1));
80
9460
  d_true = NodeManager::currentNM()->mkConst( true );
81
9460
  d_false = NodeManager::currentNM()->mkConst( false );
82
83
9460
  d_cardSize = utils::getAlphabetCardinality();
84
85
  // set up the extended function callback
86
9460
  d_extTheoryCb.d_esolver = &d_esolver;
87
88
  // use the state object as the official theory state
89
9460
  d_theoryState = &d_state;
90
  // use the inference manager as the official inference manager
91
9460
  d_inferManager = &d_im;
92
9460
}
93
94
18920
TheoryStrings::~TheoryStrings() {
95
96
18920
}
97
98
9460
TheoryRewriter* TheoryStrings::getTheoryRewriter() { return &d_rewriter; }
99
100
3600
ProofRuleChecker* TheoryStrings::getProofChecker() { return &d_checker; }
101
102
9460
bool TheoryStrings::needsEqualityEngine(EeSetupInfo& esi)
103
{
104
9460
  esi.d_notify = &d_notify;
105
9460
  esi.d_name = "theory::strings::ee";
106
9460
  return true;
107
}
108
109
9460
void TheoryStrings::finishInit()
110
{
111
9460
  Assert(d_equalityEngine != nullptr);
112
113
  // witness is used to eliminate str.from_code
114
9460
  d_valuation.setUnevaluatedKind(WITNESS);
115
116
18920
  bool eagerEval = options::stringEagerEval();
117
  // The kinds we are treating as function application in congruence
118
9460
  d_equalityEngine->addFunctionKind(kind::STRING_LENGTH, eagerEval);
119
9460
  d_equalityEngine->addFunctionKind(kind::STRING_CONCAT, eagerEval);
120
9460
  d_equalityEngine->addFunctionKind(kind::STRING_IN_REGEXP, eagerEval);
121
9460
  d_equalityEngine->addFunctionKind(kind::STRING_TO_CODE, eagerEval);
122
9460
  d_equalityEngine->addFunctionKind(kind::SEQ_UNIT, eagerEval);
123
  // `seq.nth` is not always defined, and so we do not evaluate it eagerly.
124
9460
  d_equalityEngine->addFunctionKind(kind::SEQ_NTH, false);
125
  // extended functions
126
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STRCTN, eagerEval);
127
9460
  d_equalityEngine->addFunctionKind(kind::STRING_LEQ, eagerEval);
128
9460
  d_equalityEngine->addFunctionKind(kind::STRING_SUBSTR, eagerEval);
129
9460
  d_equalityEngine->addFunctionKind(kind::STRING_UPDATE, eagerEval);
130
9460
  d_equalityEngine->addFunctionKind(kind::STRING_ITOS, eagerEval);
131
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STOI, eagerEval);
132
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STRIDOF, eagerEval);
133
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STRREPL, eagerEval);
134
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval);
135
9460
  d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE, eagerEval);
136
9460
  d_equalityEngine->addFunctionKind(kind::STRING_REPLACE_RE_ALL, eagerEval);
137
9460
  d_equalityEngine->addFunctionKind(kind::STRING_STRREPLALL, eagerEval);
138
9460
  d_equalityEngine->addFunctionKind(kind::STRING_TOLOWER, eagerEval);
139
9460
  d_equalityEngine->addFunctionKind(kind::STRING_TOUPPER, eagerEval);
140
9460
  d_equalityEngine->addFunctionKind(kind::STRING_REV, eagerEval);
141
9460
}
142
143
std::string TheoryStrings::identify() const
144
{
145
  return std::string("TheoryStrings");
146
}
147
148
2675
bool TheoryStrings::areCareDisequal( TNode x, TNode y ) {
149
2675
  Assert(d_equalityEngine->hasTerm(x));
150
2675
  Assert(d_equalityEngine->hasTerm(y));
151
8025
  if (d_equalityEngine->isTriggerTerm(x, THEORY_STRINGS)
152
8025
      && d_equalityEngine->isTriggerTerm(y, THEORY_STRINGS))
153
  {
154
    TNode x_shared =
155
3039
        d_equalityEngine->getTriggerTermRepresentative(x, THEORY_STRINGS);
156
    TNode y_shared =
157
3039
        d_equalityEngine->getTriggerTermRepresentative(y, THEORY_STRINGS);
158
2012
    EqualityStatus eqStatus = d_valuation.getEqualityStatus(x_shared, y_shared);
159
2012
    if( eqStatus==EQUALITY_FALSE_AND_PROPAGATED || eqStatus==EQUALITY_FALSE || eqStatus==EQUALITY_FALSE_IN_MODEL ){
160
985
      return true;
161
    }
162
  }
163
1690
  return false;
164
}
165
166
537871
bool TheoryStrings::propagateLit(TNode literal)
167
{
168
537871
  return d_im.propagateLit(literal);
169
}
170
171
31693
TrustNode TheoryStrings::explain(TNode literal)
172
{
173
31693
  Debug("strings-explain") << "explain called on " << literal << std::endl;
174
31693
  return d_im.explainLit(literal);
175
}
176
177
14310
void TheoryStrings::presolve() {
178
14310
  Debug("strings-presolve") << "TheoryStrings::Presolving : get fmf options " << (options::stringFMF() ? "true" : "false") << std::endl;
179
14310
  d_strat.initializeStrategy();
180
181
  // if strings fmf is enabled, register the strategy
182
14310
  if (options::stringFMF())
183
  {
184
74
    d_stringsFmf.presolve();
185
    // This strategy is local to a check-sat call, since we refresh the strategy
186
    // on every call to presolve.
187
74
    d_im.getDecisionManager()->registerStrategy(
188
        DecisionManager::STRAT_STRINGS_SUM_LENGTHS,
189
        d_stringsFmf.getDecisionStrategy(),
190
        DecisionManager::STRAT_SCOPE_LOCAL_SOLVE);
191
  }
192
14310
  Debug("strings-presolve") << "Finished presolve" << std::endl;
193
14310
}
194
195
196
/////////////////////////////////////////////////////////////////////////////
197
// MODEL GENERATION
198
/////////////////////////////////////////////////////////////////////////////
199
200
4884
bool TheoryStrings::collectModelValues(TheoryModel* m,
201
                                       const std::set<Node>& termSet)
202
{
203
4884
  if (Trace.isOn("strings-debug-model"))
204
  {
205
    Trace("strings-debug-model")
206
        << "TheoryStrings::collectModelValues" << std::endl;
207
    Trace("strings-debug-model") << "Equivalence classes are:" << std::endl;
208
    Trace("strings-debug-model") << debugPrintStringsEqc() << std::endl;
209
    Trace("strings-debug-model") << "Extended functions are:" << std::endl;
210
    Trace("strings-debug-model") << d_esolver.debugPrintModel() << std::endl;
211
  }
212
4884
  Trace("strings-model") << "TheoryStrings::collectModelValues" << std::endl;
213
9768
  std::map<TypeNode, std::unordered_set<Node> > repSet;
214
  // Generate model
215
  // get the relevant string equivalence classes
216
69102
  for (const Node& s : termSet)
217
  {
218
128436
    TypeNode tn = s.getType();
219
64218
    if (tn.isStringLike())
220
    {
221
63586
      Node r = d_state.getRepresentative(s);
222
31793
      repSet[tn].insert(r);
223
    }
224
  }
225
5599
  for (const std::pair<const TypeNode, std::unordered_set<Node> >& rst : repSet)
226
  {
227
    // get partition of strings of equal lengths, per type
228
1432
    std::map<TypeNode, std::vector<std::vector<Node> > > colT;
229
1432
    std::map<TypeNode, std::vector<Node> > ltsT;
230
1432
    std::vector<Node> repVec(rst.second.begin(), rst.second.end());
231
716
    d_state.separateByLength(repVec, colT, ltsT);
232
    // now collect model info for the type
233
1432
    TypeNode st = rst.first;
234
717
    if (!collectModelInfoType(st, rst.second, colT[st], ltsT[st], m))
235
    {
236
      return false;
237
    }
238
  }
239
4883
  return true;
240
}
241
242
716
bool TheoryStrings::collectModelInfoType(TypeNode tn,
243
                                         const std::unordered_set<Node>& repSet,
244
                                         std::vector<std::vector<Node> >& col,
245
                                         std::vector<Node>& lts,
246
                                         TheoryModel* m)
247
{
248
716
  NodeManager* nm = NodeManager::currentNM();
249
1432
  std::map< Node, Node > processed;
250
  //step 1 : get all values for known lengths
251
1432
  std::vector< Node > lts_values;
252
1432
  std::map<std::size_t, Node> values_used;
253
1432
  std::vector<Node> len_splits;
254
5072
  for( unsigned i=0; i<col.size(); i++ ) {
255
4357
    Trace("strings-model") << "Checking length for {";
256
12127
    for( unsigned j=0; j<col[i].size(); j++ ) {
257
7770
      if( j>0 ) {
258
3413
        Trace("strings-model") << ", ";
259
      }
260
7770
      Trace("strings-model") << col[i][j];
261
    }
262
4357
    Trace("strings-model") << " } (length is " << lts[i] << ")" << std::endl;
263
8714
    Node len_value;
264
4357
    if( lts[i].isConst() ) {
265
3079
      len_value = lts[i];
266
    }
267
1278
    else if (!lts[i].isNull())
268
    {
269
      // get the model value for lts[i]
270
1278
      len_value = d_valuation.getModelValue(lts[i]);
271
    }
272
4357
    if (len_value.isNull())
273
    {
274
      lts_values.push_back(Node::null());
275
    }
276
    else
277
    {
278
      // must throw logic exception if we cannot construct the string
279
4357
      if (len_value.getConst<Rational>() > String::maxSize())
280
      {
281
2
        std::stringstream ss;
282
2
        ss << "The model was computed to have strings of length " << len_value
283
1
           << ". We only allow strings up to length " << String::maxSize();
284
1
        throw LogicException(ss.str());
285
      }
286
      std::size_t lvalue =
287
4356
          len_value.getConst<Rational>().getNumerator().toUnsignedInt();
288
4356
      auto itvu = values_used.find(lvalue);
289
4356
      if (itvu == values_used.end())
290
      {
291
3390
        values_used[lvalue] = lts[i];
292
      }
293
      else
294
      {
295
966
        len_splits.push_back(lts[i].eqNode(itvu->second));
296
      }
297
4356
      lts_values.push_back(len_value);
298
    }
299
  }
300
  ////step 2 : assign arbitrary values for unknown lengths?
301
  // confirmed by calculus invariant, see paper
302
715
  Trace("strings-model") << "Assign to equivalence classes..." << std::endl;
303
1430
  std::map<Node, Node> pure_eq_assign;
304
  //step 3 : assign values to equivalence classes that are pure variables
305
5070
  for( unsigned i=0; i<col.size(); i++ ){
306
8710
    std::vector< Node > pure_eq;
307
8710
    Trace("strings-model") << "The (" << col[i].size()
308
4355
                           << ") equivalence classes ";
309
12123
    for (const Node& eqc : col[i])
310
    {
311
7768
      Trace("strings-model") << eqc << " ";
312
      //check if col[i][j] has only variables
313
7768
      if (!eqc.isConst())
314
      {
315
2453
        NormalForm& nfe = d_csolver.getNormalForm(eqc);
316
2453
        if (nfe.d_nf.size() == 1)
317
        {
318
          // is it an equivalence class with a seq.unit term?
319
792
          if (nfe.d_nf[0].getKind() == SEQ_UNIT)
320
          {
321
16
            Node argVal;
322
8
            if (nfe.d_nf[0][0].getType().isStringLike())
323
            {
324
1
              argVal = d_state.getRepresentative(nfe.d_nf[0][0]);
325
            }
326
            else
327
            {
328
              // otherwise, it is a shared term
329
7
              argVal = d_valuation.getModelValue(nfe.d_nf[0][0]);
330
            }
331
8
            Assert(!argVal.isNull());
332
16
            Node c = Rewriter::rewrite(nm->mkNode(SEQ_UNIT, argVal));
333
8
            pure_eq_assign[eqc] = c;
334
8
            Trace("strings-model") << "(unit: " << nfe.d_nf[0] << ") ";
335
8
            m->getEqualityEngine()->addTerm(c);
336
          }
337
          // does it have a code and the length of these equivalence classes are
338
          // one?
339
784
          else if (d_termReg.hasStringCode() && lts_values[i] == d_one)
340
          {
341
299
            EqcInfo* eip = d_state.getOrMakeEqcInfo(eqc, false);
342
299
            if (eip && !eip->d_codeTerm.get().isNull())
343
            {
344
              // its value must be equal to its code
345
318
              Node ct = nm->mkNode(kind::STRING_TO_CODE, eip->d_codeTerm.get());
346
318
              Node ctv = d_valuation.getModelValue(ct);
347
              unsigned cvalue =
348
159
                  ctv.getConst<Rational>().getNumerator().toUnsignedInt();
349
159
              Trace("strings-model") << "(code: " << cvalue << ") ";
350
318
              std::vector<unsigned> vec;
351
159
              vec.push_back(cvalue);
352
318
              Node mv = nm->mkConst(String(vec));
353
159
              pure_eq_assign[eqc] = mv;
354
159
              m->getEqualityEngine()->addTerm(mv);
355
            }
356
          }
357
792
          pure_eq.push_back(eqc);
358
        }
359
      }
360
      else
361
      {
362
5315
        processed[eqc] = eqc;
363
        // Make sure that constants are asserted to the theory model that we
364
        // are building. It is possible that new constants were introduced by
365
        // the eager evaluation in the equality engine. These terms are missing
366
        // in the term set and, as a result, are skipped when the equality
367
        // engine is asserted to the theory model.
368
5315
        m->getEqualityEngine()->addTerm(eqc);
369
      }
370
    }
371
4355
    Trace("strings-model") << "have length " << lts_values[i] << std::endl;
372
373
    //assign a new length if necessary
374
4355
    if( !pure_eq.empty() ){
375
568
      if( lts_values[i].isNull() ){
376
        // start with length two (other lengths have special precendence)
377
        std::size_t lvalue = 2;
378
        while( values_used.find( lvalue )!=values_used.end() ){
379
          lvalue++;
380
        }
381
        Trace("strings-model") << "*** Decide to make length of " << lvalue << std::endl;
382
        lts_values[i] = nm->mkConst(Rational(lvalue));
383
        values_used[lvalue] = Node::null();
384
      }
385
568
      Trace("strings-model") << "Need to assign values of length " << lts_values[i] << " to equivalence classes ";
386
1360
      for( unsigned j=0; j<pure_eq.size(); j++ ){
387
792
        Trace("strings-model") << pure_eq[j] << " ";
388
      }
389
568
      Trace("strings-model") << std::endl;
390
391
      //use type enumerator
392
568
      Assert(lts_values[i].getConst<Rational>() <= Rational(String::maxSize()))
393
          << "Exceeded UINT32_MAX in string model";
394
      uint32_t currLen =
395
568
          lts_values[i].getConst<Rational>().getNumerator().toUnsignedInt();
396
1136
      std::unique_ptr<SEnumLen> sel;
397
1136
      Trace("strings-model") << "Cardinality of alphabet is "
398
568
                             << utils::getAlphabetCardinality() << std::endl;
399
568
      if (tn.isString())  // string-only
400
      {
401
554
        sel.reset(new StringEnumLen(
402
554
            currLen, currLen, utils::getAlphabetCardinality()));
403
      }
404
      else
405
      {
406
14
        sel.reset(new SeqEnumLen(tn, nullptr, currLen, currLen));
407
      }
408
1360
      for (const Node& eqc : pure_eq)
409
      {
410
1584
        Node c;
411
792
        std::map<Node, Node>::iterator itp = pure_eq_assign.find(eqc);
412
792
        if (itp == pure_eq_assign.end())
413
        {
414
624
          do
415
          {
416
1249
            if (sel->isFinished())
417
            {
418
              // We are in a case where model construction is impossible due to
419
              // an insufficient number of constants of a given length.
420
421
              // Consider an integer equivalence class E whose value is assigned
422
              // n in the model. Let { S_1, ..., S_m } be the set of string
423
              // equivalence classes such that len( x ) is a member of E for
424
              // some member x of each class S1, ...,Sm. Since our calculus is
425
              // saturated with respect to cardinality inference (see Liang
426
              // et al, Figure 6, CAV 2014), we have that m <= A^n, where A is
427
              // the cardinality of our alphabet.
428
429
              // Now, consider the case where there exists two integer
430
              // equivalence classes E1 and E2 that are assigned n, and moreover
431
              // we did not received notification from arithmetic that E1 = E2.
432
              // This typically should never happen, but assume in the following
433
              // that it does.
434
435
              // Now, it may be the case that there are string equivalence
436
              // classes { S_1, ..., S_m1 } whose lengths are in E1,
437
              // and classes { S'_1, ..., S'_m2 } whose lengths are in E2, where
438
              // m1 + m2 > A^n. In this case, we have insufficient strings to
439
              // assign to { S_1, ..., S_m1, S'_1, ..., S'_m2 }. If this
440
              // happens, we add a split on len( u1 ) = len( u2 ) for some
441
              // len( u1 ) in E1, len( u2 ) in E2. We do this for each pair of
442
              // integer equivalence classes that are assigned to the same value
443
              // in the model.
444
              AlwaysAssert(!len_splits.empty());
445
              for (const Node& sl : len_splits)
446
              {
447
                Node spl = nm->mkNode(OR, sl, sl.negate());
448
                d_im.lemma(spl, InferenceId::STRINGS_CMI_SPLIT);
449
                Trace("strings-lemma")
450
                    << "Strings::CollectModelInfoSplit: " << spl << std::endl;
451
              }
452
              // we added a lemma, so can return here
453
              return false;
454
            }
455
1249
            c = sel->getCurrent();
456
            // increment
457
1249
            sel->increment();
458
1249
          } while (m->hasTerm(c));
459
        }
460
        else
461
        {
462
167
          c = itp->second;
463
        }
464
1584
        Trace("strings-model") << "*** Assigned constant " << c << " for "
465
792
                               << eqc << std::endl;
466
792
        processed[eqc] = c;
467
792
        if (!m->assertEquality(eqc, c, true))
468
        {
469
          // this should never happen due to the model soundness argument
470
          // for strings
471
          Unreachable()
472
              << "TheoryStrings::collectModelInfoType: Inconsistent equality"
473
              << std::endl;
474
          return false;
475
        }
476
      }
477
    }
478
  }
479
715
  Trace("strings-model") << "String Model : Pure Assigned." << std::endl;
480
  //step 4 : assign constants to all other equivalence classes
481
8483
  for (const Node& rn : repSet)
482
  {
483
7768
    if (processed.find(rn) == processed.end())
484
    {
485
1661
      NormalForm& nf = d_csolver.getNormalForm(rn);
486
1661
      if (Trace.isOn("strings-model"))
487
      {
488
        Trace("strings-model")
489
            << "Construct model for " << rn << " based on normal form ";
490
        for (unsigned j = 0, size = nf.d_nf.size(); j < size; j++)
491
        {
492
          Node n = nf.d_nf[j];
493
          if (j > 0)
494
          {
495
            Trace("strings-model") << " ++ ";
496
          }
497
          Trace("strings-model") << n;
498
          Node r = d_state.getRepresentative(n);
499
          if (!r.isConst() && processed.find(r) == processed.end())
500
          {
501
            Trace("strings-model") << "(UNPROCESSED)";
502
          }
503
        }
504
      }
505
1661
      Trace("strings-model") << std::endl;
506
3322
      std::vector< Node > nc;
507
8119
      for (const Node& n : nf.d_nf)
508
      {
509
12916
        Node r = d_state.getRepresentative(n);
510
6458
        Assert(r.isConst() || processed.find(r) != processed.end());
511
6458
        nc.push_back(r.isConst() ? r : processed[r]);
512
      }
513
3322
      Node cc = utils::mkNConcat(nc, tn);
514
3322
      Trace("strings-model")
515
1661
          << "*** Determined constant " << cc << " for " << rn << std::endl;
516
1661
      processed[rn] = cc;
517
1661
      if (!m->assertEquality(rn, cc, true))
518
      {
519
        // this should never happen due to the model soundness argument
520
        // for strings
521
        Unreachable() << "TheoryStrings::collectModelInfoType: "
522
                         "Inconsistent equality (unprocessed eqc)"
523
                      << std::endl;
524
        return false;
525
      }
526
1661
      else if (!cc.isConst())
527
      {
528
        // the value may be specified by seq.unit components, ensure this
529
        // is marked as the skeleton for constructing values in this class.
530
        m->assertSkeleton(cc);
531
      }
532
    }
533
  }
534
  //Trace("strings-model") << "String Model : Assigned." << std::endl;
535
715
  Trace("strings-model") << "String Model : Finished." << std::endl;
536
715
  return true;
537
}
538
539
/////////////////////////////////////////////////////////////////////////////
540
// MAIN SOLVER
541
/////////////////////////////////////////////////////////////////////////////
542
543
108097
void TheoryStrings::preRegisterTerm(TNode n)
544
{
545
216194
  Trace("strings-preregister")
546
108097
      << "TheoryStrings::preRegisterTerm: " << n << std::endl;
547
108097
  d_termReg.preRegisterTerm(n);
548
  // Register the term with the extended theory. Notice we do not recurse on
549
  // this term here since preRegisterTerm is already called recursively on all
550
  // subterms in preregistered literals.
551
108097
  d_extTheory.registerTerm(n);
552
108097
}
553
554
530940
bool TheoryStrings::preNotifyFact(
555
    TNode atom, bool pol, TNode fact, bool isPrereg, bool isInternal)
556
{
557
  // this is only required for internal facts, others are already registered
558
530940
  if (isInternal && atom.getKind() == EQUAL)
559
  {
560
    // We must ensure these terms are registered. We register eagerly here for
561
    // performance reasons. Alternatively, terms could be registered at full
562
    // effort in e.g. BaseSolver::init.
563
38802
    for (const Node& t : atom)
564
    {
565
25868
      d_termReg.registerTerm(t, 0);
566
    }
567
  }
568
530940
  return false;
569
}
570
571
530940
void TheoryStrings::notifyFact(TNode atom,
572
                               bool polarity,
573
                               TNode fact,
574
                               bool isInternal)
575
{
576
530940
  d_eagerSolver.notifyFact(atom, polarity, fact, isInternal);
577
  // process pending conflicts due to reasoning about endpoints
578
530940
  if (!d_state.isInConflict() && d_state.hasPendingConflict())
579
  {
580
484
    InferInfo iiPendingConf(InferenceId::UNKNOWN);
581
242
    d_state.getPendingConflict(iiPendingConf);
582
484
    Trace("strings-pending")
583
242
        << "Process pending conflict " << iiPendingConf.d_premises << std::endl;
584
484
    Trace("strings-conflict")
585
242
        << "CONFLICT: Eager : " << iiPendingConf.d_premises << std::endl;
586
242
    ++(d_statistics.d_conflictsEager);
587
    // call the inference manager to send the conflict
588
242
    d_im.processConflict(iiPendingConf);
589
242
    return;
590
  }
591
530698
  Trace("strings-pending-debug") << "  Now collect terms" << std::endl;
592
530698
  Trace("strings-pending-debug") << "  Finished collect terms" << std::endl;
593
}
594
595
227975
void TheoryStrings::postCheck(Effort e)
596
{
597
227975
  d_im.doPendingFacts();
598
599
227975
  Assert(d_strat.isStrategyInit());
600
682594
  if (!d_state.isInConflict() && !d_valuation.needCheck()
601
284327
      && d_strat.hasStrategyEffort(e))
602
  {
603
35382
    Trace("strings-check-debug")
604
17691
        << "Theory of strings " << e << " effort check " << std::endl;
605
17691
    if (Trace.isOn("strings-eqc"))
606
    {
607
      Trace("strings-eqc") << debugPrintStringsEqc() << std::endl;
608
    }
609
17691
    ++(d_statistics.d_checkRuns);
610
17691
    bool sentLemma = false;
611
17691
    bool hadPending = false;
612
17691
    Trace("strings-check") << "Full effort check..." << std::endl;
613
7533
    do{
614
25224
      d_im.reset();
615
25224
      ++(d_statistics.d_strategyRuns);
616
25224
      Trace("strings-check") << "  * Run strategy..." << std::endl;
617
25224
      runStrategy(e);
618
      // remember if we had pending facts or lemmas
619
25224
      hadPending = d_im.hasPending();
620
      // Send the facts *and* the lemmas. We send lemmas regardless of whether
621
      // we send facts since some lemmas cannot be dropped. Other lemmas are
622
      // otherwise avoided by aborting the strategy when a fact is ready.
623
25224
      d_im.doPending();
624
      // Did we successfully send a lemma? Notice that if hasPending = true
625
      // and sentLemma = false, then the above call may have:
626
      // (1) had no pending lemmas, but successfully processed pending facts,
627
      // (2) unsuccessfully processed pending lemmas.
628
      // In either case, we repeat the strategy if we are not in conflict.
629
25224
      sentLemma = d_im.hasSentLemma();
630
25224
      if (Trace.isOn("strings-check"))
631
      {
632
        Trace("strings-check") << "  ...finish run strategy: ";
633
        Trace("strings-check") << (hadPending ? "hadPending " : "");
634
        Trace("strings-check") << (sentLemma ? "sentLemma " : "");
635
        Trace("strings-check") << (d_state.isInConflict() ? "conflict " : "");
636
        if (!hadPending && !sentLemma && !d_state.isInConflict())
637
        {
638
          Trace("strings-check") << "(none)";
639
        }
640
        Trace("strings-check") << std::endl;
641
      }
642
      // repeat if we did not add a lemma or conflict, and we had pending
643
      // facts or lemmas.
644
25224
    } while (!d_state.isInConflict() && !sentLemma && hadPending);
645
  }
646
227975
  Trace("strings-check") << "Theory of strings, done check : " << e << std::endl;
647
227975
  Assert(!d_im.hasPendingFact());
648
227975
  Assert(!d_im.hasPendingLemma());
649
227975
}
650
651
6006
bool TheoryStrings::needsCheckLastEffort() {
652
6006
  if( options::stringGuessModel() ){
653
    return d_esolver.hasExtendedFunctions();
654
  }
655
6006
  return false;
656
}
657
658
/** Conflict when merging two constants */
659
693
void TheoryStrings::conflict(TNode a, TNode b){
660
693
  if (d_state.isInConflict())
661
  {
662
    // already in conflict
663
    return;
664
  }
665
693
  d_im.conflictEqConstantMerge(a, b);
666
693
  Trace("strings-conflict") << "CONFLICT: Eq engine conflict" << std::endl;
667
693
  ++(d_statistics.d_conflictsEqEngine);
668
}
669
670
84119
void TheoryStrings::eqNotifyNewClass(TNode t){
671
84119
  Kind k = t.getKind();
672
84119
  if (k == STRING_LENGTH || k == STRING_TO_CODE)
673
  {
674
25491
    Trace("strings-debug") << "New length eqc : " << t << std::endl;
675
    //we care about the length of this string
676
25491
    d_termReg.registerTerm(t[0], 1);
677
  }
678
84119
  d_eagerSolver.eqNotifyNewClass(t);
679
84119
}
680
681
8459
void TheoryStrings::addCarePairs(TNodeTrie* t1,
682
                                 TNodeTrie* t2,
683
                                 unsigned arity,
684
                                 unsigned depth)
685
{
686
8459
  if( depth==arity ){
687
378
    if( t2!=NULL ){
688
756
      Node f1 = t1->getData();
689
756
      Node f2 = t2->getData();
690
378
      if (!d_equalityEngine->areEqual(f1, f2))
691
      {
692
236
        Trace("strings-cg-debug") << "TheoryStrings::computeCareGraph(): checking function " << f1 << " and " << f2 << std::endl;
693
472
        vector< pair<TNode, TNode> > currentPairs;
694
668
        for (unsigned k = 0; k < f1.getNumChildren(); ++ k) {
695
864
          TNode x = f1[k];
696
864
          TNode y = f2[k];
697
432
          Assert(d_equalityEngine->hasTerm(x));
698
432
          Assert(d_equalityEngine->hasTerm(y));
699
432
          Assert(!d_equalityEngine->areDisequal(x, y, false));
700
432
          Assert(!areCareDisequal(x, y));
701
432
          if (!d_equalityEngine->areEqual(x, y))
702
          {
703
741
            if (d_equalityEngine->isTriggerTerm(x, THEORY_STRINGS)
704
741
                && d_equalityEngine->isTriggerTerm(y, THEORY_STRINGS))
705
            {
706
115
              TNode x_shared = d_equalityEngine->getTriggerTermRepresentative(
707
230
                  x, THEORY_STRINGS);
708
115
              TNode y_shared = d_equalityEngine->getTriggerTermRepresentative(
709
230
                  y, THEORY_STRINGS);
710
115
              currentPairs.push_back(make_pair(x_shared, y_shared));
711
            }
712
          }
713
        }
714
351
        for (unsigned c = 0; c < currentPairs.size(); ++ c) {
715
115
          Trace("strings-cg-pair") << "TheoryStrings::computeCareGraph(): pair : " << currentPairs[c].first << " " << currentPairs[c].second << std::endl;
716
115
          addCarePair(currentPairs[c].first, currentPairs[c].second);
717
        }
718
      }
719
    }
720
  }else{
721
8081
    if( t2==NULL ){
722
7201
      if( depth<(arity-1) ){
723
        //add care pairs internal to each child
724
7261
        for (std::pair<const TNode, TNodeTrie>& tt : t1->d_data)
725
        {
726
4861
          addCarePairs(&tt.second, nullptr, arity, depth + 1);
727
        }
728
      }
729
      //add care pairs based on each pair of non-disequal arguments
730
24608
      for (std::map<TNode, TNodeTrie>::iterator it = t1->d_data.begin();
731
24608
           it != t1->d_data.end();
732
           ++it)
733
      {
734
17407
        std::map<TNode, TNodeTrie>::iterator it2 = it;
735
17407
        ++it2;
736
164637
        for( ; it2 != t1->d_data.end(); ++it2 ){
737
73615
          if (!d_equalityEngine->areDisequal(it->first, it2->first, false))
738
          {
739
1300
            if( !areCareDisequal(it->first, it2->first) ){
740
713
              addCarePairs( &it->second, &it2->second, arity, depth+1 );
741
            }
742
          }
743
        }
744
      }
745
    }else{
746
      //add care pairs based on product of indices, non-disequal arguments
747
2326
      for (std::pair<const TNode, TNodeTrie>& tt1 : t1->d_data)
748
      {
749
4036
        for (std::pair<const TNode, TNodeTrie>& tt2 : t2->d_data)
750
        {
751
2590
          if (!d_equalityEngine->areDisequal(tt1.first, tt2.first, false))
752
          {
753
943
            if (!areCareDisequal(tt1.first, tt2.first))
754
            {
755
545
              addCarePairs(&tt1.second, &tt2.second, arity, depth + 1);
756
            }
757
          }
758
        }
759
      }
760
    }
761
  }
762
8459
}
763
764
6496
void TheoryStrings::computeCareGraph(){
765
  //computing the care graph here is probably still necessary, due to operators that take non-string arguments  TODO: verify
766
6496
  Trace("strings-cg") << "TheoryStrings::computeCareGraph(): Build term indices..." << std::endl;
767
  // Term index for each (type, operator) pair. We require the operator here
768
  // since operators are polymorphic, taking strings/sequences.
769
12992
  std::map<std::pair<TypeNode, Node>, TNodeTrie> index;
770
12992
  std::map< Node, unsigned > arity;
771
6496
  const context::CDList<TNode>& fterms = d_termReg.getFunctionTerms();
772
6496
  size_t functionTerms = fterms.size();
773
40570
  for (unsigned i = 0; i < functionTerms; ++ i) {
774
68148
    TNode f1 = fterms[i];
775
34074
    Trace("strings-cg") << "...build for " << f1 << std::endl;
776
68148
    Node op = f1.getOperator();
777
68148
    std::vector< TNode > reps;
778
34074
    bool has_trigger_arg = false;
779
78561
    for( unsigned j=0; j<f1.getNumChildren(); j++ ){
780
44487
      reps.push_back(d_equalityEngine->getRepresentative(f1[j]));
781
44487
      if (d_equalityEngine->isTriggerTerm(f1[j], THEORY_STRINGS))
782
      {
783
33328
        has_trigger_arg = true;
784
      }
785
    }
786
34074
    if( has_trigger_arg ){
787
50486
      TypeNode ft = utils::getOwnerStringType(f1);
788
50486
      std::pair<TypeNode, Node> ikey = std::pair<TypeNode, Node>(ft, op);
789
25243
      index[ikey].addTerm(f1, reps);
790
25243
      arity[op] = reps.size();
791
    }
792
  }
793
  //for each index
794
8836
  for (std::pair<const std::pair<TypeNode, Node>, TNodeTrie>& ti : index)
795
  {
796
4680
    Trace("strings-cg") << "TheoryStrings::computeCareGraph(): Process index "
797
2340
                        << ti.first << "..." << std::endl;
798
4680
    Node op = ti.first.second;
799
2340
    addCarePairs(&ti.second, nullptr, arity[op], 0);
800
  }
801
6496
}
802
803
void TheoryStrings::checkRegisterTermsPreNormalForm()
804
{
805
  const std::vector<Node>& seqc = d_bsolver.getStringEqc();
806
  for (const Node& eqc : seqc)
807
  {
808
    eq::EqClassIterator eqc_i = eq::EqClassIterator(eqc, d_equalityEngine);
809
    while (!eqc_i.isFinished())
810
    {
811
      Node n = (*eqc_i);
812
      if (!d_bsolver.isCongruent(n))
813
      {
814
        d_termReg.registerTerm(n, 2);
815
      }
816
      ++eqc_i;
817
    }
818
  }
819
}
820
821
9372
void TheoryStrings::checkCodes()
822
{
823
  // ensure that lemmas regarding str.code been added for each constant string
824
  // of length one
825
9372
  if (d_termReg.hasStringCode())
826
  {
827
968
    NodeManager* nm = NodeManager::currentNM();
828
    // str.code applied to the code term for each equivalence class that has a
829
    // code term but is not a constant
830
1767
    std::vector<Node> nconst_codes;
831
    // str.code applied to the proxy variables for each equivalence classes that
832
    // are constants of size one
833
1767
    std::vector<Node> const_codes;
834
968
    const std::vector<Node>& seqc = d_bsolver.getStringEqc();
835
11688
    for (const Node& eqc : seqc)
836
    {
837
10720
      NormalForm& nfe = d_csolver.getNormalForm(eqc);
838
10720
      if (nfe.d_nf.size() == 1 && nfe.d_nf[0].isConst())
839
      {
840
8684
        Node c = nfe.d_nf[0];
841
8684
        Trace("strings-code-debug") << "Get proxy variable for " << c
842
4342
                                    << std::endl;
843
8684
        Node cc = nm->mkNode(kind::STRING_TO_CODE, c);
844
4342
        cc = Rewriter::rewrite(cc);
845
4342
        Assert(cc.isConst());
846
8684
        Node cp = d_termReg.ensureProxyVariableFor(c);
847
8684
        Node vc = nm->mkNode(STRING_TO_CODE, cp);
848
4342
        if (!d_state.areEqual(cc, vc))
849
        {
850
856
          std::vector<Node> emptyVec;
851
428
          d_im.sendInference(emptyVec, cc.eqNode(vc), InferenceId::STRINGS_CODE_PROXY);
852
        }
853
4342
        const_codes.push_back(vc);
854
      }
855
      else
856
      {
857
6378
        EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false);
858
6378
        if (ei && !ei->d_codeTerm.get().isNull())
859
        {
860
3592
          Node vc = nm->mkNode(kind::STRING_TO_CODE, ei->d_codeTerm.get());
861
1796
          nconst_codes.push_back(vc);
862
        }
863
      }
864
    }
865
968
    if (d_im.hasProcessed())
866
    {
867
169
      return;
868
    }
869
    // now, ensure that str.code is injective
870
1598
    std::vector<Node> cmps;
871
799
    cmps.insert(cmps.end(), const_codes.rbegin(), const_codes.rend());
872
799
    cmps.insert(cmps.end(), nconst_codes.rbegin(), nconst_codes.rend());
873
2306
    for (unsigned i = 0, num_ncc = nconst_codes.size(); i < num_ncc; i++)
874
    {
875
3014
      Node c1 = nconst_codes[i];
876
1507
      cmps.pop_back();
877
8746
      for (const Node& c2 : cmps)
878
      {
879
14478
        Trace("strings-code-debug")
880
7239
            << "Compare codes : " << c1 << " " << c2 << std::endl;
881
7239
        if (!d_state.areDisequal(c1, c2) && !d_state.areEqual(c1, d_neg_one))
882
        {
883
2008
          Node eq_no = c1.eqNode(d_neg_one);
884
2008
          Node deq = c1.eqNode(c2).negate();
885
2008
          Node eqn = c1[0].eqNode(c2[0]);
886
          // str.code(x)==-1 V str.code(x)!=str.code(y) V x==y
887
2008
          Node inj_lem = nm->mkNode(kind::OR, eq_no, deq, eqn);
888
1004
          deq = Rewriter::rewrite(deq);
889
1004
          d_im.addPendingPhaseRequirement(deq, false);
890
2008
          std::vector<Node> emptyVec;
891
1004
          d_im.sendInference(emptyVec, inj_lem, InferenceId::STRINGS_CODE_INJ);
892
        }
893
      }
894
    }
895
  }
896
}
897
898
void TheoryStrings::checkRegisterTermsNormalForms()
899
{
900
  const std::vector<Node>& seqc = d_bsolver.getStringEqc();
901
  for (const Node& eqc : seqc)
902
  {
903
    NormalForm& nfi = d_csolver.getNormalForm(eqc);
904
    // check if there is a length term for this equivalence class
905
    EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false);
906
    Node lt = ei ? ei->d_lengthTerm : Node::null();
907
    if (lt.isNull())
908
    {
909
      Node c = utils::mkNConcat(nfi.d_nf, eqc.getType());
910
      d_termReg.registerTerm(c, 3);
911
    }
912
  }
913
}
914
915
90019
TrustNode TheoryStrings::ppRewrite(TNode atom, std::vector<SkolemLemma>& lems)
916
{
917
90019
  Trace("strings-ppr") << "TheoryStrings::ppRewrite " << atom << std::endl;
918
90019
  if (atom.getKind() == STRING_FROM_CODE)
919
  {
920
    // str.from_code(t) --->
921
    //   witness k. ite(0 <= t < |A|, t = str.to_code(k), k = "")
922
51
    NodeManager* nm = NodeManager::currentNM();
923
102
    Node t = atom[0];
924
102
    Node card = nm->mkConst(Rational(utils::getAlphabetCardinality()));
925
    Node cond =
926
102
        nm->mkNode(AND, nm->mkNode(LEQ, d_zero, t), nm->mkNode(LT, t, card));
927
102
    Node v = nm->mkBoundVar(nm->stringType());
928
102
    Node emp = Word::mkEmptyWord(atom.getType());
929
    Node pred = nm->mkNode(
930
102
        ITE, cond, t.eqNode(nm->mkNode(STRING_TO_CODE, v)), v.eqNode(emp));
931
51
    SkolemManager* sm = nm->getSkolemManager();
932
102
    Node ret = sm->mkSkolem(v, pred, "kFromCode");
933
51
    lems.push_back(SkolemLemma(ret, nullptr));
934
51
    return TrustNode::mkTrustRewrite(atom, ret, nullptr);
935
  }
936
179936
  TrustNode ret;
937
179936
  Node atomRet = atom;
938
179967
  if (options::regExpElim() && atom.getKind() == STRING_IN_REGEXP)
939
  {
940
    // aggressive elimination of regular expression membership
941
56
    ret = d_regexp_elim.eliminateTrusted(atomRet);
942
56
    if (!ret.isNull())
943
    {
944
92
      Trace("strings-ppr") << "  rewrote " << atom << " -> " << ret.getNode()
945
46
                           << " via regular expression elimination."
946
46
                           << std::endl;
947
46
      atomRet = ret.getNode();
948
    }
949
  }
950
89968
  return ret;
951
}
952
953
/** run the given inference step */
954
199498
void TheoryStrings::runInferStep(InferStep s, int effort)
955
{
956
199498
  Trace("strings-process") << "Run " << s;
957
199498
  if (effort > 0)
958
  {
959
30175
    Trace("strings-process") << ", effort = " << effort;
960
  }
961
199498
  Trace("strings-process") << "..." << std::endl;
962
199498
  switch (s)
963
  {
964
25224
    case CHECK_INIT: d_bsolver.checkInit(); break;
965
22075
    case CHECK_CONST_EQC: d_bsolver.checkConstantEquivalenceClasses(); break;
966
31915
    case CHECK_EXTF_EVAL: d_esolver.checkExtfEval(effort); break;
967
20839
    case CHECK_CYCLES: d_csolver.checkCycles(); break;
968
20839
    case CHECK_FLAT_FORMS: d_csolver.checkFlatForms(); break;
969
    case CHECK_REGISTER_TERMS_PRE_NF: checkRegisterTermsPreNormalForm(); break;
970
15405
    case CHECK_NORMAL_FORMS_EQ: d_csolver.checkNormalFormsEq(); break;
971
9730
    case CHECK_NORMAL_FORMS_DEQ: d_csolver.checkNormalFormsDeq(); break;
972
9372
    case CHECK_CODES: checkCodes(); break;
973
9036
    case CHECK_LENGTH_EQC: d_csolver.checkLengthsEqc(); break;
974
    case CHECK_REGISTER_TERMS_NF: checkRegisterTermsNormalForms(); break;
975
20335
    case CHECK_EXTF_REDUCTION: d_esolver.checkExtfReductions(effort); break;
976
7526
    case CHECK_MEMBERSHIP: d_rsolver.checkMemberships(); break;
977
7202
    case CHECK_CARDINALITY: d_bsolver.checkCardinality(); break;
978
    default: Unreachable(); break;
979
  }
980
398996
  Trace("strings-process") << "Done " << s
981
398996
                           << ", addedFact = " << d_im.hasPendingFact()
982
398996
                           << ", addedLemma = " << d_im.hasPendingLemma()
983
398996
                           << ", conflict = " << d_state.isInConflict()
984
199498
                           << std::endl;
985
199498
}
986
987
25224
void TheoryStrings::runStrategy(Theory::Effort e)
988
{
989
25224
  std::vector<std::pair<InferStep, int> >::iterator it = d_strat.stepBegin(e);
990
  std::vector<std::pair<InferStep, int> >::iterator stepEnd =
991
25224
      d_strat.stepEnd(e);
992
993
25224
  Trace("strings-process") << "----check, next round---" << std::endl;
994
771366
  while (it != stepEnd)
995
  {
996
391093
    InferStep curr = it->first;
997
391093
    if (curr == BREAK)
998
    {
999
191595
      if (d_im.hasProcessed())
1000
      {
1001
17321
        break;
1002
      }
1003
    }
1004
    else
1005
    {
1006
199498
      runInferStep(curr, it->second);
1007
199498
      if (d_state.isInConflict())
1008
      {
1009
701
        break;
1010
      }
1011
    }
1012
373071
    ++it;
1013
  }
1014
25224
  Trace("strings-process") << "----finished round---" << std::endl;
1015
25224
}
1016
1017
std::string TheoryStrings::debugPrintStringsEqc()
1018
{
1019
  std::stringstream ss;
1020
  for (unsigned t = 0; t < 2; t++)
1021
  {
1022
    eq::EqClassesIterator eqcs2_i = eq::EqClassesIterator(d_equalityEngine);
1023
    ss << (t == 0 ? "STRINGS:" : "OTHER:") << std::endl;
1024
    while (!eqcs2_i.isFinished())
1025
    {
1026
      Node eqc = (*eqcs2_i);
1027
      bool print = (t == 0 && eqc.getType().isStringLike())
1028
                   || (t == 1 && !eqc.getType().isStringLike());
1029
      if (print)
1030
      {
1031
        eq::EqClassIterator eqc2_i = eq::EqClassIterator(eqc, d_equalityEngine);
1032
        ss << "Eqc( " << eqc << " ) : { ";
1033
        while (!eqc2_i.isFinished())
1034
        {
1035
          if ((*eqc2_i) != eqc && (*eqc2_i).getKind() != kind::EQUAL)
1036
          {
1037
            ss << (*eqc2_i) << " ";
1038
          }
1039
          ++eqc2_i;
1040
        }
1041
        ss << " } " << std::endl;
1042
        EqcInfo* ei = d_state.getOrMakeEqcInfo(eqc, false);
1043
        if (ei)
1044
        {
1045
          Trace("strings-eqc-debug")
1046
              << "* Length term : " << ei->d_lengthTerm.get() << std::endl;
1047
          Trace("strings-eqc-debug")
1048
              << "* Cardinality lemma k : " << ei->d_cardinalityLemK.get()
1049
              << std::endl;
1050
          Trace("strings-eqc-debug")
1051
              << "* Normalization length lemma : "
1052
              << ei->d_normalizedLength.get() << std::endl;
1053
        }
1054
      }
1055
      ++eqcs2_i;
1056
    }
1057
    ss << std::endl;
1058
  }
1059
  ss << std::endl;
1060
  return ss.str();
1061
}
1062
1063
}  // namespace strings
1064
}  // namespace theory
1065
28194
}  // namespace cvc5