GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/core_solver.h Lines: 2 2 100.0 %
Date: 2021-09-29 Branches: 9 22 40.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Core solver for the theory of strings, responsible for reasoning
14
 * string concatenation plus length constraints.
15
 */
16
17
#include "cvc5_private.h"
18
19
#ifndef CVC5__THEORY__STRINGS__CORE_SOLVER_H
20
#define CVC5__THEORY__STRINGS__CORE_SOLVER_H
21
22
#include "context/cdhashmap.h"
23
#include "context/cdhashset.h"
24
#include "context/cdlist.h"
25
#include "smt/env_obj.h"
26
#include "theory/strings/base_solver.h"
27
#include "theory/strings/infer_info.h"
28
#include "theory/strings/inference_manager.h"
29
#include "theory/strings/normal_form.h"
30
#include "theory/strings/solver_state.h"
31
#include "theory/strings/term_registry.h"
32
33
namespace cvc5 {
34
namespace theory {
35
namespace strings {
36
37
/**
38
 * This data structure encapsulates an inference for the core solver of the
39
 * theory of strings. This includes the form of the inference to be processed
40
 * by the inference manager, the side effects it generates for the core solver,
41
 * and information used for heuristics and debugging.
42
 */
43
29313
class CoreInferInfo
44
{
45
 public:
46
  CoreInferInfo(InferenceId id);
47
44442
  ~CoreInferInfo() {}
48
  /** The infer info of this class */
49
  InferInfo d_infer;
50
  /**
51
   * The pending phase requirements, see InferenceManager::sendPhaseRequirement.
52
   */
53
  std::map<Node, bool> d_pendingPhase;
54
  /**
55
   * The index in the normal forms under which this inference is addressing.
56
   * For example, if the inference is inferring x = y from |x|=|y| and
57
   *   w ++ x ++ ... = w ++ y ++ ...
58
   * then d_index is 1, since x and y are at index 1 in these concat terms.
59
   */
60
  unsigned d_index;
61
  /**
62
   * The normal form pair that is cached as a result of this inference.
63
   */
64
  Node d_nfPair[2];
65
  /** for debugging
66
   *
67
   * The base pair of strings d_i/d_j that led to the inference, and whether
68
   * (d_rev) we were processing the normal forms of these strings in reverse
69
   * direction.
70
   */
71
  Node d_i;
72
  Node d_j;
73
  bool d_rev;
74
};
75
76
/** The core solver for the theory of strings
77
 *
78
 * This implements techniques for handling (dis)equalities involving
79
 * string concatenation terms based on the procedure by Liang et al CAV 2014.
80
 */
81
class CoreSolver : protected EnvObj
82
{
83
  friend class InferenceManager;
84
  using NodeIntMap = context::CDHashMap<Node, int>;
85
  using NodeSet = context::CDHashSet<Node>;
86
87
 public:
88
  CoreSolver(Env& env,
89
             SolverState& s,
90
             InferenceManager& im,
91
             TermRegistry& tr,
92
             BaseSolver& bs);
93
  ~CoreSolver();
94
95
  //-----------------------inference steps
96
  /** check cycles
97
   *
98
   * This inference schema ensures that a containment ordering < over the
99
   * string equivalence classes is acyclic. We define this ordering < such that
100
   * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
101
   * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
102
   * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
103
   * form components that do not constitute this chain, e.g. (w1...wk) \ sj
104
   * in the flat form above, must be empty.
105
   *
106
   * For more details, see the inference S-Cycle in Liang et al CAV 2014.
107
   */
108
  void checkCycles();
109
  /** check flat forms
110
   *
111
   * This applies an inference schema based on "flat forms". The flat form of a
112
   * string term t is a vector of representative terms [r1, ..., rn] such that
113
   * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
114
   * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
115
   * the equivalence class containing t1. For example, if t is y ++ z ++ z,
116
   * E is { y = "", w = z }, and w is the representative of the equivalence
117
   * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
118
   * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
119
   * We may infer various facts based on this pair of terms. For example:
120
   *   ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
121
   *   rn = sn, if n=m and rj == sj for each j < n,
122
   *   ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
123
   * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
124
   * respectively.
125
   *
126
   * Notice that this inference scheme is an optimization and not needed for
127
   * model-soundness. The motivation for this schema is that it is simpler than
128
   * checkNormalFormsEq, which can be seen as a recursive version of this
129
   * schema (see difference of "normal form" vs "flat form" below), and
130
   * checkNormalFormsEq is complete, in the sense that if it passes with no
131
   * inferences, we are ensured that all string equalities in the current
132
   * context are satisfied.
133
   *
134
   * Must call checkCycles before this function in a strategy.
135
   */
136
  void checkFlatForms();
137
  /** check normal forms equalities
138
   *
139
   * This applies an inference schema based on "normal forms". The normal form
140
   * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
141
   * where t1...tn are concatenation terms is a vector of representative terms
142
   * [r1, ..., rm] such that:
143
   * (1) if n=0, then m=1 and r1 is the representative of e,
144
   * (2) if n>0, say
145
   *   t1 = t^1_1 ++ ... ++ t^1_m_1
146
   *   ...
147
   *   tn = t^1_n ++ ... ++ t^_m_n
148
   * for *each* i=1, ..., n, the result of concenating the normal forms of
149
   * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
150
   * can be assigned a normal form, then all equalities between ti and tj are
151
   * satisfied by all models that correspond to extensions of the current
152
   * assignment. For further detail on this terminology, see Liang et al
153
   * CAV 2014.
154
   *
155
   * Notice that all constant words are implicitly considered concatentation
156
   * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
157
   *
158
   * At a high level, we build normal forms for equivalence classes bottom-up,
159
   * starting with equivalence classes that are minimal with respect to the
160
   * containment ordering < computed during checkCycles. While computing a
161
   * normal form for an equivalence class, we may infer equalities between
162
   * components of strings that must be equal (e.g. x=y when x++z == y++w when
163
   * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
164
   * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
165
   * string terms into smaller components using fresh skolem variables (see
166
   * Inference values with names "SPLIT"). We also may introduce regular
167
   * expression constraints in this method for looping word equations (see
168
   * the Inference INFER_FLOOP).
169
   *
170
   * If this inference schema returns no facts, lemmas, or conflicts, then
171
   * we have successfully assigned normal forms for all equivalence classes, as
172
   * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
173
   * conflict based on inferences in the Inference enumeration above.
174
   */
175
  void checkNormalFormsEq();
176
  /** check normal forms disequalities
177
   *
178
   * This inference schema can be seen as the converse of the above schema. In
179
   * particular, it ensures that each pair of distinct equivalence classes
180
   * e1 and e2 have distinct normal forms.
181
   *
182
   * This method considers all pairs of distinct equivalence classes (e1,e2)
183
   * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
184
   * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
185
   * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
186
   * disequal and have the same length, then x1 and x2 have distinct normal
187
   * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
188
   * or split on an equality between ri and si.
189
   *
190
   * If this inference schema returns no facts, lemmas, or conflicts, then all
191
   * disequalities between string terms are satisfied by all models that are
192
   * extensions of the current assignment.
193
   */
194
  void checkNormalFormsDeq();
195
  /** check lengths for equivalence classes
196
   *
197
   * This inference schema adds lemmas of the form:
198
   *   E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
199
   * where [r1, ..., rn] is the normal form of the equivalence class containing
200
   * x. This schema is not required for correctness but experimentally has
201
   * shown to be helpful.
202
   */
203
  void checkLengthsEqc();
204
  //-----------------------end inference steps
205
206
  //--------------------------- query functions
207
  /**
208
   * Get normal form for string term n. For details on this data structure,
209
   * see theory/strings/normal_form.h.
210
   *
211
   * This query is valid after a successful call to checkNormalFormsEq, e.g.
212
   * a call where the inference manager was not given any lemmas or inferences.
213
   */
214
  NormalForm& getNormalForm(Node n);
215
  /** get normal string
216
   *
217
   * This method returns the node that is equivalent to the normal form of x,
218
   * and adds the corresponding explanation to nf_exp.
219
   *
220
   * For example, if x = y ++ z is an assertion in the current context, then
221
   * this method returns the term y ++ z and adds x = y ++ z to nf_exp.
222
   */
223
  Node getNormalString(Node x, std::vector<Node>& nf_exp);
224
  //-------------------------- end query functions
225
226
  /**
227
   * This returns the conclusion of the proof rule corresponding to splitting
228
   * on the arrangement of terms x and y appearing in an equation of the form
229
   *   x ++ x' = y ++ y' or x' ++ x = y' ++ y
230
   * where we are in the second case if isRev is true. This method is called
231
   * both by the core solver and by the strings proof checker.
232
   *
233
   * @param x The first term
234
   * @param y The second term
235
   * @param rule The proof rule whose conclusion we are asking for
236
   * @param isRev Whether the equation is in a reverse direction
237
   * @param skc The skolem cache (to allocate fresh variables if necessary)
238
   * @param newSkolems The vector to add new variables to
239
   * @return The conclusion of the inference.
240
   */
241
  static Node getConclusion(Node x,
242
                            Node y,
243
                            PfRule rule,
244
                            bool isRev,
245
                            SkolemCache* skc,
246
                            std::vector<Node>& newSkolems);
247
  /**
248
   * Get sufficient non-empty overlap of string constants c and d.
249
   *
250
   * This is called when handling equations of the form:
251
   *   x ++ d ++ ... = c ++ ...
252
   * when x is non-empty and non-constant.
253
   *
254
   * This returns the maximal index in c which x must have as a prefix, which
255
   * notice is an integer >= 1 since x is non-empty.
256
   *
257
   * @param c The first constant
258
   * @param d The second constant
259
   * @param isRev Whether the equation is in the reverse direction
260
   * @return The position in c.
261
   */
262
  static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev);
263
  /**
264
   * This returns the conclusion of the decompose proof rule. This returns
265
   * a conjunction of splitting string x into pieces based on length l, e.g.:
266
   *   x = k_1 ++ k_2
267
   * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
268
   * length l if isRev is false (resp. true). The function also adds a
269
   * length constraint len(k_1) = l (resp. len(k_2) = l). Note that adding this
270
   * constraint to the conclusion is *not* optional, since the skolems k_1 and
271
   * k_2 may be shared, hence their length constraint must be guarded by the
272
   * premises of this inference.
273
   *
274
   * @param x The string term
275
   * @param l The length term
276
   * @param isRev Whether the equation is in a reverse direction
277
   * @param skc The skolem cache (to allocate fresh variables if necessary)
278
   * @param newSkolems The vector to add new variables to
279
   * @return The conclusion of the inference.
280
   */
281
  static Node getDecomposeConclusion(Node x,
282
                                     Node l,
283
                                     bool isRev,
284
                                     SkolemCache* skc,
285
                                     std::vector<Node>& newSkolems);
286
287
 private:
288
  /**
289
   * This processes the infer info ii as an inference. In more detail, it calls
290
   * the inference manager to process the inference, and updates the set of
291
   * normal form pairs. Returns true if the conclusion of ii was not true
292
   * after rewriting. If the conclusion is true, this method does nothing.
293
   */
294
  bool processInferInfo(CoreInferInfo& ii);
295
  /** Add that (n1,n2) is a normal form pair in the current context. */
296
  void addNormalFormPair(Node n1, Node n2);
297
  /** Is (n1,n2) a normal form pair in the current context? */
298
  bool isNormalFormPair(Node n1, Node n2);
299
300
  //--------------------------for checkFlatForm
301
  /**
302
   * This checks whether there are flat form inferences between eqc[start] and
303
   * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
304
   * interferences in the reverse direction of the flat forms (note:
305
   * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev`
306
   * is true). For more details, see checkFlatForms below.
307
   */
308
  void checkFlatForm(std::vector<Node>& eqc, size_t start, bool isRev);
309
  /** debug print current flat forms on trace tc */
310
  void debugPrintFlatForms(const char* tc);
311
  //--------------------------end for checkFlatForm
312
313
  //--------------------------for checkCycles
314
  Node checkCycles(Node eqc, std::vector<Node>& curr, std::vector<Node>& exp);
315
  //--------------------------end for checkCycles
316
317
  //--------------------------for checkNormalFormsEq
318
  /** normalize equivalence class
319
   *
320
   * This method attempts to build a "normal form" for the equivalence class
321
   * of string term n (for more details on normal forms, see normal_form.h
322
   * or see Liang et al CAV 2014). In particular, this method checks whether the
323
   * current normal form for each term in this equivalence class is identical.
324
   * If it is not, then we add an inference via sendInference and abort the
325
   * call.
326
   *
327
   * stype is the string-like type of the equivalence class we are processing.
328
   */
329
  void normalizeEquivalenceClass(Node n, TypeNode stype);
330
  /**
331
   * For each term in the equivalence class of eqc, this adds data regarding its
332
   * normal form to normal_forms. The map term_to_nf_index maps terms to the
333
   * index in normal_forms where their normal form data is located.
334
   *
335
   * stype is the string-like type of the equivalence class we are processing.
336
   */
337
  void getNormalForms(Node eqc,
338
                      std::vector<NormalForm>& normal_forms,
339
                      std::map<Node, unsigned>& term_to_nf_index,
340
                      TypeNode stype);
341
  /** process normalize equivalence class
342
   *
343
   * This is called when an equivalence class eqc contains a set of terms that
344
   * have normal forms given by the argument normal_forms. It either
345
   * verifies that all normal forms in this vector are identical, or otherwise
346
   * adds a conflict, lemma, or inference via the sendInference method.
347
   *
348
   * To prioritize one inference versus another, it builds a set of possible
349
   * inferences, at most two for each pair of distinct normal forms,
350
   * corresponding to processing the normal form pair in the (forward, reverse)
351
   * directions. Once all possible inferences are recorded, it executes the
352
   * one with highest priority based on the enumeration type Inference.
353
   *
354
   * stype is the string-like type of the equivalence class we are processing.
355
   */
356
  void processNEqc(Node eqc,
357
                   std::vector<NormalForm>& normal_forms,
358
                   TypeNode stype);
359
  /** process simple normal equality
360
   *
361
   * This method is called when two equal terms have normal forms nfi and nfj.
362
   * It adds (typically at most one) possible inference to the vector pinfer.
363
   * This inference is in the form of an CoreInferInfo object, which stores the
364
   * necessary information regarding how to process the inference.
365
   *
366
   * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that
367
   *   we are currently checking. This method will increment this index until
368
   *   it finds an index where these vectors differ, or until it reaches the
369
   *   end of these vectors.
370
   * isRev: Whether we are processing the normal forms in reverse direction.
371
   *   Notice in this case the normal form vectors have been reversed, hence,
372
   *   many operations are identical to the forward case, e.g. index is
373
   *   incremented not decremented, while others require special care, e.g.
374
   *   constant strings "ABC" in the normal form vectors are not reversed to
375
   *   "CBA" and hence all operations should assume a flipped semantics for
376
   *   constants when isRev is true,
377
   * rproc: the number of string components on the suffix of the normal form of
378
   *   nfi and nfj that were already processed. This is used when using
379
   *   fowards/backwards traversals of normal forms to ensure that duplicate
380
   *   inferences are not processed.
381
   * pinfer: the set of possible inferences we add to.
382
   *
383
   * stype is the string-like type of the equivalence class we are processing.
384
   */
385
  void processSimpleNEq(NormalForm& nfi,
386
                        NormalForm& nfj,
387
                        unsigned& index,
388
                        bool isRev,
389
                        unsigned rproc,
390
                        std::vector<CoreInferInfo>& pinfer,
391
                        TypeNode stype);
392
  //--------------------------end for checkNormalFormsEq
393
394
  //--------------------------for checkNormalFormsEq with loops
395
  bool detectLoop(NormalForm& nfi,
396
                  NormalForm& nfj,
397
                  int index,
398
                  int& loop_in_i,
399
                  int& loop_in_j,
400
                  unsigned rproc);
401
402
  /**
403
   * Result of processLoop() below.
404
   */
405
  enum class ProcessLoopResult
406
  {
407
    /** Loop processing made an inference */
408
    INFERENCE,
409
    /** Loop processing detected a conflict */
410
    CONFLICT,
411
    /** Loop not processed or no loop detected */
412
    SKIPPED,
413
  };
414
415
  ProcessLoopResult processLoop(NormalForm& nfi,
416
                                NormalForm& nfj,
417
                                int loop_index,
418
                                int index,
419
                                CoreInferInfo& info);
420
  //--------------------------end for checkNormalFormsEq with loops
421
422
  //--------------------------for checkNormalFormsDeq
423
424
  /**
425
   * Given a pair of disequal strings with the same length, checks whether the
426
   * disequality holds. This may result in inferences or conflicts.
427
   *
428
   * @param n1 The first string in the disequality
429
   * @param n2 The second string in the disequality
430
   */
431
  void processDeq(Node n1, Node n2);
432
433
  /**
434
   * Given a pair of disequal strings with the same length and their normal
435
   * forms, checks whether the disequality holds. This may result in
436
   * inferences.
437
   *
438
   * @param nfi The normal form for the first string in the disequality
439
   * @param nfj The normal form for the second string in the disequality
440
   * @param ni The first string in the disequality
441
   * @param nj The second string in the disequality
442
   * @return true if the disequality is satisfied, false otherwise
443
   */
444
  bool processReverseDeq(std::vector<Node>& nfi,
445
                         std::vector<Node>& nfj,
446
                         Node ni,
447
                         Node nj);
448
449
  /**
450
   * Given a pair of disequal strings with the same length and their normal
451
   * forms, performs some simple checks whether the disequality holds. The
452
   * check is done starting from a given index and can either be performed on
453
   * reversed normal forms or the original normal forms. If the function cannot
454
   * show that a disequality holds, it updates the index to point to the first
455
   * element in the normal forms for which the relationship is unclear.
456
   *
457
   * @param nfi The normal form for the first string in the disequality
458
   * @param nfj The normal form for the second string in the disequality
459
   * @param ni The first string in the disequality
460
   * @param nj The second string in the disequality
461
   * @param index The index to start at. If this function returns false, the
462
   *              index points to the first index in the normal forms for which
463
   *              it is not known whether they are equal or disequal
464
   * @param isRev This should be true if the normal forms are reversed, false
465
   *              otherwise
466
   * @return true if the disequality is satisfied, false otherwise
467
   */
468
  bool processSimpleDeq(std::vector<Node>& nfi,
469
                        std::vector<Node>& nfj,
470
                        Node ni,
471
                        Node nj,
472
                        size_t& index,
473
                        bool isRev);
474
  /**
475
   * Process disequality by extensionality. If necessary, this may result
476
   * in inferences that follow from this disequality of the form:
477
   *   (not (= n1 n2)) => (not (= k1 k2))
478
   * where k1, k2 are either strings of length one or elements of a sequence.
479
   *
480
   * @param n1 The first string in the disequality
481
   * @param n2 The second string in the disequality
482
   */
483
  void processDeqExtensionality(Node n1, Node n2);
484
  //--------------------------end for checkNormalFormsDeq
485
486
  /** The solver state object */
487
  SolverState& d_state;
488
  /** The (custom) output channel of the theory of strings */
489
  InferenceManager& d_im;
490
  /** Reference to the term registry of theory of strings */
491
  TermRegistry& d_termReg;
492
  /** reference to the base solver, used for certain queries */
493
  BaseSolver& d_bsolver;
494
  /** Commonly used constants */
495
  Node d_true;
496
  Node d_false;
497
  Node d_zero;
498
  Node d_one;
499
  Node d_neg_one;
500
  /** empty vector (used for trivial explanations) */
501
  std::vector<Node> d_emptyVec;
502
  /**
503
   * The list of equivalence classes of type string. These are ordered based
504
   * on the ordering described in checkCycles.
505
   */
506
  std::vector<Node> d_strings_eqc;
507
  /** map from terms to their normal forms */
508
  std::map<Node, NormalForm> d_normal_form;
509
  /**
510
   * In certain cases, we know that two terms are equivalent despite
511
   * not having to verify their normal forms are identical. For example,
512
   * after applying the R-Loop rule to two terms a and b, we know they
513
   * are entailed to be equal in the current context without having
514
   * to look at their normal forms. We call (a,b) a normal form pair.
515
   *
516
   * We map representative terms a to a list of nodes b1,...,bn such that
517
   * (a,b1) ... (a, bn) are normal form pairs. This list is SAT-context
518
   * dependent. We use a context-dependent integer along with a context
519
   * indepedent map from nodes to lists of nodes to model this, given by
520
   * the two data members below.
521
   */
522
  NodeIntMap d_nfPairs;
523
  std::map<Node, std::vector<Node> > d_nf_pairs_data;
524
  /** list of non-congruent concat terms in each equivalence class */
525
  std::map<Node, std::vector<Node> > d_eqc;
526
  /**
527
   * The flat form for each equivalence class. For a term (str.++ t1 ... tn),
528
   * this is a list of the form (r_{i1} ... r_{im}) where each empty t1...tn
529
   * is dropped and the others are replaced in order with their representative.
530
   */
531
  std::map<Node, std::vector<Node> > d_flat_form;
532
  /**
533
   * For each component r_{i1} ... r_{im} in a flat form, this stores
534
   * the argument number of the t1 ... tn they were generated from.
535
   */
536
  std::map<Node, std::vector<int> > d_flat_form_index;
537
  /** Set of equalities for which we have applied extensionality. */
538
  NodeSet d_extDeq;
539
}; /* class CoreSolver */
540
541
}  // namespace strings
542
}  // namespace theory
543
}  // namespace cvc5
544
545
#endif /* CVC5__THEORY__STRINGS__CORE_SOLVER_H */