GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/core_solver.h Lines: 2 2 100.0 %
Date: 2021-03-22 Branches: 9 22 40.9 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file core_solver.h
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Andrew Reynolds, Andres Noetzli, Tianyi Liang
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Core solver for the theory of strings, responsible for reasoning
13
 ** string concatenation plus length constraints.
14
 **/
15
16
#include "cvc4_private.h"
17
18
#ifndef CVC4__THEORY__STRINGS__CORE_SOLVER_H
19
#define CVC4__THEORY__STRINGS__CORE_SOLVER_H
20
21
#include "context/cdhashset.h"
22
#include "context/cdlist.h"
23
#include "theory/strings/base_solver.h"
24
#include "theory/strings/infer_info.h"
25
#include "theory/strings/inference_manager.h"
26
#include "theory/strings/normal_form.h"
27
#include "theory/strings/solver_state.h"
28
#include "theory/strings/term_registry.h"
29
30
namespace CVC4 {
31
namespace theory {
32
namespace strings {
33
34
/**
35
 * This data structure encapsulates an inference for the core solver of the
36
 * theory of strings. This includes the form of the inference to be processed
37
 * by the inference manager, the side effects it generates for the core solver,
38
 * and information used for heuristics and debugging.
39
 */
40
25522
class CoreInferInfo
41
{
42
 public:
43
  CoreInferInfo(InferenceId id);
44
38994
  ~CoreInferInfo() {}
45
  /** The infer info of this class */
46
  InferInfo d_infer;
47
  /**
48
   * The pending phase requirements, see InferenceManager::sendPhaseRequirement.
49
   */
50
  std::map<Node, bool> d_pendingPhase;
51
  /**
52
   * The index in the normal forms under which this inference is addressing.
53
   * For example, if the inference is inferring x = y from |x|=|y| and
54
   *   w ++ x ++ ... = w ++ y ++ ...
55
   * then d_index is 1, since x and y are at index 1 in these concat terms.
56
   */
57
  unsigned d_index;
58
  /**
59
   * The normal form pair that is cached as a result of this inference.
60
   */
61
  Node d_nfPair[2];
62
  /** for debugging
63
   *
64
   * The base pair of strings d_i/d_j that led to the inference, and whether
65
   * (d_rev) we were processing the normal forms of these strings in reverse
66
   * direction.
67
   */
68
  Node d_i;
69
  Node d_j;
70
  bool d_rev;
71
};
72
73
/** The core solver for the theory of strings
74
 *
75
 * This implements techniques for handling (dis)equalities involving
76
 * string concatenation terms based on the procedure by Liang et al CAV 2014.
77
 */
78
class CoreSolver
79
{
80
  friend class InferenceManager;
81
  using NodeIntMap = context::CDHashMap<Node, int, NodeHashFunction>;
82
83
 public:
84
  CoreSolver(SolverState& s,
85
             InferenceManager& im,
86
             TermRegistry& tr,
87
             BaseSolver& bs);
88
  ~CoreSolver();
89
90
  //-----------------------inference steps
91
  /** check cycles
92
   *
93
   * This inference schema ensures that a containment ordering < over the
94
   * string equivalence classes is acyclic. We define this ordering < such that
95
   * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
96
   * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
97
   * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
98
   * form components that do not constitute this chain, e.g. (w1...wk) \ sj
99
   * in the flat form above, must be empty.
100
   *
101
   * For more details, see the inference S-Cycle in Liang et al CAV 2014.
102
   */
103
  void checkCycles();
104
  /** check flat forms
105
   *
106
   * This applies an inference schema based on "flat forms". The flat form of a
107
   * string term t is a vector of representative terms [r1, ..., rn] such that
108
   * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
109
   * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
110
   * the equivalence class containing t1. For example, if t is y ++ z ++ z,
111
   * E is { y = "", w = z }, and w is the representative of the equivalence
112
   * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
113
   * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
114
   * We may infer various facts based on this pair of terms. For example:
115
   *   ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
116
   *   rn = sn, if n=m and rj == sj for each j < n,
117
   *   ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
118
   * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
119
   * respectively.
120
   *
121
   * Notice that this inference scheme is an optimization and not needed for
122
   * model-soundness. The motivation for this schema is that it is simpler than
123
   * checkNormalFormsEq, which can be seen as a recursive version of this
124
   * schema (see difference of "normal form" vs "flat form" below), and
125
   * checkNormalFormsEq is complete, in the sense that if it passes with no
126
   * inferences, we are ensured that all string equalities in the current
127
   * context are satisfied.
128
   *
129
   * Must call checkCycles before this function in a strategy.
130
   */
131
  void checkFlatForms();
132
  /** check normal forms equalities
133
   *
134
   * This applies an inference schema based on "normal forms". The normal form
135
   * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
136
   * where t1...tn are concatenation terms is a vector of representative terms
137
   * [r1, ..., rm] such that:
138
   * (1) if n=0, then m=1 and r1 is the representative of e,
139
   * (2) if n>0, say
140
   *   t1 = t^1_1 ++ ... ++ t^1_m_1
141
   *   ...
142
   *   tn = t^1_n ++ ... ++ t^_m_n
143
   * for *each* i=1, ..., n, the result of concenating the normal forms of
144
   * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
145
   * can be assigned a normal form, then all equalities between ti and tj are
146
   * satisfied by all models that correspond to extensions of the current
147
   * assignment. For further detail on this terminology, see Liang et al
148
   * CAV 2014.
149
   *
150
   * Notice that all constant words are implicitly considered concatentation
151
   * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
152
   *
153
   * At a high level, we build normal forms for equivalence classes bottom-up,
154
   * starting with equivalence classes that are minimal with respect to the
155
   * containment ordering < computed during checkCycles. While computing a
156
   * normal form for an equivalence class, we may infer equalities between
157
   * components of strings that must be equal (e.g. x=y when x++z == y++w when
158
   * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
159
   * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
160
   * string terms into smaller components using fresh skolem variables (see
161
   * Inference values with names "SPLIT"). We also may introduce regular
162
   * expression constraints in this method for looping word equations (see
163
   * the Inference INFER_FLOOP).
164
   *
165
   * If this inference schema returns no facts, lemmas, or conflicts, then
166
   * we have successfully assigned normal forms for all equivalence classes, as
167
   * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
168
   * conflict based on inferences in the Inference enumeration above.
169
   */
170
  void checkNormalFormsEq();
171
  /** check normal forms disequalities
172
   *
173
   * This inference schema can be seen as the converse of the above schema. In
174
   * particular, it ensures that each pair of distinct equivalence classes
175
   * e1 and e2 have distinct normal forms.
176
   *
177
   * This method considers all pairs of distinct equivalence classes (e1,e2)
178
   * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
179
   * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
180
   * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
181
   * disequal and have the same length, then x1 and x2 have distinct normal
182
   * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
183
   * or split on an equality between ri and si.
184
   *
185
   * If this inference schema returns no facts, lemmas, or conflicts, then all
186
   * disequalities between string terms are satisfied by all models that are
187
   * extensions of the current assignment.
188
   */
189
  void checkNormalFormsDeq();
190
  /** check lengths for equivalence classes
191
   *
192
   * This inference schema adds lemmas of the form:
193
   *   E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
194
   * where [r1, ..., rn] is the normal form of the equivalence class containing
195
   * x. This schema is not required for correctness but experimentally has
196
   * shown to be helpful.
197
   */
198
  void checkLengthsEqc();
199
  //-----------------------end inference steps
200
201
  //--------------------------- query functions
202
  /**
203
   * Get normal form for string term n. For details on this data structure,
204
   * see theory/strings/normal_form.h.
205
   *
206
   * This query is valid after a successful call to checkNormalFormsEq, e.g.
207
   * a call where the inference manager was not given any lemmas or inferences.
208
   */
209
  NormalForm& getNormalForm(Node n);
210
  /** get normal string
211
   *
212
   * This method returns the node that is equivalent to the normal form of x,
213
   * and adds the corresponding explanation to nf_exp.
214
   *
215
   * For example, if x = y ++ z is an assertion in the current context, then
216
   * this method returns the term y ++ z and adds x = y ++ z to nf_exp.
217
   */
218
  Node getNormalString(Node x, std::vector<Node>& nf_exp);
219
  //-------------------------- end query functions
220
221
  /**
222
   * This returns the conclusion of the proof rule corresponding to splitting
223
   * on the arrangement of terms x and y appearing in an equation of the form
224
   *   x ++ x' = y ++ y' or x' ++ x = y' ++ y
225
   * where we are in the second case if isRev is true. This method is called
226
   * both by the core solver and by the strings proof checker.
227
   *
228
   * @param x The first term
229
   * @param y The second term
230
   * @param rule The proof rule whose conclusion we are asking for
231
   * @param isRev Whether the equation is in a reverse direction
232
   * @param skc The skolem cache (to allocate fresh variables if necessary)
233
   * @param newSkolems The vector to add new variables to
234
   * @return The conclusion of the inference.
235
   */
236
  static Node getConclusion(Node x,
237
                            Node y,
238
                            PfRule rule,
239
                            bool isRev,
240
                            SkolemCache* skc,
241
                            std::vector<Node>& newSkolems);
242
  /**
243
   * Get sufficient non-empty overlap of string constants c and d.
244
   *
245
   * This is called when handling equations of the form:
246
   *   x ++ d ++ ... = c ++ ...
247
   * when x is non-empty and non-constant.
248
   *
249
   * This returns the maximal index in c which x must have as a prefix, which
250
   * notice is an integer >= 1 since x is non-empty.
251
   *
252
   * @param c The first constant
253
   * @param d The second constant
254
   * @param isRev Whether the equation is in the reverse direction
255
   * @return The position in c.
256
   */
257
  static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev);
258
  /**
259
   * This returns the conclusion of the decompose proof rule. This returns
260
   * a conjunction of splitting string x into pieces based on length l, e.g.:
261
   *   x = k_1 ++ k_2
262
   * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
263
   * length l if isRev is false (resp. true). The function also adds a
264
   * length constraint len(k_1) = l (resp. len(k_2) = l). Note that adding this
265
   * constraint to the conclusion is *not* optional, since the skolems k_1 and
266
   * k_2 may be shared, hence their length constraint must be guarded by the
267
   * premises of this inference.
268
   *
269
   * @param x The string term
270
   * @param l The length term
271
   * @param isRev Whether the equation is in a reverse direction
272
   * @param skc The skolem cache (to allocate fresh variables if necessary)
273
   * @param newSkolems The vector to add new variables to
274
   * @return The conclusion of the inference.
275
   */
276
  static Node getDecomposeConclusion(Node x,
277
                                     Node l,
278
                                     bool isRev,
279
                                     SkolemCache* skc,
280
                                     std::vector<Node>& newSkolems);
281
282
 private:
283
  /**
284
   * This processes the infer info ii as an inference. In more detail, it calls
285
   * the inference manager to process the inference, and updates the set of
286
   * normal form pairs. Returns true if the conclusion of ii was not true
287
   * after rewriting. If the conclusion is true, this method does nothing.
288
   */
289
  bool processInferInfo(CoreInferInfo& ii);
290
  /** Add that (n1,n2) is a normal form pair in the current context. */
291
  void addNormalFormPair(Node n1, Node n2);
292
  /** Is (n1,n2) a normal form pair in the current context? */
293
  bool isNormalFormPair(Node n1, Node n2);
294
295
  //--------------------------for checkFlatForm
296
  /**
297
   * This checks whether there are flat form inferences between eqc[start] and
298
   * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
299
   * interferences in the reverse direction of the flat forms (note:
300
   * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev`
301
   * is true). For more details, see checkFlatForms below.
302
   */
303
  void checkFlatForm(std::vector<Node>& eqc, size_t start, bool isRev);
304
  /** debug print current flat forms on trace tc */
305
  void debugPrintFlatForms(const char* tc);
306
  //--------------------------end for checkFlatForm
307
308
  //--------------------------for checkCycles
309
  Node checkCycles(Node eqc, std::vector<Node>& curr, std::vector<Node>& exp);
310
  //--------------------------end for checkCycles
311
312
  //--------------------------for checkNormalFormsEq
313
  /** normalize equivalence class
314
   *
315
   * This method attempts to build a "normal form" for the equivalence class
316
   * of string term n (for more details on normal forms, see normal_form.h
317
   * or see Liang et al CAV 2014). In particular, this method checks whether the
318
   * current normal form for each term in this equivalence class is identical.
319
   * If it is not, then we add an inference via sendInference and abort the
320
   * call.
321
   *
322
   * stype is the string-like type of the equivalence class we are processing.
323
   */
324
  void normalizeEquivalenceClass(Node n, TypeNode stype);
325
  /**
326
   * For each term in the equivalence class of eqc, this adds data regarding its
327
   * normal form to normal_forms. The map term_to_nf_index maps terms to the
328
   * index in normal_forms where their normal form data is located.
329
   *
330
   * stype is the string-like type of the equivalence class we are processing.
331
   */
332
  void getNormalForms(Node eqc,
333
                      std::vector<NormalForm>& normal_forms,
334
                      std::map<Node, unsigned>& term_to_nf_index,
335
                      TypeNode stype);
336
  /** process normalize equivalence class
337
   *
338
   * This is called when an equivalence class eqc contains a set of terms that
339
   * have normal forms given by the argument normal_forms. It either
340
   * verifies that all normal forms in this vector are identical, or otherwise
341
   * adds a conflict, lemma, or inference via the sendInference method.
342
   *
343
   * To prioritize one inference versus another, it builds a set of possible
344
   * inferences, at most two for each pair of distinct normal forms,
345
   * corresponding to processing the normal form pair in the (forward, reverse)
346
   * directions. Once all possible inferences are recorded, it executes the
347
   * one with highest priority based on the enumeration type Inference.
348
   *
349
   * stype is the string-like type of the equivalence class we are processing.
350
   */
351
  void processNEqc(Node eqc,
352
                   std::vector<NormalForm>& normal_forms,
353
                   TypeNode stype);
354
  /** process simple normal equality
355
   *
356
   * This method is called when two equal terms have normal forms nfi and nfj.
357
   * It adds (typically at most one) possible inference to the vector pinfer.
358
   * This inference is in the form of an CoreInferInfo object, which stores the
359
   * necessary information regarding how to process the inference.
360
   *
361
   * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that
362
   *   we are currently checking. This method will increment this index until
363
   *   it finds an index where these vectors differ, or until it reaches the
364
   *   end of these vectors.
365
   * isRev: Whether we are processing the normal forms in reverse direction.
366
   *   Notice in this case the normal form vectors have been reversed, hence,
367
   *   many operations are identical to the forward case, e.g. index is
368
   *   incremented not decremented, while others require special care, e.g.
369
   *   constant strings "ABC" in the normal form vectors are not reversed to
370
   *   "CBA" and hence all operations should assume a flipped semantics for
371
   *   constants when isRev is true,
372
   * rproc: the number of string components on the suffix of the normal form of
373
   *   nfi and nfj that were already processed. This is used when using
374
   *   fowards/backwards traversals of normal forms to ensure that duplicate
375
   *   inferences are not processed.
376
   * pinfer: the set of possible inferences we add to.
377
   *
378
   * stype is the string-like type of the equivalence class we are processing.
379
   */
380
  void processSimpleNEq(NormalForm& nfi,
381
                        NormalForm& nfj,
382
                        unsigned& index,
383
                        bool isRev,
384
                        unsigned rproc,
385
                        std::vector<CoreInferInfo>& pinfer,
386
                        TypeNode stype);
387
  //--------------------------end for checkNormalFormsEq
388
389
  //--------------------------for checkNormalFormsEq with loops
390
  bool detectLoop(NormalForm& nfi,
391
                  NormalForm& nfj,
392
                  int index,
393
                  int& loop_in_i,
394
                  int& loop_in_j,
395
                  unsigned rproc);
396
397
  /**
398
   * Result of processLoop() below.
399
   */
400
  enum class ProcessLoopResult
401
  {
402
    /** Loop processing made an inference */
403
    INFERENCE,
404
    /** Loop processing detected a conflict */
405
    CONFLICT,
406
    /** Loop not processed or no loop detected */
407
    SKIPPED,
408
  };
409
410
  ProcessLoopResult processLoop(NormalForm& nfi,
411
                                NormalForm& nfj,
412
                                int loop_index,
413
                                int index,
414
                                CoreInferInfo& info);
415
  //--------------------------end for checkNormalFormsEq with loops
416
417
  //--------------------------for checkNormalFormsDeq
418
419
  /**
420
   * Given a pair of disequal strings with the same length, checks whether the
421
   * disequality holds. This may result in inferences or conflicts.
422
   *
423
   * @param n1 The first string in the disequality
424
   * @param n2 The second string in the disequality
425
   */
426
  void processDeq(Node n1, Node n2);
427
428
  /**
429
   * Given a pair of disequal strings with the same length and their normal
430
   * forms, checks whether the disequality holds. This may result in
431
   * inferences.
432
   *
433
   * @param nfi The normal form for the first string in the disequality
434
   * @param nfj The normal form for the second string in the disequality
435
   * @param ni The first string in the disequality
436
   * @param nj The second string in the disequality
437
   * @return true if the disequality is satisfied, false otherwise
438
   */
439
  bool processReverseDeq(std::vector<Node>& nfi,
440
                         std::vector<Node>& nfj,
441
                         Node ni,
442
                         Node nj);
443
444
  /**
445
   * Given a pair of disequal strings with the same length and their normal
446
   * forms, performs some simple checks whether the disequality holds. The
447
   * check is done starting from a given index and can either be performed on
448
   * reversed normal forms or the original normal forms. If the function cannot
449
   * show that a disequality holds, it updates the index to point to the first
450
   * element in the normal forms for which the relationship is unclear.
451
   *
452
   * @param nfi The normal form for the first string in the disequality
453
   * @param nfj The normal form for the second string in the disequality
454
   * @param ni The first string in the disequality
455
   * @param nj The second string in the disequality
456
   * @param index The index to start at. If this function returns false, the
457
   *              index points to the first index in the normal forms for which
458
   *              it is not known whether they are equal or disequal
459
   * @param isRev This should be true if the normal forms are reversed, false
460
   *              otherwise
461
   * @return true if the disequality is satisfied, false otherwise
462
   */
463
  bool processSimpleDeq(std::vector<Node>& nfi,
464
                        std::vector<Node>& nfj,
465
                        Node ni,
466
                        Node nj,
467
                        size_t& index,
468
                        bool isRev);
469
  //--------------------------end for checkNormalFormsDeq
470
471
  /** The solver state object */
472
  SolverState& d_state;
473
  /** The (custom) output channel of the theory of strings */
474
  InferenceManager& d_im;
475
  /** Reference to the term registry of theory of strings */
476
  TermRegistry& d_termReg;
477
  /** reference to the base solver, used for certain queries */
478
  BaseSolver& d_bsolver;
479
  /** Commonly used constants */
480
  Node d_true;
481
  Node d_false;
482
  Node d_zero;
483
  Node d_one;
484
  Node d_neg_one;
485
  /** empty vector (used for trivial explanations) */
486
  std::vector<Node> d_emptyVec;
487
  /**
488
   * The list of equivalence classes of type string. These are ordered based
489
   * on the ordering described in checkCycles.
490
   */
491
  std::vector<Node> d_strings_eqc;
492
  /** map from terms to their normal forms */
493
  std::map<Node, NormalForm> d_normal_form;
494
  /**
495
   * In certain cases, we know that two terms are equivalent despite
496
   * not having to verify their normal forms are identical. For example,
497
   * after applying the R-Loop rule to two terms a and b, we know they
498
   * are entailed to be equal in the current context without having
499
   * to look at their normal forms. We call (a,b) a normal form pair.
500
   *
501
   * We map representative terms a to a list of nodes b1,...,bn such that
502
   * (a,b1) ... (a, bn) are normal form pairs. This list is SAT-context
503
   * dependent. We use a context-dependent integer along with a context
504
   * indepedent map from nodes to lists of nodes to model this, given by
505
   * the two data members below.
506
   */
507
  NodeIntMap d_nfPairs;
508
  std::map<Node, std::vector<Node> > d_nf_pairs_data;
509
  /** list of non-congruent concat terms in each equivalence class */
510
  std::map<Node, std::vector<Node> > d_eqc;
511
  /**
512
   * The flat form for each equivalence class. For a term (str.++ t1 ... tn),
513
   * this is a list of the form (r_{i1} ... r_{im}) where each empty t1...tn
514
   * is dropped and the others are replaced in order with their representative.
515
   */
516
  std::map<Node, std::vector<Node> > d_flat_form;
517
  /**
518
   * For each component r_{i1} ... r_{im} in a flat form, this stores
519
   * the argument number of the t1 ... tn they were generated from.
520
   */
521
  std::map<Node, std::vector<int> > d_flat_form_index;
522
}; /* class CoreSolver */
523
524
}  // namespace strings
525
}  // namespace theory
526
}  // namespace CVC4
527
528
#endif /* CVC4__THEORY__STRINGS__CORE_SOLVER_H */