GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/core_solver.h Lines: 2 2 100.0 %
Date: 2021-05-22 Branches: 9 22 40.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Core solver for the theory of strings, responsible for reasoning
14
 * string concatenation plus length constraints.
15
 */
16
17
#include "cvc5_private.h"
18
19
#ifndef CVC5__THEORY__STRINGS__CORE_SOLVER_H
20
#define CVC5__THEORY__STRINGS__CORE_SOLVER_H
21
22
#include "context/cdhashset.h"
23
#include "context/cdlist.h"
24
#include "theory/strings/base_solver.h"
25
#include "theory/strings/infer_info.h"
26
#include "theory/strings/inference_manager.h"
27
#include "theory/strings/normal_form.h"
28
#include "theory/strings/solver_state.h"
29
#include "theory/strings/term_registry.h"
30
31
namespace cvc5 {
32
namespace theory {
33
namespace strings {
34
35
/**
36
 * This data structure encapsulates an inference for the core solver of the
37
 * theory of strings. This includes the form of the inference to be processed
38
 * by the inference manager, the side effects it generates for the core solver,
39
 * and information used for heuristics and debugging.
40
 */
41
23710
class CoreInferInfo
42
{
43
 public:
44
  CoreInferInfo(InferenceId id);
45
36257
  ~CoreInferInfo() {}
46
  /** The infer info of this class */
47
  InferInfo d_infer;
48
  /**
49
   * The pending phase requirements, see InferenceManager::sendPhaseRequirement.
50
   */
51
  std::map<Node, bool> d_pendingPhase;
52
  /**
53
   * The index in the normal forms under which this inference is addressing.
54
   * For example, if the inference is inferring x = y from |x|=|y| and
55
   *   w ++ x ++ ... = w ++ y ++ ...
56
   * then d_index is 1, since x and y are at index 1 in these concat terms.
57
   */
58
  unsigned d_index;
59
  /**
60
   * The normal form pair that is cached as a result of this inference.
61
   */
62
  Node d_nfPair[2];
63
  /** for debugging
64
   *
65
   * The base pair of strings d_i/d_j that led to the inference, and whether
66
   * (d_rev) we were processing the normal forms of these strings in reverse
67
   * direction.
68
   */
69
  Node d_i;
70
  Node d_j;
71
  bool d_rev;
72
};
73
74
/** The core solver for the theory of strings
75
 *
76
 * This implements techniques for handling (dis)equalities involving
77
 * string concatenation terms based on the procedure by Liang et al CAV 2014.
78
 */
79
class CoreSolver
80
{
81
  friend class InferenceManager;
82
  using NodeIntMap = context::CDHashMap<Node, int>;
83
84
 public:
85
  CoreSolver(SolverState& s,
86
             InferenceManager& im,
87
             TermRegistry& tr,
88
             BaseSolver& bs);
89
  ~CoreSolver();
90
91
  //-----------------------inference steps
92
  /** check cycles
93
   *
94
   * This inference schema ensures that a containment ordering < over the
95
   * string equivalence classes is acyclic. We define this ordering < such that
96
   * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
97
   * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
98
   * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
99
   * form components that do not constitute this chain, e.g. (w1...wk) \ sj
100
   * in the flat form above, must be empty.
101
   *
102
   * For more details, see the inference S-Cycle in Liang et al CAV 2014.
103
   */
104
  void checkCycles();
105
  /** check flat forms
106
   *
107
   * This applies an inference schema based on "flat forms". The flat form of a
108
   * string term t is a vector of representative terms [r1, ..., rn] such that
109
   * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
110
   * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
111
   * the equivalence class containing t1. For example, if t is y ++ z ++ z,
112
   * E is { y = "", w = z }, and w is the representative of the equivalence
113
   * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
114
   * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
115
   * We may infer various facts based on this pair of terms. For example:
116
   *   ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
117
   *   rn = sn, if n=m and rj == sj for each j < n,
118
   *   ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
119
   * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
120
   * respectively.
121
   *
122
   * Notice that this inference scheme is an optimization and not needed for
123
   * model-soundness. The motivation for this schema is that it is simpler than
124
   * checkNormalFormsEq, which can be seen as a recursive version of this
125
   * schema (see difference of "normal form" vs "flat form" below), and
126
   * checkNormalFormsEq is complete, in the sense that if it passes with no
127
   * inferences, we are ensured that all string equalities in the current
128
   * context are satisfied.
129
   *
130
   * Must call checkCycles before this function in a strategy.
131
   */
132
  void checkFlatForms();
133
  /** check normal forms equalities
134
   *
135
   * This applies an inference schema based on "normal forms". The normal form
136
   * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
137
   * where t1...tn are concatenation terms is a vector of representative terms
138
   * [r1, ..., rm] such that:
139
   * (1) if n=0, then m=1 and r1 is the representative of e,
140
   * (2) if n>0, say
141
   *   t1 = t^1_1 ++ ... ++ t^1_m_1
142
   *   ...
143
   *   tn = t^1_n ++ ... ++ t^_m_n
144
   * for *each* i=1, ..., n, the result of concenating the normal forms of
145
   * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
146
   * can be assigned a normal form, then all equalities between ti and tj are
147
   * satisfied by all models that correspond to extensions of the current
148
   * assignment. For further detail on this terminology, see Liang et al
149
   * CAV 2014.
150
   *
151
   * Notice that all constant words are implicitly considered concatentation
152
   * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
153
   *
154
   * At a high level, we build normal forms for equivalence classes bottom-up,
155
   * starting with equivalence classes that are minimal with respect to the
156
   * containment ordering < computed during checkCycles. While computing a
157
   * normal form for an equivalence class, we may infer equalities between
158
   * components of strings that must be equal (e.g. x=y when x++z == y++w when
159
   * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
160
   * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
161
   * string terms into smaller components using fresh skolem variables (see
162
   * Inference values with names "SPLIT"). We also may introduce regular
163
   * expression constraints in this method for looping word equations (see
164
   * the Inference INFER_FLOOP).
165
   *
166
   * If this inference schema returns no facts, lemmas, or conflicts, then
167
   * we have successfully assigned normal forms for all equivalence classes, as
168
   * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
169
   * conflict based on inferences in the Inference enumeration above.
170
   */
171
  void checkNormalFormsEq();
172
  /** check normal forms disequalities
173
   *
174
   * This inference schema can be seen as the converse of the above schema. In
175
   * particular, it ensures that each pair of distinct equivalence classes
176
   * e1 and e2 have distinct normal forms.
177
   *
178
   * This method considers all pairs of distinct equivalence classes (e1,e2)
179
   * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
180
   * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
181
   * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
182
   * disequal and have the same length, then x1 and x2 have distinct normal
183
   * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
184
   * or split on an equality between ri and si.
185
   *
186
   * If this inference schema returns no facts, lemmas, or conflicts, then all
187
   * disequalities between string terms are satisfied by all models that are
188
   * extensions of the current assignment.
189
   */
190
  void checkNormalFormsDeq();
191
  /** check lengths for equivalence classes
192
   *
193
   * This inference schema adds lemmas of the form:
194
   *   E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
195
   * where [r1, ..., rn] is the normal form of the equivalence class containing
196
   * x. This schema is not required for correctness but experimentally has
197
   * shown to be helpful.
198
   */
199
  void checkLengthsEqc();
200
  //-----------------------end inference steps
201
202
  //--------------------------- query functions
203
  /**
204
   * Get normal form for string term n. For details on this data structure,
205
   * see theory/strings/normal_form.h.
206
   *
207
   * This query is valid after a successful call to checkNormalFormsEq, e.g.
208
   * a call where the inference manager was not given any lemmas or inferences.
209
   */
210
  NormalForm& getNormalForm(Node n);
211
  /** get normal string
212
   *
213
   * This method returns the node that is equivalent to the normal form of x,
214
   * and adds the corresponding explanation to nf_exp.
215
   *
216
   * For example, if x = y ++ z is an assertion in the current context, then
217
   * this method returns the term y ++ z and adds x = y ++ z to nf_exp.
218
   */
219
  Node getNormalString(Node x, std::vector<Node>& nf_exp);
220
  //-------------------------- end query functions
221
222
  /**
223
   * This returns the conclusion of the proof rule corresponding to splitting
224
   * on the arrangement of terms x and y appearing in an equation of the form
225
   *   x ++ x' = y ++ y' or x' ++ x = y' ++ y
226
   * where we are in the second case if isRev is true. This method is called
227
   * both by the core solver and by the strings proof checker.
228
   *
229
   * @param x The first term
230
   * @param y The second term
231
   * @param rule The proof rule whose conclusion we are asking for
232
   * @param isRev Whether the equation is in a reverse direction
233
   * @param skc The skolem cache (to allocate fresh variables if necessary)
234
   * @param newSkolems The vector to add new variables to
235
   * @return The conclusion of the inference.
236
   */
237
  static Node getConclusion(Node x,
238
                            Node y,
239
                            PfRule rule,
240
                            bool isRev,
241
                            SkolemCache* skc,
242
                            std::vector<Node>& newSkolems);
243
  /**
244
   * Get sufficient non-empty overlap of string constants c and d.
245
   *
246
   * This is called when handling equations of the form:
247
   *   x ++ d ++ ... = c ++ ...
248
   * when x is non-empty and non-constant.
249
   *
250
   * This returns the maximal index in c which x must have as a prefix, which
251
   * notice is an integer >= 1 since x is non-empty.
252
   *
253
   * @param c The first constant
254
   * @param d The second constant
255
   * @param isRev Whether the equation is in the reverse direction
256
   * @return The position in c.
257
   */
258
  static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev);
259
  /**
260
   * This returns the conclusion of the decompose proof rule. This returns
261
   * a conjunction of splitting string x into pieces based on length l, e.g.:
262
   *   x = k_1 ++ k_2
263
   * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
264
   * length l if isRev is false (resp. true). The function also adds a
265
   * length constraint len(k_1) = l (resp. len(k_2) = l). Note that adding this
266
   * constraint to the conclusion is *not* optional, since the skolems k_1 and
267
   * k_2 may be shared, hence their length constraint must be guarded by the
268
   * premises of this inference.
269
   *
270
   * @param x The string term
271
   * @param l The length term
272
   * @param isRev Whether the equation is in a reverse direction
273
   * @param skc The skolem cache (to allocate fresh variables if necessary)
274
   * @param newSkolems The vector to add new variables to
275
   * @return The conclusion of the inference.
276
   */
277
  static Node getDecomposeConclusion(Node x,
278
                                     Node l,
279
                                     bool isRev,
280
                                     SkolemCache* skc,
281
                                     std::vector<Node>& newSkolems);
282
283
 private:
284
  /**
285
   * This processes the infer info ii as an inference. In more detail, it calls
286
   * the inference manager to process the inference, and updates the set of
287
   * normal form pairs. Returns true if the conclusion of ii was not true
288
   * after rewriting. If the conclusion is true, this method does nothing.
289
   */
290
  bool processInferInfo(CoreInferInfo& ii);
291
  /** Add that (n1,n2) is a normal form pair in the current context. */
292
  void addNormalFormPair(Node n1, Node n2);
293
  /** Is (n1,n2) a normal form pair in the current context? */
294
  bool isNormalFormPair(Node n1, Node n2);
295
296
  //--------------------------for checkFlatForm
297
  /**
298
   * This checks whether there are flat form inferences between eqc[start] and
299
   * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
300
   * interferences in the reverse direction of the flat forms (note:
301
   * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev`
302
   * is true). For more details, see checkFlatForms below.
303
   */
304
  void checkFlatForm(std::vector<Node>& eqc, size_t start, bool isRev);
305
  /** debug print current flat forms on trace tc */
306
  void debugPrintFlatForms(const char* tc);
307
  //--------------------------end for checkFlatForm
308
309
  //--------------------------for checkCycles
310
  Node checkCycles(Node eqc, std::vector<Node>& curr, std::vector<Node>& exp);
311
  //--------------------------end for checkCycles
312
313
  //--------------------------for checkNormalFormsEq
314
  /** normalize equivalence class
315
   *
316
   * This method attempts to build a "normal form" for the equivalence class
317
   * of string term n (for more details on normal forms, see normal_form.h
318
   * or see Liang et al CAV 2014). In particular, this method checks whether the
319
   * current normal form for each term in this equivalence class is identical.
320
   * If it is not, then we add an inference via sendInference and abort the
321
   * call.
322
   *
323
   * stype is the string-like type of the equivalence class we are processing.
324
   */
325
  void normalizeEquivalenceClass(Node n, TypeNode stype);
326
  /**
327
   * For each term in the equivalence class of eqc, this adds data regarding its
328
   * normal form to normal_forms. The map term_to_nf_index maps terms to the
329
   * index in normal_forms where their normal form data is located.
330
   *
331
   * stype is the string-like type of the equivalence class we are processing.
332
   */
333
  void getNormalForms(Node eqc,
334
                      std::vector<NormalForm>& normal_forms,
335
                      std::map<Node, unsigned>& term_to_nf_index,
336
                      TypeNode stype);
337
  /** process normalize equivalence class
338
   *
339
   * This is called when an equivalence class eqc contains a set of terms that
340
   * have normal forms given by the argument normal_forms. It either
341
   * verifies that all normal forms in this vector are identical, or otherwise
342
   * adds a conflict, lemma, or inference via the sendInference method.
343
   *
344
   * To prioritize one inference versus another, it builds a set of possible
345
   * inferences, at most two for each pair of distinct normal forms,
346
   * corresponding to processing the normal form pair in the (forward, reverse)
347
   * directions. Once all possible inferences are recorded, it executes the
348
   * one with highest priority based on the enumeration type Inference.
349
   *
350
   * stype is the string-like type of the equivalence class we are processing.
351
   */
352
  void processNEqc(Node eqc,
353
                   std::vector<NormalForm>& normal_forms,
354
                   TypeNode stype);
355
  /** process simple normal equality
356
   *
357
   * This method is called when two equal terms have normal forms nfi and nfj.
358
   * It adds (typically at most one) possible inference to the vector pinfer.
359
   * This inference is in the form of an CoreInferInfo object, which stores the
360
   * necessary information regarding how to process the inference.
361
   *
362
   * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that
363
   *   we are currently checking. This method will increment this index until
364
   *   it finds an index where these vectors differ, or until it reaches the
365
   *   end of these vectors.
366
   * isRev: Whether we are processing the normal forms in reverse direction.
367
   *   Notice in this case the normal form vectors have been reversed, hence,
368
   *   many operations are identical to the forward case, e.g. index is
369
   *   incremented not decremented, while others require special care, e.g.
370
   *   constant strings "ABC" in the normal form vectors are not reversed to
371
   *   "CBA" and hence all operations should assume a flipped semantics for
372
   *   constants when isRev is true,
373
   * rproc: the number of string components on the suffix of the normal form of
374
   *   nfi and nfj that were already processed. This is used when using
375
   *   fowards/backwards traversals of normal forms to ensure that duplicate
376
   *   inferences are not processed.
377
   * pinfer: the set of possible inferences we add to.
378
   *
379
   * stype is the string-like type of the equivalence class we are processing.
380
   */
381
  void processSimpleNEq(NormalForm& nfi,
382
                        NormalForm& nfj,
383
                        unsigned& index,
384
                        bool isRev,
385
                        unsigned rproc,
386
                        std::vector<CoreInferInfo>& pinfer,
387
                        TypeNode stype);
388
  //--------------------------end for checkNormalFormsEq
389
390
  //--------------------------for checkNormalFormsEq with loops
391
  bool detectLoop(NormalForm& nfi,
392
                  NormalForm& nfj,
393
                  int index,
394
                  int& loop_in_i,
395
                  int& loop_in_j,
396
                  unsigned rproc);
397
398
  /**
399
   * Result of processLoop() below.
400
   */
401
  enum class ProcessLoopResult
402
  {
403
    /** Loop processing made an inference */
404
    INFERENCE,
405
    /** Loop processing detected a conflict */
406
    CONFLICT,
407
    /** Loop not processed or no loop detected */
408
    SKIPPED,
409
  };
410
411
  ProcessLoopResult processLoop(NormalForm& nfi,
412
                                NormalForm& nfj,
413
                                int loop_index,
414
                                int index,
415
                                CoreInferInfo& info);
416
  //--------------------------end for checkNormalFormsEq with loops
417
418
  //--------------------------for checkNormalFormsDeq
419
420
  /**
421
   * Given a pair of disequal strings with the same length, checks whether the
422
   * disequality holds. This may result in inferences or conflicts.
423
   *
424
   * @param n1 The first string in the disequality
425
   * @param n2 The second string in the disequality
426
   */
427
  void processDeq(Node n1, Node n2);
428
429
  /**
430
   * Given a pair of disequal strings with the same length and their normal
431
   * forms, checks whether the disequality holds. This may result in
432
   * inferences.
433
   *
434
   * @param nfi The normal form for the first string in the disequality
435
   * @param nfj The normal form for the second string in the disequality
436
   * @param ni The first string in the disequality
437
   * @param nj The second string in the disequality
438
   * @return true if the disequality is satisfied, false otherwise
439
   */
440
  bool processReverseDeq(std::vector<Node>& nfi,
441
                         std::vector<Node>& nfj,
442
                         Node ni,
443
                         Node nj);
444
445
  /**
446
   * Given a pair of disequal strings with the same length and their normal
447
   * forms, performs some simple checks whether the disequality holds. The
448
   * check is done starting from a given index and can either be performed on
449
   * reversed normal forms or the original normal forms. If the function cannot
450
   * show that a disequality holds, it updates the index to point to the first
451
   * element in the normal forms for which the relationship is unclear.
452
   *
453
   * @param nfi The normal form for the first string in the disequality
454
   * @param nfj The normal form for the second string in the disequality
455
   * @param ni The first string in the disequality
456
   * @param nj The second string in the disequality
457
   * @param index The index to start at. If this function returns false, the
458
   *              index points to the first index in the normal forms for which
459
   *              it is not known whether they are equal or disequal
460
   * @param isRev This should be true if the normal forms are reversed, false
461
   *              otherwise
462
   * @return true if the disequality is satisfied, false otherwise
463
   */
464
  bool processSimpleDeq(std::vector<Node>& nfi,
465
                        std::vector<Node>& nfj,
466
                        Node ni,
467
                        Node nj,
468
                        size_t& index,
469
                        bool isRev);
470
  //--------------------------end for checkNormalFormsDeq
471
472
  /** The solver state object */
473
  SolverState& d_state;
474
  /** The (custom) output channel of the theory of strings */
475
  InferenceManager& d_im;
476
  /** Reference to the term registry of theory of strings */
477
  TermRegistry& d_termReg;
478
  /** reference to the base solver, used for certain queries */
479
  BaseSolver& d_bsolver;
480
  /** Commonly used constants */
481
  Node d_true;
482
  Node d_false;
483
  Node d_zero;
484
  Node d_one;
485
  Node d_neg_one;
486
  /** empty vector (used for trivial explanations) */
487
  std::vector<Node> d_emptyVec;
488
  /**
489
   * The list of equivalence classes of type string. These are ordered based
490
   * on the ordering described in checkCycles.
491
   */
492
  std::vector<Node> d_strings_eqc;
493
  /** map from terms to their normal forms */
494
  std::map<Node, NormalForm> d_normal_form;
495
  /**
496
   * In certain cases, we know that two terms are equivalent despite
497
   * not having to verify their normal forms are identical. For example,
498
   * after applying the R-Loop rule to two terms a and b, we know they
499
   * are entailed to be equal in the current context without having
500
   * to look at their normal forms. We call (a,b) a normal form pair.
501
   *
502
   * We map representative terms a to a list of nodes b1,...,bn such that
503
   * (a,b1) ... (a, bn) are normal form pairs. This list is SAT-context
504
   * dependent. We use a context-dependent integer along with a context
505
   * indepedent map from nodes to lists of nodes to model this, given by
506
   * the two data members below.
507
   */
508
  NodeIntMap d_nfPairs;
509
  std::map<Node, std::vector<Node> > d_nf_pairs_data;
510
  /** list of non-congruent concat terms in each equivalence class */
511
  std::map<Node, std::vector<Node> > d_eqc;
512
  /**
513
   * The flat form for each equivalence class. For a term (str.++ t1 ... tn),
514
   * this is a list of the form (r_{i1} ... r_{im}) where each empty t1...tn
515
   * is dropped and the others are replaced in order with their representative.
516
   */
517
  std::map<Node, std::vector<Node> > d_flat_form;
518
  /**
519
   * For each component r_{i1} ... r_{im} in a flat form, this stores
520
   * the argument number of the t1 ... tn they were generated from.
521
   */
522
  std::map<Node, std::vector<int> > d_flat_form_index;
523
}; /* class CoreSolver */
524
525
}  // namespace strings
526
}  // namespace theory
527
}  // namespace cvc5
528
529
#endif /* CVC5__THEORY__STRINGS__CORE_SOLVER_H */