GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/core_solver.h Lines: 2 2 100.0 %
Date: 2021-09-16 Branches: 9 22 40.9 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Core solver for the theory of strings, responsible for reasoning
14
 * string concatenation plus length constraints.
15
 */
16
17
#include "cvc5_private.h"
18
19
#ifndef CVC5__THEORY__STRINGS__CORE_SOLVER_H
20
#define CVC5__THEORY__STRINGS__CORE_SOLVER_H
21
22
#include "context/cdhashset.h"
23
#include "context/cdlist.h"
24
#include "smt/env_obj.h"
25
#include "theory/strings/base_solver.h"
26
#include "theory/strings/infer_info.h"
27
#include "theory/strings/inference_manager.h"
28
#include "theory/strings/normal_form.h"
29
#include "theory/strings/solver_state.h"
30
#include "theory/strings/term_registry.h"
31
32
namespace cvc5 {
33
namespace theory {
34
namespace strings {
35
36
/**
37
 * This data structure encapsulates an inference for the core solver of the
38
 * theory of strings. This includes the form of the inference to be processed
39
 * by the inference manager, the side effects it generates for the core solver,
40
 * and information used for heuristics and debugging.
41
 */
42
34284
class CoreInferInfo
43
{
44
 public:
45
  CoreInferInfo(InferenceId id);
46
52512
  ~CoreInferInfo() {}
47
  /** The infer info of this class */
48
  InferInfo d_infer;
49
  /**
50
   * The pending phase requirements, see InferenceManager::sendPhaseRequirement.
51
   */
52
  std::map<Node, bool> d_pendingPhase;
53
  /**
54
   * The index in the normal forms under which this inference is addressing.
55
   * For example, if the inference is inferring x = y from |x|=|y| and
56
   *   w ++ x ++ ... = w ++ y ++ ...
57
   * then d_index is 1, since x and y are at index 1 in these concat terms.
58
   */
59
  unsigned d_index;
60
  /**
61
   * The normal form pair that is cached as a result of this inference.
62
   */
63
  Node d_nfPair[2];
64
  /** for debugging
65
   *
66
   * The base pair of strings d_i/d_j that led to the inference, and whether
67
   * (d_rev) we were processing the normal forms of these strings in reverse
68
   * direction.
69
   */
70
  Node d_i;
71
  Node d_j;
72
  bool d_rev;
73
};
74
75
/** The core solver for the theory of strings
76
 *
77
 * This implements techniques for handling (dis)equalities involving
78
 * string concatenation terms based on the procedure by Liang et al CAV 2014.
79
 */
80
class CoreSolver : protected EnvObj
81
{
82
  friend class InferenceManager;
83
  using NodeIntMap = context::CDHashMap<Node, int>;
84
85
 public:
86
  CoreSolver(Env& env,
87
             SolverState& s,
88
             InferenceManager& im,
89
             TermRegistry& tr,
90
             BaseSolver& bs);
91
  ~CoreSolver();
92
93
  //-----------------------inference steps
94
  /** check cycles
95
   *
96
   * This inference schema ensures that a containment ordering < over the
97
   * string equivalence classes is acyclic. We define this ordering < such that
98
   * for equivalence classes e1 = { t1...tn } and e2 = { s1...sm }, e1 < e2
99
   * if there exists a ti whose flat form (see below) is [w1...sj...wk] for
100
   * some i,j. If e1 < ... < en < e1 for some chain, we infer that the flat
101
   * form components that do not constitute this chain, e.g. (w1...wk) \ sj
102
   * in the flat form above, must be empty.
103
   *
104
   * For more details, see the inference S-Cycle in Liang et al CAV 2014.
105
   */
106
  void checkCycles();
107
  /** check flat forms
108
   *
109
   * This applies an inference schema based on "flat forms". The flat form of a
110
   * string term t is a vector of representative terms [r1, ..., rn] such that
111
   * t is of the form t1 ++ ... ++ tm and r1 ++ ... ++ rn is equivalent to
112
   * rewrite( [t1] ++ ... ++ [tm] ), where [t1] denotes the representative of
113
   * the equivalence class containing t1. For example, if t is y ++ z ++ z,
114
   * E is { y = "", w = z }, and w is the representative of the equivalence
115
   * class { w, z }, then the flat form of t is [w, w]. Say t1 and t2 are terms
116
   * in the same equivalence classes with flat forms [r1...rn] and [s1...sm].
117
   * We may infer various facts based on this pair of terms. For example:
118
   *   ri = si, if ri != si, rj == sj for each j < i, and len(ri)=len(si),
119
   *   rn = sn, if n=m and rj == sj for each j < n,
120
   *   ri = empty, if n=m+1 and ri == rj for each i=1,...,m.
121
   * We refer to these as "unify", "endpoint-eq" and "endpoint-emp" inferences
122
   * respectively.
123
   *
124
   * Notice that this inference scheme is an optimization and not needed for
125
   * model-soundness. The motivation for this schema is that it is simpler than
126
   * checkNormalFormsEq, which can be seen as a recursive version of this
127
   * schema (see difference of "normal form" vs "flat form" below), and
128
   * checkNormalFormsEq is complete, in the sense that if it passes with no
129
   * inferences, we are ensured that all string equalities in the current
130
   * context are satisfied.
131
   *
132
   * Must call checkCycles before this function in a strategy.
133
   */
134
  void checkFlatForms();
135
  /** check normal forms equalities
136
   *
137
   * This applies an inference schema based on "normal forms". The normal form
138
   * of an equivalence class of string terms e = {t1, ..., tn} union {x1....xn},
139
   * where t1...tn are concatenation terms is a vector of representative terms
140
   * [r1, ..., rm] such that:
141
   * (1) if n=0, then m=1 and r1 is the representative of e,
142
   * (2) if n>0, say
143
   *   t1 = t^1_1 ++ ... ++ t^1_m_1
144
   *   ...
145
   *   tn = t^1_n ++ ... ++ t^_m_n
146
   * for *each* i=1, ..., n, the result of concenating the normal forms of
147
   * t^1_1 ++ ... ++ t^1_m_1 is equal to [r1, ..., rm]. If an equivalence class
148
   * can be assigned a normal form, then all equalities between ti and tj are
149
   * satisfied by all models that correspond to extensions of the current
150
   * assignment. For further detail on this terminology, see Liang et al
151
   * CAV 2014.
152
   *
153
   * Notice that all constant words are implicitly considered concatentation
154
   * of their characters, e.g. "abc" is treated as "a" ++ "b" ++ "c".
155
   *
156
   * At a high level, we build normal forms for equivalence classes bottom-up,
157
   * starting with equivalence classes that are minimal with respect to the
158
   * containment ordering < computed during checkCycles. While computing a
159
   * normal form for an equivalence class, we may infer equalities between
160
   * components of strings that must be equal (e.g. x=y when x++z == y++w when
161
   * len(x)==len(y) is asserted), derive conflicts if two strings have disequal
162
   * prefixes/suffixes (e.g. "a" ++ x == "b" ++ y is a conflict), or split
163
   * string terms into smaller components using fresh skolem variables (see
164
   * Inference values with names "SPLIT"). We also may introduce regular
165
   * expression constraints in this method for looping word equations (see
166
   * the Inference INFER_FLOOP).
167
   *
168
   * If this inference schema returns no facts, lemmas, or conflicts, then
169
   * we have successfully assigned normal forms for all equivalence classes, as
170
   * stored in d_normal_forms. Otherwise, this method may add a fact, lemma, or
171
   * conflict based on inferences in the Inference enumeration above.
172
   */
173
  void checkNormalFormsEq();
174
  /** check normal forms disequalities
175
   *
176
   * This inference schema can be seen as the converse of the above schema. In
177
   * particular, it ensures that each pair of distinct equivalence classes
178
   * e1 and e2 have distinct normal forms.
179
   *
180
   * This method considers all pairs of distinct equivalence classes (e1,e2)
181
   * such that len(x1)==len(x2) is asserted for some x1 in e1 and x2 in e2. It
182
   * then traverses the normal forms of x1 and x2, say they are [r1, ..., rn]
183
   * and [s1, ..., sm]. For the minimial i such that ri!=si, if ri and si are
184
   * disequal and have the same length, then x1 and x2 have distinct normal
185
   * forms. Otherwise, we may add splitting lemmas on the length of ri and si,
186
   * or split on an equality between ri and si.
187
   *
188
   * If this inference schema returns no facts, lemmas, or conflicts, then all
189
   * disequalities between string terms are satisfied by all models that are
190
   * extensions of the current assignment.
191
   */
192
  void checkNormalFormsDeq();
193
  /** check lengths for equivalence classes
194
   *
195
   * This inference schema adds lemmas of the form:
196
   *   E => len( x ) = rewrite( len( r1 ++ ... ++ rn ) )
197
   * where [r1, ..., rn] is the normal form of the equivalence class containing
198
   * x. This schema is not required for correctness but experimentally has
199
   * shown to be helpful.
200
   */
201
  void checkLengthsEqc();
202
  //-----------------------end inference steps
203
204
  //--------------------------- query functions
205
  /**
206
   * Get normal form for string term n. For details on this data structure,
207
   * see theory/strings/normal_form.h.
208
   *
209
   * This query is valid after a successful call to checkNormalFormsEq, e.g.
210
   * a call where the inference manager was not given any lemmas or inferences.
211
   */
212
  NormalForm& getNormalForm(Node n);
213
  /** get normal string
214
   *
215
   * This method returns the node that is equivalent to the normal form of x,
216
   * and adds the corresponding explanation to nf_exp.
217
   *
218
   * For example, if x = y ++ z is an assertion in the current context, then
219
   * this method returns the term y ++ z and adds x = y ++ z to nf_exp.
220
   */
221
  Node getNormalString(Node x, std::vector<Node>& nf_exp);
222
  //-------------------------- end query functions
223
224
  /**
225
   * This returns the conclusion of the proof rule corresponding to splitting
226
   * on the arrangement of terms x and y appearing in an equation of the form
227
   *   x ++ x' = y ++ y' or x' ++ x = y' ++ y
228
   * where we are in the second case if isRev is true. This method is called
229
   * both by the core solver and by the strings proof checker.
230
   *
231
   * @param x The first term
232
   * @param y The second term
233
   * @param rule The proof rule whose conclusion we are asking for
234
   * @param isRev Whether the equation is in a reverse direction
235
   * @param skc The skolem cache (to allocate fresh variables if necessary)
236
   * @param newSkolems The vector to add new variables to
237
   * @return The conclusion of the inference.
238
   */
239
  static Node getConclusion(Node x,
240
                            Node y,
241
                            PfRule rule,
242
                            bool isRev,
243
                            SkolemCache* skc,
244
                            std::vector<Node>& newSkolems);
245
  /**
246
   * Get sufficient non-empty overlap of string constants c and d.
247
   *
248
   * This is called when handling equations of the form:
249
   *   x ++ d ++ ... = c ++ ...
250
   * when x is non-empty and non-constant.
251
   *
252
   * This returns the maximal index in c which x must have as a prefix, which
253
   * notice is an integer >= 1 since x is non-empty.
254
   *
255
   * @param c The first constant
256
   * @param d The second constant
257
   * @param isRev Whether the equation is in the reverse direction
258
   * @return The position in c.
259
   */
260
  static size_t getSufficientNonEmptyOverlap(Node c, Node d, bool isRev);
261
  /**
262
   * This returns the conclusion of the decompose proof rule. This returns
263
   * a conjunction of splitting string x into pieces based on length l, e.g.:
264
   *   x = k_1 ++ k_2
265
   * where k_1 (resp. k_2) is a skolem corresponding to a substring of x of
266
   * length l if isRev is false (resp. true). The function also adds a
267
   * length constraint len(k_1) = l (resp. len(k_2) = l). Note that adding this
268
   * constraint to the conclusion is *not* optional, since the skolems k_1 and
269
   * k_2 may be shared, hence their length constraint must be guarded by the
270
   * premises of this inference.
271
   *
272
   * @param x The string term
273
   * @param l The length term
274
   * @param isRev Whether the equation is in a reverse direction
275
   * @param skc The skolem cache (to allocate fresh variables if necessary)
276
   * @param newSkolems The vector to add new variables to
277
   * @return The conclusion of the inference.
278
   */
279
  static Node getDecomposeConclusion(Node x,
280
                                     Node l,
281
                                     bool isRev,
282
                                     SkolemCache* skc,
283
                                     std::vector<Node>& newSkolems);
284
285
 private:
286
  /**
287
   * This processes the infer info ii as an inference. In more detail, it calls
288
   * the inference manager to process the inference, and updates the set of
289
   * normal form pairs. Returns true if the conclusion of ii was not true
290
   * after rewriting. If the conclusion is true, this method does nothing.
291
   */
292
  bool processInferInfo(CoreInferInfo& ii);
293
  /** Add that (n1,n2) is a normal form pair in the current context. */
294
  void addNormalFormPair(Node n1, Node n2);
295
  /** Is (n1,n2) a normal form pair in the current context? */
296
  bool isNormalFormPair(Node n1, Node n2);
297
298
  //--------------------------for checkFlatForm
299
  /**
300
   * This checks whether there are flat form inferences between eqc[start] and
301
   * eqc[j] for some j>start. If the flag isRev is true, we check for flat form
302
   * interferences in the reverse direction of the flat forms (note:
303
   * `d_flat_form` and `d_flat_form_index` must be in reverse order if `isRev`
304
   * is true). For more details, see checkFlatForms below.
305
   */
306
  void checkFlatForm(std::vector<Node>& eqc, size_t start, bool isRev);
307
  /** debug print current flat forms on trace tc */
308
  void debugPrintFlatForms(const char* tc);
309
  //--------------------------end for checkFlatForm
310
311
  //--------------------------for checkCycles
312
  Node checkCycles(Node eqc, std::vector<Node>& curr, std::vector<Node>& exp);
313
  //--------------------------end for checkCycles
314
315
  //--------------------------for checkNormalFormsEq
316
  /** normalize equivalence class
317
   *
318
   * This method attempts to build a "normal form" for the equivalence class
319
   * of string term n (for more details on normal forms, see normal_form.h
320
   * or see Liang et al CAV 2014). In particular, this method checks whether the
321
   * current normal form for each term in this equivalence class is identical.
322
   * If it is not, then we add an inference via sendInference and abort the
323
   * call.
324
   *
325
   * stype is the string-like type of the equivalence class we are processing.
326
   */
327
  void normalizeEquivalenceClass(Node n, TypeNode stype);
328
  /**
329
   * For each term in the equivalence class of eqc, this adds data regarding its
330
   * normal form to normal_forms. The map term_to_nf_index maps terms to the
331
   * index in normal_forms where their normal form data is located.
332
   *
333
   * stype is the string-like type of the equivalence class we are processing.
334
   */
335
  void getNormalForms(Node eqc,
336
                      std::vector<NormalForm>& normal_forms,
337
                      std::map<Node, unsigned>& term_to_nf_index,
338
                      TypeNode stype);
339
  /** process normalize equivalence class
340
   *
341
   * This is called when an equivalence class eqc contains a set of terms that
342
   * have normal forms given by the argument normal_forms. It either
343
   * verifies that all normal forms in this vector are identical, or otherwise
344
   * adds a conflict, lemma, or inference via the sendInference method.
345
   *
346
   * To prioritize one inference versus another, it builds a set of possible
347
   * inferences, at most two for each pair of distinct normal forms,
348
   * corresponding to processing the normal form pair in the (forward, reverse)
349
   * directions. Once all possible inferences are recorded, it executes the
350
   * one with highest priority based on the enumeration type Inference.
351
   *
352
   * stype is the string-like type of the equivalence class we are processing.
353
   */
354
  void processNEqc(Node eqc,
355
                   std::vector<NormalForm>& normal_forms,
356
                   TypeNode stype);
357
  /** process simple normal equality
358
   *
359
   * This method is called when two equal terms have normal forms nfi and nfj.
360
   * It adds (typically at most one) possible inference to the vector pinfer.
361
   * This inference is in the form of an CoreInferInfo object, which stores the
362
   * necessary information regarding how to process the inference.
363
   *
364
   * index: The index in the normal form vectors (nfi.d_nf and nfj.d_nf) that
365
   *   we are currently checking. This method will increment this index until
366
   *   it finds an index where these vectors differ, or until it reaches the
367
   *   end of these vectors.
368
   * isRev: Whether we are processing the normal forms in reverse direction.
369
   *   Notice in this case the normal form vectors have been reversed, hence,
370
   *   many operations are identical to the forward case, e.g. index is
371
   *   incremented not decremented, while others require special care, e.g.
372
   *   constant strings "ABC" in the normal form vectors are not reversed to
373
   *   "CBA" and hence all operations should assume a flipped semantics for
374
   *   constants when isRev is true,
375
   * rproc: the number of string components on the suffix of the normal form of
376
   *   nfi and nfj that were already processed. This is used when using
377
   *   fowards/backwards traversals of normal forms to ensure that duplicate
378
   *   inferences are not processed.
379
   * pinfer: the set of possible inferences we add to.
380
   *
381
   * stype is the string-like type of the equivalence class we are processing.
382
   */
383
  void processSimpleNEq(NormalForm& nfi,
384
                        NormalForm& nfj,
385
                        unsigned& index,
386
                        bool isRev,
387
                        unsigned rproc,
388
                        std::vector<CoreInferInfo>& pinfer,
389
                        TypeNode stype);
390
  //--------------------------end for checkNormalFormsEq
391
392
  //--------------------------for checkNormalFormsEq with loops
393
  bool detectLoop(NormalForm& nfi,
394
                  NormalForm& nfj,
395
                  int index,
396
                  int& loop_in_i,
397
                  int& loop_in_j,
398
                  unsigned rproc);
399
400
  /**
401
   * Result of processLoop() below.
402
   */
403
  enum class ProcessLoopResult
404
  {
405
    /** Loop processing made an inference */
406
    INFERENCE,
407
    /** Loop processing detected a conflict */
408
    CONFLICT,
409
    /** Loop not processed or no loop detected */
410
    SKIPPED,
411
  };
412
413
  ProcessLoopResult processLoop(NormalForm& nfi,
414
                                NormalForm& nfj,
415
                                int loop_index,
416
                                int index,
417
                                CoreInferInfo& info);
418
  //--------------------------end for checkNormalFormsEq with loops
419
420
  //--------------------------for checkNormalFormsDeq
421
422
  /**
423
   * Given a pair of disequal strings with the same length, checks whether the
424
   * disequality holds. This may result in inferences or conflicts.
425
   *
426
   * @param n1 The first string in the disequality
427
   * @param n2 The second string in the disequality
428
   */
429
  void processDeq(Node n1, Node n2);
430
431
  /**
432
   * Given a pair of disequal strings with the same length and their normal
433
   * forms, checks whether the disequality holds. This may result in
434
   * inferences.
435
   *
436
   * @param nfi The normal form for the first string in the disequality
437
   * @param nfj The normal form for the second string in the disequality
438
   * @param ni The first string in the disequality
439
   * @param nj The second string in the disequality
440
   * @return true if the disequality is satisfied, false otherwise
441
   */
442
  bool processReverseDeq(std::vector<Node>& nfi,
443
                         std::vector<Node>& nfj,
444
                         Node ni,
445
                         Node nj);
446
447
  /**
448
   * Given a pair of disequal strings with the same length and their normal
449
   * forms, performs some simple checks whether the disequality holds. The
450
   * check is done starting from a given index and can either be performed on
451
   * reversed normal forms or the original normal forms. If the function cannot
452
   * show that a disequality holds, it updates the index to point to the first
453
   * element in the normal forms for which the relationship is unclear.
454
   *
455
   * @param nfi The normal form for the first string in the disequality
456
   * @param nfj The normal form for the second string in the disequality
457
   * @param ni The first string in the disequality
458
   * @param nj The second string in the disequality
459
   * @param index The index to start at. If this function returns false, the
460
   *              index points to the first index in the normal forms for which
461
   *              it is not known whether they are equal or disequal
462
   * @param isRev This should be true if the normal forms are reversed, false
463
   *              otherwise
464
   * @return true if the disequality is satisfied, false otherwise
465
   */
466
  bool processSimpleDeq(std::vector<Node>& nfi,
467
                        std::vector<Node>& nfj,
468
                        Node ni,
469
                        Node nj,
470
                        size_t& index,
471
                        bool isRev);
472
  //--------------------------end for checkNormalFormsDeq
473
474
  /** The solver state object */
475
  SolverState& d_state;
476
  /** The (custom) output channel of the theory of strings */
477
  InferenceManager& d_im;
478
  /** Reference to the term registry of theory of strings */
479
  TermRegistry& d_termReg;
480
  /** reference to the base solver, used for certain queries */
481
  BaseSolver& d_bsolver;
482
  /** Commonly used constants */
483
  Node d_true;
484
  Node d_false;
485
  Node d_zero;
486
  Node d_one;
487
  Node d_neg_one;
488
  /** empty vector (used for trivial explanations) */
489
  std::vector<Node> d_emptyVec;
490
  /**
491
   * The list of equivalence classes of type string. These are ordered based
492
   * on the ordering described in checkCycles.
493
   */
494
  std::vector<Node> d_strings_eqc;
495
  /** map from terms to their normal forms */
496
  std::map<Node, NormalForm> d_normal_form;
497
  /**
498
   * In certain cases, we know that two terms are equivalent despite
499
   * not having to verify their normal forms are identical. For example,
500
   * after applying the R-Loop rule to two terms a and b, we know they
501
   * are entailed to be equal in the current context without having
502
   * to look at their normal forms. We call (a,b) a normal form pair.
503
   *
504
   * We map representative terms a to a list of nodes b1,...,bn such that
505
   * (a,b1) ... (a, bn) are normal form pairs. This list is SAT-context
506
   * dependent. We use a context-dependent integer along with a context
507
   * indepedent map from nodes to lists of nodes to model this, given by
508
   * the two data members below.
509
   */
510
  NodeIntMap d_nfPairs;
511
  std::map<Node, std::vector<Node> > d_nf_pairs_data;
512
  /** list of non-congruent concat terms in each equivalence class */
513
  std::map<Node, std::vector<Node> > d_eqc;
514
  /**
515
   * The flat form for each equivalence class. For a term (str.++ t1 ... tn),
516
   * this is a list of the form (r_{i1} ... r_{im}) where each empty t1...tn
517
   * is dropped and the others are replaced in order with their representative.
518
   */
519
  std::map<Node, std::vector<Node> > d_flat_form;
520
  /**
521
   * For each component r_{i1} ... r_{im} in a flat form, this stores
522
   * the argument number of the t1 ... tn they were generated from.
523
   */
524
  std::map<Node, std::vector<int> > d_flat_form_index;
525
}; /* class CoreSolver */
526
527
}  // namespace strings
528
}  // namespace theory
529
}  // namespace cvc5
530
531
#endif /* CVC5__THEORY__STRINGS__CORE_SOLVER_H */