1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Andrew Reynolds, Mathias Preiner, Fabian Wolff |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* sygus_sampler |
14 |
|
*/ |
15 |
|
|
16 |
|
#include "cvc5_private.h" |
17 |
|
|
18 |
|
#ifndef CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H |
19 |
|
#define CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H |
20 |
|
|
21 |
|
#include <map> |
22 |
|
#include "theory/evaluator.h" |
23 |
|
#include "theory/quantifiers/lazy_trie.h" |
24 |
|
#include "theory/quantifiers/sygus/term_database_sygus.h" |
25 |
|
#include "theory/quantifiers/term_enumeration.h" |
26 |
|
|
27 |
|
namespace cvc5 { |
28 |
|
namespace theory { |
29 |
|
namespace quantifiers { |
30 |
|
|
31 |
|
/** SygusSampler |
32 |
|
* |
33 |
|
* This class can be used to test whether two expressions are equivalent |
34 |
|
* by random sampling. We use this class for the following options: |
35 |
|
* |
36 |
|
* sygus-rr-synth: synthesize candidate rewrite rules by finding two terms |
37 |
|
* t1 and t2 do not rewrite to the same term, but are identical when considering |
38 |
|
* a set of sample points, and |
39 |
|
* |
40 |
|
* sygus-rr-verify: detect unsound rewrite rules by finding two terms t1 and |
41 |
|
* t2 that rewrite to the same term, but not identical when considering a set |
42 |
|
* of sample points. |
43 |
|
* |
44 |
|
* To use this class: |
45 |
|
* (1) Call initialize( tds, f, nsamples) where f is sygus datatype term. |
46 |
|
* (2) For terms n1....nm enumerated that correspond to builtin analog of sygus |
47 |
|
* term f, we call registerTerm( n1 )....registerTerm( nm ). It may be the case |
48 |
|
* that registerTerm( ni ) returns nj for some j < i. In this case, we have that |
49 |
|
* ni and nj are equivalent under the sample points in this class. |
50 |
|
* |
51 |
|
* For example, say the grammar for f is: |
52 |
|
* A = 0 | 1 | x | y | A+A | ite( B, A, A ) |
53 |
|
* B = A <= A |
54 |
|
* If we call initialize( tds, f, 5 ), this class will generate 5 random sample |
55 |
|
* points for (x,y), say (0,0), (1,1), (0,1), (1,0), (2,2). The return values |
56 |
|
* of successive calls to registerTerm are listed below. |
57 |
|
* registerTerm( 0 ) -> 0 |
58 |
|
* registerTerm( x ) -> x |
59 |
|
* registerTerm( x+y ) -> x+y |
60 |
|
* registerTerm( y+x ) -> x+y |
61 |
|
* registerTerm( x+ite(x <= 1+1, 0, y ) ) -> x |
62 |
|
* Notice that the number of sample points can be configured for the above |
63 |
|
* options using sygus-samples=N. |
64 |
|
*/ |
65 |
|
class SygusSampler : public LazyTrieEvaluator |
66 |
|
{ |
67 |
|
public: |
68 |
|
SygusSampler(); |
69 |
4961 |
~SygusSampler() override {} |
70 |
|
|
71 |
|
/** initialize |
72 |
|
* |
73 |
|
* tn : the return type of terms we will be testing with this class, |
74 |
|
* vars : the variables we are testing substitutions for, |
75 |
|
* nsamples : number of sample points this class will test, |
76 |
|
* unique_type_ids : if this is set to true, then we consider each variable |
77 |
|
* in vars to have a unique "type id". A type id is a finer-grained notion of |
78 |
|
* type that is used to determine when a rewrite rule is redundant. |
79 |
|
*/ |
80 |
|
virtual void initialize(TypeNode tn, |
81 |
|
const std::vector<Node>& vars, |
82 |
|
unsigned nsamples, |
83 |
|
bool unique_type_ids = false); |
84 |
|
/** initialize sygus |
85 |
|
* |
86 |
|
* qe : pointer to quantifiers engine, |
87 |
|
* f : a term of some SyGuS datatype type whose values we will be |
88 |
|
* testing under the free variables in the grammar of f, |
89 |
|
* nsamples : number of sample points this class will test, |
90 |
|
* useSygusType : whether we will register terms with this sampler that have |
91 |
|
* the same type as f. If this flag is false, then we will be registering |
92 |
|
* terms of the analog of the type of f, that is, the builtin type that |
93 |
|
* f's type encodes in the deep embedding. |
94 |
|
*/ |
95 |
|
virtual void initializeSygus(TermDbSygus* tds, |
96 |
|
Node f, |
97 |
|
unsigned nsamples, |
98 |
|
bool useSygusType); |
99 |
|
/** register term n with this sampler database |
100 |
|
* |
101 |
|
* forceKeep is whether we wish to force that n is chosen as a representative |
102 |
|
* value in the trie. |
103 |
|
*/ |
104 |
|
virtual Node registerTerm(Node n, bool forceKeep = false); |
105 |
|
/** get number of sample points */ |
106 |
1074 |
unsigned getNumSamplePoints() const { return d_samples.size(); } |
107 |
|
/** get variables, adds d_vars to vars */ |
108 |
|
void getVariables(std::vector<Node>& vars) const; |
109 |
|
/** get sample point |
110 |
|
* |
111 |
|
* Appends sample point #index to the vector pt, d_vars to vars. |
112 |
|
*/ |
113 |
|
void getSamplePoint(unsigned index, |
114 |
|
std::vector<Node>& pt); |
115 |
|
/** Add pt to the set of sample points considered by this sampler */ |
116 |
|
void addSamplePoint(std::vector<Node>& pt); |
117 |
|
/** evaluate n on sample point index */ |
118 |
|
Node evaluate(Node n, unsigned index) override; |
119 |
|
/** |
120 |
|
* Compute the variables from the domain of d_var_index that occur in n, |
121 |
|
* store these in the vector fvs. |
122 |
|
*/ |
123 |
|
void computeFreeVariables(Node n, std::vector<Node>& fvs); |
124 |
|
/** |
125 |
|
* Returns the index of a sample point such that the evaluation of a and b |
126 |
|
* diverge, or -1 if no such sample point exists. |
127 |
|
*/ |
128 |
|
int getDiffSamplePointIndex(Node a, Node b); |
129 |
|
|
130 |
|
//--------------------------queries about terms |
131 |
|
/** is contiguous |
132 |
|
* |
133 |
|
* This returns whether n's free variables (terms occurring in the range of |
134 |
|
* d_type_vars) are a prefix of the list of variables in d_type_vars for each |
135 |
|
* type id. For instance, if d_type_vars[id] = { x, y } for some id, then |
136 |
|
* 0, x, x+y, y+x are contiguous but y is not. This is useful for excluding |
137 |
|
* terms from consideration that are alpha-equivalent to others. |
138 |
|
*/ |
139 |
|
bool isContiguous(Node n); |
140 |
|
/** is ordered |
141 |
|
* |
142 |
|
* This returns whether n's free variables are in order with respect to |
143 |
|
* variables in d_type_vars for each type. For instance, if |
144 |
|
* d_type_vars[id] = { x, y } for some id, then 0, x, x+y are ordered but |
145 |
|
* y and y+x are not. |
146 |
|
*/ |
147 |
|
bool isOrdered(Node n); |
148 |
|
/** is linear |
149 |
|
* |
150 |
|
* This returns whether n contains at most one occurrence of each free |
151 |
|
* variable. For example, x, x+y are linear, but x+x, (x-y)+y, (x+0)+(x+0) are |
152 |
|
* non-linear. |
153 |
|
*/ |
154 |
|
bool isLinear(Node n); |
155 |
|
/** check variables |
156 |
|
* |
157 |
|
* This returns false if !isOrdered(n) and checkOrder is true or !isLinear(n) |
158 |
|
* if checkLinear is true, or false otherwise. |
159 |
|
*/ |
160 |
|
bool checkVariables(Node n, bool checkOrder, bool checkLinear); |
161 |
|
/** contains free variables |
162 |
|
* |
163 |
|
* Returns true if the free variables of b are a subset of those in a, where |
164 |
|
* we require a strict subset if strict is true. Free variables are those that |
165 |
|
* occur in the range d_type_vars. |
166 |
|
*/ |
167 |
|
bool containsFreeVariables(Node a, Node b, bool strict = false); |
168 |
|
//--------------------------end queries about terms |
169 |
|
/** check equivalent |
170 |
|
* |
171 |
|
* Check whether bv and bvr are equivalent on all sample points, print |
172 |
|
* an error if not. Used with --sygus-rr-verify. |
173 |
|
*/ |
174 |
|
void checkEquivalent(Node bv, Node bvr); |
175 |
|
|
176 |
|
protected: |
177 |
|
/** sygus term database of d_qe */ |
178 |
|
TermDbSygus* d_tds; |
179 |
|
/** term enumerator object (used for random sampling) */ |
180 |
|
TermEnumeration d_tenum; |
181 |
|
/** samples */ |
182 |
|
std::vector<std::vector<Node> > d_samples; |
183 |
|
/** evaluator class */ |
184 |
|
Evaluator d_eval; |
185 |
|
/** data structure to check duplication of sample points */ |
186 |
63214 |
class PtTrie |
187 |
|
{ |
188 |
|
public: |
189 |
|
/** add pt to this trie, returns true if pt is not a duplicate. */ |
190 |
|
bool add(std::vector<Node>& pt); |
191 |
|
|
192 |
|
private: |
193 |
|
/** the children of this node */ |
194 |
|
std::map<Node, PtTrie> d_children; |
195 |
|
}; |
196 |
|
/** a trie for samples */ |
197 |
|
PtTrie d_samples_trie; |
198 |
|
/** the sygus type for this sampler (if applicable). */ |
199 |
|
TypeNode d_ftn; |
200 |
|
/** whether we are registering terms of sygus types with this sampler */ |
201 |
|
bool d_use_sygus_type; |
202 |
|
/** |
203 |
|
* For each (sygus) type, a map from builtin terms to the sygus term they |
204 |
|
* correspond to. |
205 |
|
*/ |
206 |
|
std::map<TypeNode, std::map<Node, Node> > d_builtin_to_sygus; |
207 |
|
/** all variables we are sampling values for */ |
208 |
|
std::vector<Node> d_vars; |
209 |
|
/** type variables |
210 |
|
* |
211 |
|
* We group variables according to "type ids". Two variables have the same |
212 |
|
* type id if they have indistinguishable status according to this sampler. |
213 |
|
* This is a finer-grained grouping than types. For example, two variables |
214 |
|
* of the same type may have different type ids if they occur as constructors |
215 |
|
* of a different set of sygus types in the grammar we are considering. |
216 |
|
* For instance, we assign x and y different type ids in this grammar: |
217 |
|
* A -> B + C |
218 |
|
* B -> x | 0 | 1 |
219 |
|
* C -> y |
220 |
|
* Type ids are computed for each variable in d_vars during initialize(...). |
221 |
|
* |
222 |
|
* For each type id, a list of variables in the grammar we are considering, |
223 |
|
* for that type. These typically correspond to the arguments of the |
224 |
|
* function-to-synthesize whose grammar we are considering. |
225 |
|
*/ |
226 |
|
std::map<unsigned, std::vector<Node> > d_type_vars; |
227 |
|
/** |
228 |
|
* A map all variables in the grammar we are considering to their index in |
229 |
|
* d_type_vars. |
230 |
|
*/ |
231 |
|
std::map<Node, unsigned> d_var_index; |
232 |
|
/** Map from variables to the id (the domain of d_type_vars). */ |
233 |
|
std::map<Node, unsigned> d_type_ids; |
234 |
|
/** constants |
235 |
|
* |
236 |
|
* For each type, a list of constants in the grammar we are considering, for |
237 |
|
* that type. |
238 |
|
*/ |
239 |
|
std::map<TypeNode, std::vector<Node> > d_type_consts; |
240 |
|
/** a lazy trie for each type |
241 |
|
* |
242 |
|
* This stores the evaluation of all terms registered to this class. |
243 |
|
* |
244 |
|
* Notice if we are registering sygus terms with this class, then terms |
245 |
|
* are grouped into this trie according to their sygus type, and not their |
246 |
|
* builtin type. For example, for grammar: |
247 |
|
* A -> x | B+1 |
248 |
|
* B -> x | 0 | 1 | B+B |
249 |
|
* If we register C^B_+( C^B_x(), C^B_0() ) and C^A_x() with this class, |
250 |
|
* then x+0 is registered to d_trie[B] and x is registered to d_trie[A], |
251 |
|
* and no rewrite rule is reported. The reason for this is that otherwise |
252 |
|
* we would end up reporting many useless rewrites since the same builtin |
253 |
|
* term can be generated by multiple sygus types (e.g. C^B_x() and C^A_x()). |
254 |
|
*/ |
255 |
|
std::map<TypeNode, LazyTrie> d_trie; |
256 |
|
/** is this sampler valid? |
257 |
|
* |
258 |
|
* A sampler can be invalid if sample points cannot be generated for a type |
259 |
|
* of an argument to function f. |
260 |
|
*/ |
261 |
|
bool d_is_valid; |
262 |
|
/** initialize samples |
263 |
|
* |
264 |
|
* Adds nsamples sample points to d_samples. |
265 |
|
*/ |
266 |
|
void initializeSamples(unsigned nsamples); |
267 |
|
/** get random value for a type |
268 |
|
* |
269 |
|
* Returns a random value for the given type based on the random number |
270 |
|
* generator. Currently, supported types: |
271 |
|
* |
272 |
|
* Bool, Bitvector : returns a random value in the range of that type. |
273 |
|
* Int, String : returns a random string of values in (base 10) of random |
274 |
|
* length, currently by a repeated coin flip. |
275 |
|
* Real : returns the division of two random integers, where the denominator |
276 |
|
* is omitted if it is zero. |
277 |
|
*/ |
278 |
|
Node getRandomValue(TypeNode tn); |
279 |
|
/** get sygus random value |
280 |
|
* |
281 |
|
* Returns a random value based on the sygus type tn. The return value is |
282 |
|
* a constant in the analog type of tn. This function chooses either to |
283 |
|
* return a random value, or otherwise will construct a constant based on |
284 |
|
* a random constructor of tn whose builtin operator is not a variable. |
285 |
|
* |
286 |
|
* rchance: the chance that the call to this function will be forbidden |
287 |
|
* from making recursive calls and instead must return a value based on |
288 |
|
* a nullary constructor of tn or based on getRandomValue above. |
289 |
|
* rinc: the percentage to increment rchance on recursive calls. |
290 |
|
* |
291 |
|
* For example, consider the grammar: |
292 |
|
* A -> x | y | 0 | 1 | +( A, A ) | ite( B, A, A ) |
293 |
|
* B -> A = A |
294 |
|
* If we call this function on A and rchance is 0.0, there are five evenly |
295 |
|
* chosen possibilities, either we return a random value via getRandomValue |
296 |
|
* above, or we choose one of the four non-variable constructors of A. |
297 |
|
* Say we choose ite, then we recursively call this function for |
298 |
|
* B, A, and A, which return constants c1, c2, and c3. Then, this function |
299 |
|
* returns the rewritten form of ite( c1, c2, c3 ). |
300 |
|
* If on the other hand, rchance was 0.5 and rand() < 0.5. Then, we force |
301 |
|
* this call to terminate by either selecting a random value via |
302 |
|
* getRandomValue, 0 or 1. |
303 |
|
*/ |
304 |
|
Node getSygusRandomValue(TypeNode tn, |
305 |
|
double rchance, |
306 |
|
double rinc, |
307 |
|
unsigned depth = 0); |
308 |
|
/** map from sygus types to non-variable constructors */ |
309 |
|
std::map<TypeNode, std::vector<unsigned> > d_rvalue_cindices; |
310 |
|
/** map from sygus types to non-variable nullary constructors */ |
311 |
|
std::map<TypeNode, std::vector<unsigned> > d_rvalue_null_cindices; |
312 |
|
/** the random string alphabet */ |
313 |
|
std::vector<unsigned> d_rstring_alphabet; |
314 |
|
/** map from variables to sygus types that include them */ |
315 |
|
std::map<Node, std::vector<TypeNode> > d_var_sygus_types; |
316 |
|
/** map from constants to sygus types that include them */ |
317 |
|
std::map<Node, std::vector<TypeNode> > d_const_sygus_types; |
318 |
|
/** register sygus type, initializes the above two data structures */ |
319 |
|
void registerSygusType(TypeNode tn); |
320 |
|
}; |
321 |
|
|
322 |
|
} // namespace quantifiers |
323 |
|
} // namespace theory |
324 |
|
} // namespace cvc5 |
325 |
|
|
326 |
|
#endif /* CVC5__THEORY__QUANTIFIERS__SYGUS_SAMPLER_H */ |