Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/skolem_cache.h	Lines:	1	1	100.0 %
Date:	2021-09-18	Branches:	0	0	0.0 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Yoni Zohar
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * A cache of skolems for theory of strings.
 */

#include "cvc5_private.h"

#ifndef CVC5__THEORY__STRINGS__SKOLEM_CACHE_H
#define CVC5__THEORY__STRINGS__SKOLEM_CACHE_H

#include <map>
#include <tuple>
#include <unordered_set>

#include "expr/node.h"
#include "expr/skolem_manager.h"

namespace cvc5 {
namespace theory {
namespace strings {

/**
 * A cache of all string skolems generated by the TheoryStrings class. This
 * cache is used to ensure that duplicate skolems are not generated when
 * possible, and helps identify what skolems were allocated in the current run.
 *
 * Cached skolems are requested with a pair of nodes (a, b) and a SkolemId;
 * the second node may be null for skolems that depend on a single argument.
 */
class SkolemCache
{
 public:
  /**
   * Constructor.
   *
   * @param useOpts Determines if we aggressively share skolems or return the
   * constants they are entailed to be equal to. Defaults to true.
   *
   * NOTE(review): this single-argument constructor is not marked explicit, so
   * a bool converts implicitly to a SkolemCache. Consider adding explicit
   * after confirming that no caller relies on the conversion.
   */
  SkolemCache(bool useOpts = true);
  /** Identifiers for skolem types
   *
   * The comments below document the properties of each skolem introduced by
   * inference in the strings solver, where by skolem we mean the fresh
   * string variable that witnesses each of "exists k".
   *
   * The skolems with _REV suffixes are used for the reverse version of the
   * preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
   *
   * All skolems assume a and b are strings unless otherwise stated.
   *
   * Notation used in the formulas: "^" is conjunction, "~" is negation,
   * "++" is concatenation, and a', b' denote the remainders of the
   * concatenations in which a and b occur.
   */
  enum SkolemId
  {
    // exists k. k = a
    SK_PURIFY,
    // a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
    //    exists k. a = "cccc" ++ k
    SK_ID_C_SPT,
    SK_ID_C_SPT_REV,
    // a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
    //    exists k. a = "c" ++ k
    SK_ID_VC_SPT,
    SK_ID_VC_SPT_REV,
    // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
    //    exists k1 k2. len( k1 )>0 ^ len( k2 )>0 ^
    //                  ( a ++ k1 = b OR a = b ++ k2 )
    // k1 is the skolem for (a,b) and k2 is the skolem for (b,a).
    SK_ID_V_SPT,
    SK_ID_V_SPT_REV,
    // a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
    //    exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
    // Unlike SK_ID_V_SPT, a single skolem k is used for both disjuncts.
    SK_ID_V_UNIFIED_SPT,
    SK_ID_V_UNIFIED_SPT_REV,
    // a != "" ^ b = "c" ^ a ++ a' != b ++ b' =>
    //    exists k, k_rem.
    //         len( k ) = 1 ^
    //         ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
    // NOTE(review): presumably SK_ID_DC_SPT identifies k and SK_ID_DC_SPT_REM
    // identifies k_rem -- confirm against the implementation.
    SK_ID_DC_SPT,
    SK_ID_DC_SPT_REM,
    // a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
    //    exists k_x k_y k_z.
    //         ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^
    //           len( k_z ) > 0 ^
    //           ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
    SK_ID_DEQ_X,
    SK_ID_DEQ_Y,
    // contains( a, b ) =>
    //    exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
    //                          ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
    //
    // As an optimization, these skolems are reused for positive occurrences of
    // contains, where they have the semantics:
    //
    //   contains( a, b ) =>
    //      exists k_pre, k_post. a = k_pre ++ b ++ k_post
    //
    // We reuse them since it is sound to consider w.l.o.g. the first occurrence
    // of b in a as the witness for contains( a, b ).
    SK_FIRST_CTN_PRE,
    SK_FIRST_CTN_POST,
    // For sequence a and regular expression b,
    // in_re(a, re.++(_*, b, _*)) =>
    //    exists k_pre, k_match, k_post.
    //       a = k_pre ++ k_match ++ k_post ^
    //       len(k_pre) = indexof_re(a, b, 0) ^
    //       (forall l. 0 < l < len(k_match) =>
    //         ~in_re(substr(k_match, 0, l), b)) ^
    //       in_re(k_match, b)
    //
    // k_pre is the prefix before the first, shortest match of b in a. k_match
    // is the substring of a matched by b. It is either empty or there is no
    // shorter string that matches b.
    SK_FIRST_MATCH_PRE,
    SK_FIRST_MATCH,
    SK_FIRST_MATCH_POST,
    // For integer b,
    // len( a ) > b =>
    //    exists k. a = k ++ a' ^ len( k ) = b
    SK_PREFIX,
    // For integer b,
    // b > 0 =>
    //    exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
    SK_SUFFIX_REM,
    // --------------- integer skolems
    // exists k. ( b occurs k times in a )
    SK_NUM_OCCUR,
    // --------------- function skolems
    // For function k: Int -> Int
    //   exists k.
    //     forall 0 <= x <= n,
    //       k(x) is the end index of the x^th occurrence of b in a
    //   where n is the number of occurrences of b in a, and k(0)=0.
    SK_OCCUR_INDEX,
    // For function k: Int -> Int
    //   exists k.
    //     forall 0 <= x < n,
    //       k(x) is the length of the x^th occurrence of b in a (excluding
    //       matches of empty strings)
    //   where b is a regular expression, n is the number of occurrences of b
    //   in a, and k(0)=0.
    SK_OCCUR_LEN,
    // For function k: ((Seq U) x Int) -> U
    // exists k.
    // forall s, n.
    //  k(s, n) is some undefined value of sort U
    SK_NTH,
  };
  /**
   * Returns a skolem of type string that is cached for (a,b,id) and has
   * name c.
   *
   * @param a The first argument the skolem is cached for
   * @param b The second argument the skolem is cached for
   * @param id The kind of skolem requested (see SkolemId)
   * @param c The name of the skolem
   * @return The skolem cached for (a,b,id)
   */
  Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c);
  /**
   * Returns a skolem of type string that is cached for (a,[null],id) and has
   * name c.
   *
   * @param a The (only) argument the skolem is cached for
   * @param id The kind of skolem requested (see SkolemId)
   * @param c The name of the skolem
   * @return The skolem cached for (a,[null],id)
   */
  Node mkSkolemCached(Node a, SkolemId id, const char* c);
  /**
   * Same as mkSkolemCached for (a,b,id), but the skolem to construct has a
   * custom type tn instead of type string.
   */
  Node mkTypedSkolemCached(
      TypeNode tn, Node a, Node b, SkolemId id, const char* c);
  /** Same as mkTypedSkolemCached above for (a,[null],id) */
  Node mkTypedSkolemCached(TypeNode tn, Node a, SkolemId id, const char* c);
  /**
   * Specific version for seq.nth, used for cases where the index is out of
   * range for sequence type seqType.
   *
   * This is a static member, so it does not use the state of any particular
   * SkolemCache instance.
   *
   * @param seqType The sequence type the skolem is constructed for
   * @param c The name of the skolem
   */
  static Node mkSkolemSeqNth(TypeNode seqType, const char* c);
  /** Returns an (uncached) skolem of type string with name c */
  Node mkSkolem(const char* c);
  /** Returns true if n is a skolem allocated by this class */
  bool isSkolem(Node n) const;
  /** Make index variable
   *
   * This returns an integer variable of kind BOUND_VARIABLE that is used
   * for axiomatizing the behavior of a term or predicate t. Notice that this
   * index variable does *not* necessarily refer to indices in the term t
   * itself. Instead, it refers to indices in the relevant string in the
   * reduction of t. For example, the index variable for the term str.to_int(s)
   * is used to quantify over the positions in string term s.
   *
   * This is a static member, so it does not use the state of any particular
   * SkolemCache instance.
   */
  static Node mkIndexVar(Node t);

  /** Make length variable
   *
   * This returns an integer variable of kind BOUND_VARIABLE that is used for
   * axiomatizing the behavior of a term or predicate t. It refers to lengths
   * of strings in the reduction of t. For example, the length variable for the
   * term str.indexof(s, r, n) is used to quantify over the lengths of strings
   * that could be matched by r.
   *
   * This is a static member, so it does not use the state of any particular
   * SkolemCache instance.
   */
  static Node mkLengthVar(Node t);

 private:
  /**
   * Simplifies the arguments for a string skolem used for indexing into the
   * cache. In certain cases, we can share skolems with similar arguments e.g.
   * SK_FIRST_CTN(a, c) can be used instead of SK_FIRST_CTN((str.substr a 0 n),
   * c) because the first occurrence of "c" in "(str.substr a 0 n)" is also the
   * first occurrence of "c" in "a" (assuming that "c" appears in both and
   * otherwise the value of SK_FIRST_CTN does not matter).
   *
   * Here SK_FIRST_CTN is shorthand for the SK_FIRST_CTN_PRE and
   * SK_FIRST_CTN_POST identifiers.
   *
   * @param id The type of skolem
   * @param a The first argument used for indexing
   * @param b The second argument used for indexing
   * @return A tuple with the new skolem id, the new first, and the new second
   * argument
   */
  std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id,
                                                         Node a,
                                                         Node b);
  /** whether we are using optimizations */
  bool d_useOpts;
  /** string type */
  TypeNode d_strType;
  /** Constant node zero */
  Node d_zero;
  /**
   * Map from node pairs and identifiers to skolems. The nesting order is
   * first node, then second node, then skolem identifier.
   */
  std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache;
  /** the set of all skolems we have generated */
  std::unordered_set<Node> d_allSkolems;
};

}  // namespace strings
}  // namespace theory
}  // namespace cvc5

#endif /* CVC5__THEORY__STRINGS__SKOLEM_CACHE_H */


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Yoni Zohar
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* A cache of skolems for theory of strings.
14		*/
15
16		#include "cvc5_private.h"
17
18		#ifndef CVC5__THEORY__STRINGS__SKOLEM_CACHE_H
19		#define CVC5__THEORY__STRINGS__SKOLEM_CACHE_H
20
21		#include <map>
22		#include <tuple>
23		#include <unordered_set>
24
25		#include "expr/node.h"
26		#include "expr/skolem_manager.h"
27
28		namespace cvc5 {
29		namespace theory {
30		namespace strings {
31
32		/**
33		* A cache of all string skolems generated by the TheoryStrings class. This
34		* cache is used to ensure that duplicate skolems are not generated when
35		* possible, and helps identify what skolems were allocated in the current run.
36		*/
37	11022	class SkolemCache
38		{
39		public:
40		/**
41		* Constructor.
42		*
43		* useOpts determines if we aggressively share Skolems or return the constants
44		* they are entailed to be equal to.
45		*/
46		SkolemCache(bool useOpts = true);
47		/** Identifiers for skolem types
48		*
49		* The comments below document the properties of each skolem introduced by
50		* inference in the strings solver, where by skolem we mean the fresh
51		* string variable that witnesses each of "exists k".
52		*
53		* The skolems with _REV suffixes are used for the reverse version of the
54		* preconditions below, e.g. where we are considering a' ++ a = b' ++ b.
55		*
56		* All skolems assume a and b are strings unless otherwise stated.
57		*/
58		enum SkolemId
59		{
60		// exists k. k = a
61		SK_PURIFY,
62		// a != "" ^ b = "ccccd" ^ a ++ "d" ++ a' = b ++ b' =>
63		// exists k. a = "cccc" ++ k
64		SK_ID_C_SPT,
65		SK_ID_C_SPT_REV,
66		// a != "" ^ b = "c" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
67		// exists k. a = "c" ++ k
68		SK_ID_VC_SPT,
69		SK_ID_VC_SPT_REV,
70		// a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
71		// exists k1 k2. len( k1 )>0 ^ len( k2 )>0 ^
72		// ( a ++ k1 = b OR a = b ++ k2 )
73		// k1 is the variable for (a,b) and k2 is the skolem for (b,a).
74		SK_ID_V_SPT,
75		SK_ID_V_SPT_REV,
76		// a != "" ^ b != "" ^ len(a)!=len(b) ^ a ++ a' = b ++ b' =>
77		// exists k. len( k )>0 ^ ( a ++ k = b OR a = b ++ k )
78		SK_ID_V_UNIFIED_SPT,
79		SK_ID_V_UNIFIED_SPT_REV,
80		// a != "" ^ b = "c" ^ a ++ a' != b ++ b' =>
81		// exists k, k_rem.
82		// len( k ) = 1 ^
83		// ( ( a = k ++ k_rem ^ k != "c" ) OR ( a = "c" ++ k_rem ) )
84		SK_ID_DC_SPT,
85		SK_ID_DC_SPT_REM,
86		// a != "" ^ b != "" ^ len( a ) != len( b ) ^ a ++ a' != b ++ b' =>
87		// exists k_x k_y k_z.
88		// ( len( k_y ) = len( a ) ^ len( k_x ) = len( b ) ^ len( k_z) > 0
89		// ( a = k_x ++ k_z OR b = k_y ++ k_z ) )
90		SK_ID_DEQ_X,
91		SK_ID_DEQ_Y,
92		// contains( a, b ) =>
93		// exists k_pre, k_post. a = k_pre ++ b ++ k_post ^
94		// ~contains(k_pre ++ substr( b, 0, len(b)-1 ), b)
95		//
96		// As an optimization, these skolems are reused for positive occurrences of
97		// contains, where they have the semantics:
98		//
99		// contains( a, b ) =>
100		// exists k_pre, k_post. a = k_pre ++ b ++ k_post
101		//
102		// We reuse them since it is sound to consider w.l.o.g. the first occurrence
103		// of b in a as the witness for contains( a, b ).
104		SK_FIRST_CTN_PRE,
105		SK_FIRST_CTN_POST,
106		// For sequence a and regular expression b,
107		// in_re(a, re.++(_*, b, _*)) =>
108		// exists k_pre, k_match, k_post.
109		// a = k_pre ++ k_match ++ k_post ^
110		// len(k_pre) = indexof_re(a, b, 0) ^
111		// (forall l. 0 < l < len(k_match) =>
112		// ~in_re(substr(k_match, 0, l), b)) ^
113		// in_re(k_match, b)
114		//
115		// k_pre is the prefix before the first, shortest match of b in a. k_match
116		// is the substring of a matched by b. It is either empty or there is no
117		// shorter string that matches b.
118		SK_FIRST_MATCH_PRE,
119		SK_FIRST_MATCH,
120		SK_FIRST_MATCH_POST,
121		// For integer b,
122		// len( a ) > b =>
123		// exists k. a = k ++ a' ^ len( k ) = b
124		SK_PREFIX,
125		// For integer b,
126		// b > 0 =>
127		// exists k. a = a' ++ k ^ len( k ) = ite( len(a)>b, len(a)-b, 0 )
128		SK_SUFFIX_REM,
129		// --------------- integer skolems
130		// exists k. ( b occurs k times in a )
131		SK_NUM_OCCUR,
132		// --------------- function skolems
133		// For function k: Int -> Int
134		// exists k.
135		// forall 0 <= x <= n,
136		// k(x) is the end index of the x^th occurrence of b in a
137		// where n is the number of occurrences of b in a, and k(0)=0.
138		SK_OCCUR_INDEX,
139		// For function k: Int -> Int
140		// exists k.
141		// forall 0 <= x < n,
142		// k(x) is the length of the x^th occurrence of b in a (excluding
143		// matches of empty strings)
144		// where b is a regular expression, n is the number of occurrences of b
145		// in a, and k(0)=0.
146		SK_OCCUR_LEN,
147		// For function k: ((Seq U) x Int) -> U
148		// exists k.
149		// forall s, n.
150		// k(s, n) is some undefined value of sort U
151		SK_NTH,
152		};
153		/**
154		* Returns a skolem of type string that is cached for (a,b,id) and has
155		* name c.
156		*/
157		Node mkSkolemCached(Node a, Node b, SkolemId id, const char* c);
158		/**
159		* Returns a skolem of type string that is cached for (a,[null],id) and has
160		* name c.
161		*/
162		Node mkSkolemCached(Node a, SkolemId id, const char* c);
163		/** Same as above, but the skolem to construct has a custom type tn */
164		Node mkTypedSkolemCached(
165		TypeNode tn, Node a, Node b, SkolemId id, const char* c);
166		/** Same as mkTypedSkolemCached above for (a,[null],id) */
167		Node mkTypedSkolemCached(TypeNode tn, Node a, SkolemId id, const char* c);
168		/**
169		* Specific version for seq.nth, used for cases where the index is out of
170		* range for sequence type seqType.
171		*/
172		static Node mkSkolemSeqNth(TypeNode seqType, const char* c);
173		/** Returns a (uncached) skolem of type string with name c */
174		Node mkSkolem(const char* c);
175		/** Returns true if n is a skolem allocated by this class */
176		bool isSkolem(Node n) const;
177		/** Make index variable
178		*
179		* This returns an integer variable of kind BOUND_VARIABLE that is used
180		* for axiomatizing the behavior of a term or predicate t. Notice that this
181		* index variable does not necessarily refer to indices in the term t
182		* itself. Instead, it refers to indices in the relevant string in the
183		* reduction of t. For example, the index variable for the term str.to_int(s)
184		* is used to quantify over the positions in string term s.
185		*/
186		static Node mkIndexVar(Node t);
187
188		/** Make length variable
189		*
190		* This returns an integer variable of kind BOUND_VARIABLE that is used for
191		* axiomatizing the behavior of a term or predicate t. It refers to lengths
192		* of strings in the reduction of t. For example, the length variable for the
193		* term str.indexof(s, r, n) is used to quantify over the lengths of strings
194		* that could be matched by r.
195		*/
196		static Node mkLengthVar(Node t);
197
198		private:
199		/**
200		* Simplifies the arguments for a string skolem used for indexing into the
201		* cache. In certain cases, we can share skolems with similar arguments e.g.
202		* SK_FIRST_CTN(a, c) can be used instead of SK_FIRST_CTN((str.substr a 0 n),
203		* c) because the first occurrence of "c" in "(str.substr a 0 n)" is also the
204		* first occurrence of "c" in "a" (assuming that "c" appears in both and
205		* otherwise the value of SK_FIRST_CTN does not matter).
206		*
207		* @param id The type of skolem
208		* @param a The first argument used for indexing
209		* @param b The second argument used for indexing
210		* @return A tuple with the new skolem id, the new first, and the new second
211		* argument
212		*/
213		std::tuple<SkolemId, Node, Node> normalizeStringSkolem(SkolemId id,
214		Node a,
215		Node b);
216		/** whether we are using optimizations */
217		bool d_useOpts;
218		/** string type */
219		TypeNode d_strType;
220		/** Constant node zero */
221		Node d_zero;
222		/** map from node pairs and identifiers to skolems */
223		std::map<Node, std::map<Node, std::map<SkolemId, Node> > > d_skolemCache;
224		/** the set of all skolems we have generated */
225		std::unordered_set<Node> d_allSkolems;
226		};
227
228		} // namespace strings
229		} // namespace theory
230		} // namespace cvc5
231
232		#endif /* CVC5__THEORY__STRINGS__SKOLEM_CACHE_H */