Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/util/string.h	Lines:	23	24	95.8 %
Date:	2021-08-14	Branches:	4	8	50.0 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Tim King, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * The string data type.
 */

#include "cvc5_public.h"

#ifndef CVC5__UTIL__STRING_H
#define CVC5__UTIL__STRING_H

#include <iosfwd>
#include <string>
#include <vector>

#include "util/rational.h"

namespace cvc5 {

/** The cvc5 string class
 *
 * This data structure is the domain of values for the string type. It can also
 * be used as a generic utility for representing strings.
 */
class String
{
 public:
  /**
   * This is the cardinality of the alphabet that is representable by this
   * class. Notice that this must be greater than or equal to the cardinality
   * of the alphabet that the string theory reasons about.
   *
   * This must be strictly less than std::numeric_limits<unsigned>::max().
   *
   * As per the SMT-LIB standard for strings, we support the first 3 planes of
   * Unicode characters, where 196608 = 3*16^4.
   */
  static inline unsigned num_codes() { return 196608; }
  /** constructors for String
   *
   * Internally, a cvc5::String is represented by a vector of unsigned
   * integers (d_str) representing the code points of the characters.
   *
   * To build a string from a C++ string, we may process escape sequences
   * according to the SMT-LIB standard. In particular, if useEscSequences is
   * true, we convert unicode escape sequences:
   *  \u d_3 d_2 d_1 d_0
   *  \u{d_0}
   *  \u{d_1 d_0}
   *  \u{d_2 d_1 d_0}
   *  \u{d_3 d_2 d_1 d_0}
   *  \u{d_4 d_3 d_2 d_1 d_0}
   * where d_0 ... d_4 are hexadecimal digits, to the appropriate character.
   *
   * If useEscSequences is false, then the characters of the constructed
   * cvc5::String correspond one-to-one with the input string.
   */
  String() = default;
  explicit String(const std::string& s, bool useEscSequences = false)
      : d_str(toInternal(s, useEscSequences))
  {
  }
  explicit String(const std::wstring& s);
  explicit String(const char* s, bool useEscSequences = false)
      : d_str(toInternal(std::string(s), useEscSequences))
  {
  }
  explicit String(const std::vector<unsigned>& s);

  String& operator=(const String& y) {
    if (this != &y) {
      d_str = y.d_str;
    }
    return *this;
  }

  String concat(const String& other) const;

  bool operator==(const String& y) const { return cmp(y) == 0; }
  bool operator!=(const String& y) const { return cmp(y) != 0; }
  bool operator<(const String& y) const { return cmp(y) < 0; }
  bool operator>(const String& y) const { return cmp(y) > 0; }
  bool operator<=(const String& y) const { return cmp(y) <= 0; }
  bool operator>=(const String& y) const { return cmp(y) >= 0; }

  /**
   * Returns true if this string is equal to y for their first n characters.
   * If n is larger than the length of this string or y, this method returns
   * true if and only if this string is equal to y.
   */
  bool strncmp(const String& y, std::size_t n) const;
  /**
   * Returns true if this string is equal to y for their last n characters.
   * Similar to strncmp, if n is larger than the length of this string or y,
   * this method returns true if and only if this string is equal to y.
   */
  bool rstrncmp(const String& y, std::size_t n) const;

  /* toString
   * Converts this string to a std::string.
   *
   * The unprintable characters are converted to unicode escape sequences as
   * described above.
   *
   * If useEscSequences is false, the string's printable characters are
   * printed as characters. Notice that for all std::string s having only
   * printable characters, we have that
   *    cvc5::String( s ).toString() = s.
   */
  std::string toString(bool useEscSequences = false) const;
  /* toWString
   * Converts this string to a std::wstring.
   *
   * Unlike toString(), this method uses no escape sequences as both this class
   * and std::wstring use 32bit characters.
   */
  std::wstring toWString() const;
  /** is this the empty string? */
  bool empty() const { return d_str.empty(); }
  /** is less than or equal to string y */
  bool isLeq(const String& y) const;
  /** Return the length of the string */
  std::size_t size() const { return d_str.size(); }

  bool isRepeated() const;
  bool tailcmp(const String& y, int& c) const;

  /**
   * Return the first position y occurs in this string, or std::string::npos
   * otherwise.
   */
  std::size_t find(const String& y, const std::size_t start = 0) const;
  /**
   * Return the first position y occurs in this string searching from the end,
   * or std::string::npos otherwise.
   */
  std::size_t rfind(const String& y, const std::size_t start = 0) const;
  /** Returns true if y is a prefix of this */
  bool hasPrefix(const String& y) const;
  /** Returns true if y is a suffix of this */
  bool hasSuffix(const String& y) const;
  /** Replace the character at index i in this string with t */
  String update(std::size_t i, const String& t) const;
  /** Replace the first occurrence of s in this string with t */
  String replace(const String& s, const String& t) const;
  /** Return the substring of this string starting at index i */
  String substr(std::size_t i) const;
  /** Return the substring of this string starting at index i with size at most
   * j */
  String substr(std::size_t i, std::size_t j) const;
  /** Return the prefix of this string of size at most i */
  String prefix(std::size_t i) const { return substr(0, i); }
  /** Return the suffix of this string of size at most i */
  String suffix(std::size_t i) const { return substr(size() - i, i); }

  /**
   * Checks if there is any overlap between this string and another string. This
   * corresponds to checking whether one string contains the other and whether a
   * substring of one is a prefix of the other and vice-versa.
   *
   * @param y The other string
   * @return True if there is an overlap, false otherwise
   */
  bool noOverlapWith(const String& y) const;

  /** string overlap
  *
  * if overlap returns m>0,
  * then the maximal suffix of this string that is a prefix of y is of length m.
  *
  * For example, if x is "abcdef", then:
  * x.overlap("defg") = 3
  * x.overlap("ab") = 0
  * x.overlap("d") = 0
  * x.overlap("bcdefdef") = 5
  */
  std::size_t overlap(const String& y) const;
  /** string reverse overlap
  *
  * if roverlap returns m>0,
  * then the maximal prefix of this string that is a suffix of y is of length m.
  *
  * For example, if x is "abcdef", then:
  * x.roverlap("aaabc") = 3
  * x.roverlap("def") = 0
  * x.roverlap("d") = 0
  * x.roverlap("defabcde") = 5
  *
  * Notice that x.overlap(y) = y.roverlap(x)
  */
  std::size_t roverlap(const String& y) const;

  /**
   * Returns true if this string corresponds in text to a number, for example
   * this returns true for strings "7", "12", "004", "0" and false for strings
   * "abc", "4a", "-4", "".
   */
  bool isNumber() const;
  /** Returns the corresponding rational for the text of this string. */
  Rational toNumber() const;
  /** Get the unsigned representation (code points) of this string */
  const std::vector<unsigned>& getVec() const { return d_str; }
  /**
   * Get the unsigned (code point) value of the first character in this string
   */
  unsigned front() const;
  /**
   * Get the unsigned (code point) value of the last character in this string
   */
  unsigned back() const;
  /** is the unsigned a digit?
   *
   * This is true for code points between 48 ('0') and 57 ('9').
   */
  static bool isDigit(unsigned character);
  /** is the unsigned a hexadecimal digit?
   *
   * This is true for code points between 48 ('0') and 57 ('9'), code points
   * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
   */
  static bool isHexDigit(unsigned character);
  /** is the unsigned a printable code point?
   *
   * This is true for Unicode 32 (' ') to 126 ('~').
   */
  static bool isPrintable(unsigned character);

  /**
   * Returns the maximum length of string representable by this class.
   * Corresponds to the maximum size of d_str.
   */
  static size_t maxSize();
 private:
  /**
   * Helper for toInternal: add character ch to vector vec, storing a string in
   * internal format. This throws an error if ch is not a printable character,
   * since non-printable characters must be escaped in SMT-LIB.
   */
  static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
  /**
   * Convert the string s to the internal format (vector of code points).
   * The argument useEscSequences is whether to process unicode escape
   * sequences.
   */
  static std::vector<unsigned> toInternal(const std::string& s,
                                          bool useEscSequences);

  /**
   * Returns a negative number if *this < y, 0 if *this and y are equal and a
   * positive number if *this > y.
   */
  int cmp(const String& y) const;

  std::vector<unsigned> d_str;
}; /* class String */

namespace strings {

struct StringHashFunction
{
  size_t operator()(const ::cvc5::String& s) const
  {
    return std::hash<std::string>()(s.toString());
  }
}; /* struct StringHashFunction */

}  // namespace strings

std::ostream& operator<<(std::ostream& os, const String& s);

}  // namespace cvc5

#endif /* CVC5__UTIL__STRING_H */


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Tim King, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* The string data type.
14		*/
15
16		#include "cvc5_public.h"
17
18		#ifndef CVC5__UTIL__STRING_H
19		#define CVC5__UTIL__STRING_H
20
21		#include <iosfwd>
22		#include <string>
23		#include <vector>
24
25		#include "util/rational.h"
26
27		namespace cvc5 {
28
29		/** The cvc5 string class
30		*
31		* This data structure is the domain of values for the string type. It can also
32		* be used as a generic utility for representing strings.
33		*/
34	4952346	class String
35		{
36		public:
37		/**
38		* This is the cardinality of the alphabet that is representable by this
39		* class. Notice that this must be greater than or equal to the cardinality
40		* of the alphabet that the string theory reasons about.
41		*
42		* This must be strictly less than std::numeric_limits<unsigned>::max().
43		*
44		* As per the SMT-LIB standard for strings, we support the first 3 planes of
45		* Unicode characters, where 196608 = 3*16^4.
46		*/
47	4329059	static inline unsigned num_codes() { return 196608; }
48		/** constructors for String
49		*
50		* Internally, a cvc5::String is represented by a vector of unsigned
51		* integers (d_str) representing the code points of the characters.
52		*
53		* To build a string from a C++ string, we may process escape sequences
54		* according to the SMT-LIB standard. In particular, if useEscSequences is
55		* true, we convert unicode escape sequences:
56		* \u d_3 d_2 d_1 d_0
57		* \u{d_0}
58		* \u{d_1 d_0}
59		* \u{d_2 d_1 d_0}
60		* \u{d_3 d_2 d_1 d_0}
61		* \u{d_4 d_3 d_2 d_1 d_0}
62		* where d_0 ... d_4 are hexadecimal digits, to the appropriate character.
63		*
64		* If useEscSequences is false, then the characters of the constructed
65		* cvc5::String correspond one-to-one with the input string.
66		*/
67	583711	String() = default;
68	8799	explicit String(const std::string& s, bool useEscSequences = false)
69	8799	: d_str(toInternal(s, useEscSequences))
70		{
71	8798	}
72		explicit String(const std::wstring& s);
73	15292	explicit String(const char* s, bool useEscSequences = false)
74	15292	: d_str(toInternal(std::string(s), useEscSequences))
75		{
76	15292	}
77		explicit String(const std::vector<unsigned>& s);
78
79	608101	String& operator=(const String& y) {
80	608101	if (this != &y) {
81	608101	d_str = y.d_str;
82		}
83	608101	return *this;
84		}
85
86		String concat(const String& other) const;
87
88	2492631	bool operator==(const String& y) const { return cmp(y) == 0; }
89	65	bool operator!=(const String& y) const { return cmp(y) != 0; }
90	4078	bool operator<(const String& y) const { return cmp(y) < 0; }
91		bool operator>(const String& y) const { return cmp(y) > 0; }
92		bool operator<=(const String& y) const { return cmp(y) <= 0; }
93		bool operator>=(const String& y) const { return cmp(y) >= 0; }
94
95		/**
96		* Returns true if this string is equal to y for their first n characters.
97		* If n is larger than the length of this string or y, this method returns
98		* true if and only if this string is equal to y.
99		*/
100		bool strncmp(const String& y, std::size_t n) const;
101		/**
102		* Returns true if this string is equal to y for their last n characters.
103		* Similar to strncmp, if n is larger than the length of this string or y,
104		* this method returns true if and only if this string is equal to y.
105		*/
106		bool rstrncmp(const String& y, std::size_t n) const;
107
108		/* toString
109		* Converts this string to a std::string.
110		*
111		* The unprintable characters are converted to unicode escape sequences as
112		* described above.
113		*
114		* If useEscSequences is false, the string's printable characters are
115		* printed as characters. Notice that for all std::string s having only
116		* printable characters, we have that
117		* cvc5::String( s ).toString() = s.
118		*/
119		std::string toString(bool useEscSequences = false) const;
120		/* toWString
121		* Converts this string to a std::wstring.
122		*
123		* Unlike toString(), this method uses no escape sequences as both this class
124		* and std::wstring use 32bit characters.
125		*/
126		std::wstring toWString() const;
127		/** is this the empty string? */
128	189339	bool empty() const { return d_str.empty(); }
129		/** is less than or equal to string y */
130		bool isLeq(const String& y) const;
131		/** Return the length of the string */
132	18279483	std::size_t size() const { return d_str.size(); }
133
134		bool isRepeated() const;
135		bool tailcmp(const String& y, int& c) const;
136
137		/**
138		* Return the first position y occurs in this string, or std::string::npos
139		* otherwise.
140		*/
141		std::size_t find(const String& y, const std::size_t start = 0) const;
142		/**
143		* Return the first position y occurs in this string searching from the end,
144		* or std::string::npos otherwise.
145		*/
146		std::size_t rfind(const String& y, const std::size_t start = 0) const;
147		/** Returns true if y is a prefix of this */
148		bool hasPrefix(const String& y) const;
149		/** Returns true if y is a suffix of this */
150		bool hasSuffix(const String& y) const;
151		/** Replace the character at index i in this string with t */
152		String update(std::size_t i, const String& t) const;
153		/** Replace the first occurrence of s in this string with t */
154		String replace(const String& s, const String& t) const;
155		/** Return the substring of this string starting at index i */
156		String substr(std::size_t i) const;
157		/** Return the substring of this string starting at index i with size at most
158		* j */
159		String substr(std::size_t i, std::size_t j) const;
160		/** Return the prefix of this string of size at most i */
161	4363	String prefix(std::size_t i) const { return substr(0, i); }
162		/** Return the suffix of this string of size at most i */
163	19569	String suffix(std::size_t i) const { return substr(size() - i, i); }
164
165		/**
166		* Checks if there is any overlap between this string and another string. This
167		* corresponds to checking whether one string contains the other and whether a
168		* substring of one is a prefix of the other and vice-versa.
169		*
170		* @param y The other string
171		* @return True if there is an overlap, false otherwise
172		*/
173		bool noOverlapWith(const String& y) const;
174
175		/** string overlap
176		*
177		* if overlap returns m>0,
178		* then the maximal suffix of this string that is a prefix of y is of length m.
179		*
180		* For example, if x is "abcdef", then:
181		* x.overlap("defg") = 3
182		* x.overlap("ab") = 0
183		* x.overlap("d") = 0
184		* x.overlap("bcdefdef") = 5
185		*/
186		std::size_t overlap(const String& y) const;
187		/** string reverse overlap
188		*
189		* if roverlap returns m>0,
190		* then the maximal prefix of this string that is a suffix of y is of length m.
191		*
192		* For example, if x is "abcdef", then:
193		* x.roverlap("aaabc") = 3
194		* x.roverlap("def") = 0
195		* x.roverlap("d") = 0
196		* x.roverlap("defabcde") = 5
197		*
198		* Notice that x.overlap(y) = y.roverlap(x)
199		*/
200		std::size_t roverlap(const String& y) const;
201
202		/**
203		* Returns true if this string corresponds in text to a number, for example
204		* this returns true for strings "7", "12", "004", "0" and false for strings
205		* "abc", "4a", "-4", "".
206		*/
207		bool isNumber() const;
208		/** Returns the corresponding rational for the text of this string. */
209		Rational toNumber() const;
210		/** Get the unsigned representation (code points) of this string */
211	260209	const std::vector<unsigned>& getVec() const { return d_str; }
212		/**
213		* Get the unsigned (code point) value of the first character in this string
214		*/
215		unsigned front() const;
216		/**
217		* Get the unsigned (code point) value of the last character in this string
218		*/
219		unsigned back() const;
220		/** is the unsigned a digit?
221		*
222		* This is true for code points between 48 ('0') and 57 ('9').
223		*/
224		static bool isDigit(unsigned character);
225		/** is the unsigned a hexadecimal digit?
226		*
227		* This is true for code points between 48 ('0') and 57 ('9'), code points
228		* between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
229		*/
230		static bool isHexDigit(unsigned character);
231		/** is the unsigned a printable code point?
232		*
233		* This is true for Unicode 32 (' ') to 126 ('~').
234		*/
235		static bool isPrintable(unsigned character);
236
237		/**
238		* Returns the maximum length of string representable by this class.
239		* Corresponds to the maximum size of d_str.
240		*/
241		static size_t maxSize();
242		private:
243		/**
244		* Helper for toInternal: add character ch to vector vec, storing a string in
245		* internal format. This throws an error if ch is not a printable character,
246		* since non-printable characters must be escaped in SMT-LIB.
247		*/
248		static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
249		/**
250		* Convert the string s to the internal format (vector of code points).
251		* The argument useEscSequences is whether to process unicode escape
252		* sequences.
253		*/
254		static std::vector<unsigned> toInternal(const std::string& s,
255		bool useEscSequences);
256
257		/**
258		* Returns a negative number if this < y, 0 if this and y are equal and a
259		* positive number if *this > y.
260		*/
261		int cmp(const String& y) const;
262
263		std::vector<unsigned> d_str;
264		}; /* class String */
265
266		namespace strings {
267
268		struct StringHashFunction
269		{
270	2565504	size_t operator()(const ::cvc5::String& s) const
271		{
272	2565504	return std::hash<std::string>()(s.toString());
273		}
274		}; /* struct StringHashFunction */
275
276		} // namespace strings
277
278		std::ostream& operator<<(std::ostream& os, const String& s);
279
280		} // namespace cvc5
281
282		#endif /* CVC5__UTIL__STRING_H */