GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.h Lines: 23 24 95.8 %
Date: 2021-05-24 Branches: 4 8 50.0 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Tim King, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * The string data type.
14
 */
15
16
#include "cvc5_public.h"
17
18
#ifndef CVC5__UTIL__STRING_H
19
#define CVC5__UTIL__STRING_H
20
21
#include <iosfwd>
22
#include <string>
23
#include <vector>
24
25
#include "util/rational.h"
26
27
namespace cvc5 {
28
29
/** The cvc5 string class
30
 *
31
 * This data structure is the domain of values for the string type. It can also
32
 * be used as a generic utility for representing strings.
33
 */
34
3994657
class String
35
{
36
 public:
37
  /**
38
   * This is the cardinality of the alphabet that is representable by this
39
   * class. Notice that this must be greater than or equal to the cardinality
40
   * of the alphabet that the string theory reasons about.
41
   *
42
   * This must be strictly less than std::numeric_limits<unsigned>::max().
43
   *
44
   * As per the SMT-LIB standard for strings, we support the first 3 planes of
45
   * Unicode characters, where 196608 = 3*16^4.
46
   */
47
3895612
  static inline unsigned num_codes() { return 196608; }
48
  /** constructors for String
49
   *
50
   * Internally, a cvc5::String is represented by a vector of unsigned
51
   * integers (d_str) representing the code points of the characters.
52
   *
53
   * To build a string from a C++ string, we may process escape sequences
54
   * according to the SMT-LIB standard. In particular, if useEscSequences is
55
   * true, we convert unicode escape sequences:
56
   *  \u d_3 d_2 d_1 d_0
57
   *  \u{d_0}
58
   *  \u{d_1 d_0}
59
   *  \u{d_2 d_1 d_0}
60
   *  \u{d_3 d_2 d_1 d_0}
61
   *  \u{d_4 d_3 d_2 d_1 d_0}
62
   * where d_0 ... d_4 are hexadecimal digits, to the appropriate character.
63
   *
64
   * If useEscSequences is false, then the characters of the constructed
65
   * cvc5::String correspond one-to-one with the input string.
66
   */
67
583671
  String() = default;
68
8279
  explicit String(const std::string& s, bool useEscSequences = false)
69
8279
      : d_str(toInternal(s, useEscSequences))
70
  {
71
8278
  }
72
  explicit String(const std::wstring& s);
73
13155
  explicit String(const char* s, bool useEscSequences = false)
74
13155
      : d_str(toInternal(std::string(s), useEscSequences))
75
  {
76
13155
  }
77
  explicit String(const std::vector<unsigned>& s);
78
79
608094
  String& operator=(const String& y) {
80
608094
    if (this != &y) {
81
608094
      d_str = y.d_str;
82
    }
83
608094
    return *this;
84
  }
85
86
  String concat(const String& other) const;
87
88
1918420
  bool operator==(const String& y) const { return cmp(y) == 0; }
89
92
  bool operator!=(const String& y) const { return cmp(y) != 0; }
90
2046
  bool operator<(const String& y) const { return cmp(y) < 0; }
91
  bool operator>(const String& y) const { return cmp(y) > 0; }
92
  bool operator<=(const String& y) const { return cmp(y) <= 0; }
93
  bool operator>=(const String& y) const { return cmp(y) >= 0; }
94
95
  /**
96
   * Returns true if this string is equal to y for their first n characters.
97
   * If n is larger than the length of this string or y, this method returns
98
   * true if and only if this string is equal to y.
99
   */
100
  bool strncmp(const String& y, std::size_t n) const;
101
  /**
102
   * Returns true if this string is equal to y for their last n characters.
103
   * Similar to strncmp, if n is larger than the length of this string or y,
104
   * this method returns true if and only if this string is equal to y.
105
   */
106
  bool rstrncmp(const String& y, std::size_t n) const;
107
108
  /* toString
109
   * Converts this string to a std::string.
110
   *
111
   * The unprintable characters are converted to unicode escape sequences as
112
   * described above.
113
   *
114
   * If useEscSequences is false, the string's printable characters are
115
   * printed as characters. Notice that for all std::string s having only
116
   * printable characters, we have that
117
   *    cvc5::String( s ).toString() = s.
118
   */
119
  std::string toString(bool useEscSequences = false) const;
120
  /* toWString
121
   * Converts this string to a std::wstring.
122
   *
123
   * Unlike toString(), this method uses no escape sequences as both this class
124
   * and std::wstring use 32bit characters.
125
   */
126
  std::wstring toWString() const;
127
  /** is this the empty string? */
128
123095
  bool empty() const { return d_str.empty(); }
129
  /** is less than or equal to string y */
130
  bool isLeq(const String& y) const;
131
  /** Return the length of the string */
132
14672801
  std::size_t size() const { return d_str.size(); }
133
134
  bool isRepeated() const;
135
  bool tailcmp(const String& y, int& c) const;
136
137
  /**
138
   * Return the first position y occurs in this string, or std::string::npos
139
   * otherwise.
140
   */
141
  std::size_t find(const String& y, const std::size_t start = 0) const;
142
  /**
143
   * Return the first position y occurs in this string searching from the end,
144
   * or std::string::npos otherwise.
145
   */
146
  std::size_t rfind(const String& y, const std::size_t start = 0) const;
147
  /** Returns true if y is a prefix of this */
148
  bool hasPrefix(const String& y) const;
149
  /** Returns true if y is a suffix of this */
150
  bool hasSuffix(const String& y) const;
151
  /** Replace the character at index i in this string with t */
152
  String update(std::size_t i, const String& t) const;
153
  /** Replace the first occurrence of s in this string with t */
154
  String replace(const String& s, const String& t) const;
155
  /** Return the substring of this string starting at index i */
156
  String substr(std::size_t i) const;
157
  /** Return the substring of this string starting at index i with size at most
158
   * j */
159
  String substr(std::size_t i, std::size_t j) const;
160
  /** Return the prefix of this string of size at most i */
161
5912
  String prefix(std::size_t i) const { return substr(0, i); }
162
  /** Return the suffix of this string of size at most i */
163
20769
  String suffix(std::size_t i) const { return substr(size() - i, i); }
164
165
  /**
166
   * Checks if there is any overlap between this string and another string. This
167
   * corresponds to checking whether one string contains the other and whether a
168
   * substring of one is a prefix of the other and vice-versa.
169
   *
170
   * @param y The other string
171
   * @return True if there is an overlap, false otherwise
172
   */
173
  bool noOverlapWith(const String& y) const;
174
175
  /** string overlap
176
  *
177
  * if overlap returns m>0,
178
  * then the maximal suffix of this string that is a prefix of y is of length m.
179
  *
180
  * For example, if x is "abcdef", then:
181
  * x.overlap("defg") = 3
182
  * x.overlap("ab") = 0
183
  * x.overlap("d") = 0
184
  * x.overlap("bcdefdef") = 5
185
  */
186
  std::size_t overlap(const String& y) const;
187
  /** string reverse overlap
188
  *
189
  * if roverlap returns m>0,
190
  * then the maximal prefix of this string that is a suffix of y is of length m.
191
  *
192
  * For example, if x is "abcdef", then:
193
  * x.roverlap("aaabc") = 3
194
  * x.roverlap("def") = 0
195
  * x.roverlap("d") = 0
196
  * x.roverlap("defabcde") = 5
197
  *
198
  * Notice that x.overlap(y) = y.roverlap(x)
199
  */
200
  std::size_t roverlap(const String& y) const;
201
202
  /**
203
   * Returns true if this string corresponds in text to a number, for example
204
   * this returns true for strings "7", "12", "004", "0" and false for strings
205
   * "abc", "4a", "-4", "".
206
   */
207
  bool isNumber() const;
208
  /** Returns the corresponding rational for the text of this string. */
209
  Rational toNumber() const;
210
  /** Get the unsigned representation (code points) of this string */
211
221767
  const std::vector<unsigned>& getVec() const { return d_str; }
212
  /**
213
   * Get the unsigned (code point) value of the first character in this string
214
   */
215
  unsigned front() const;
216
  /**
217
   * Get the unsigned (code point) value of the last character in this string
218
   */
219
  unsigned back() const;
220
  /** is the unsigned a digit?
221
   *
222
   * This is true for code points between 48 ('0') and 57 ('9').
223
   */
224
  static bool isDigit(unsigned character);
225
  /** is the unsigned a hexadecimal digit?
226
   *
227
   * This is true for code points between 48 ('0') and 57 ('9'), code points
228
   * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
229
   */
230
  static bool isHexDigit(unsigned character);
231
  /** is the unsigned a printable code point?
232
   *
233
   * This is true for Unicode 32 (' ') to 126 ('~').
234
   */
235
  static bool isPrintable(unsigned character);
236
237
  /**
238
   * Returns the maximum length of string representable by this class.
239
   * Corresponds to the maximum size of d_str.
240
   */
241
  static size_t maxSize();
242
 private:
243
  /**
244
   * Helper for toInternal: add character ch to vector vec, storing a string in
245
   * internal format. This throws an error if ch is not a printable character,
246
   * since non-printable characters must be escaped in SMT-LIB.
247
   */
248
  static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
249
  /**
250
   * Convert the string s to the internal format (vector of code points).
251
   * The argument useEscSequences is whether to process unicode escape
252
   * sequences.
253
   */
254
  static std::vector<unsigned> toInternal(const std::string& s,
255
                                          bool useEscSequences);
256
257
  /**
258
   * Returns a negative number if *this < y, 0 if *this and y are equal and a
259
   * positive number if *this > y.
260
   */
261
  int cmp(const String& y) const;
262
263
  std::vector<unsigned> d_str;
264
}; /* class String */
265
266
namespace strings {
267
268
struct StringHashFunction
269
{
270
1996475
  size_t operator()(const ::cvc5::String& s) const
271
  {
272
1996475
    return std::hash<std::string>()(s.toString());
273
  }
274
}; /* struct StringHashFunction */
275
276
}  // namespace strings
277
278
std::ostream& operator<<(std::ostream& os, const String& s);
279
280
}  // namespace cvc5
281
282
#endif /* CVC5__UTIL__STRING_H */