GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.h Lines: 23 24 95.8 %
Date: 2021-03-22 Branches: 4 8 50.0 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file string.h
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Andrew Reynolds, Tim King, Tianyi Liang
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief The string data type.
13
 **/
14
15
#include "cvc4_public.h"
16
17
#ifndef CVC4__UTIL__STRING_H
18
#define CVC4__UTIL__STRING_H
19
20
#include <iosfwd>
21
#include <string>
22
#include <vector>
23
24
#include "util/rational.h"
25
26
namespace CVC4 {
27
28
/** The CVC4 string class
29
 *
30
 * This data structure is the domain of values for the string type. It can also
31
 * be used as a generic utility for representing strings.
32
 */
33
4922508
class String
34
{
35
 public:
36
  /**
37
   * This is the cardinality of the alphabet that is representable by this
38
   * class. Notice that this must be greater than or equal to the cardinality
39
   * of the alphabet that the string theory reasons about.
40
   *
41
   * This must be strictly less than std::numeric_limits<unsigned>::max().
42
   *
43
   * As per the SMT-LIB standard for strings, we support the first 3 planes of
44
   * Unicode characters, where 196608 = 3*16^4.
45
   */
46
5351285
  static inline unsigned num_codes() { return 196608; }
47
  /** constructors for String
48
   *
49
   * Internally, a CVC4::String is represented by a vector of unsigned
50
   * integers (d_str) representing the code points of the characters.
51
   *
52
   * To build a string from a C++ string, we may process escape sequences
53
   * according to the SMT-LIB standard. In particular, if useEscSequences is
54
   * true, we convert unicode escape sequences:
55
   *  \u d_3 d_2 d_1 d_0
56
   *  \u{d_0}
57
   *  \u{d_1 d_0}
58
   *  \u{d_2 d_1 d_0}
59
   *  \u{d_3 d_2 d_1 d_0}
60
   *  \u{d_4 d_3 d_2 d_1 d_0}
61
   * where d_0 ... d_4 are hexadecimal digits, to the appropriate character.
62
   *
63
   * If useEscSequences is false, then the characters of the constructed
64
   * CVC4::String correspond one-to-one with the input string.
65
   */
66
601088
  String() = default;
67
8143
  explicit String(const std::string& s, bool useEscSequences = false)
68
8143
      : d_str(toInternal(s, useEscSequences))
69
  {
70
8142
  }
71
13092
  explicit String(const char* s, bool useEscSequences = false)
72
13092
      : d_str(toInternal(std::string(s), useEscSequences))
73
  {
74
13092
  }
75
  explicit String(const std::vector<unsigned>& s);
76
77
625696
  String& operator=(const String& y) {
78
625696
    if (this != &y) {
79
625696
      d_str = y.d_str;
80
    }
81
625696
    return *this;
82
  }
83
84
  String concat(const String& other) const;
85
86
2327412
  bool operator==(const String& y) const { return cmp(y) == 0; }
87
110
  bool operator!=(const String& y) const { return cmp(y) != 0; }
88
2046
  bool operator<(const String& y) const { return cmp(y) < 0; }
89
  bool operator>(const String& y) const { return cmp(y) > 0; }
90
  bool operator<=(const String& y) const { return cmp(y) <= 0; }
91
  bool operator>=(const String& y) const { return cmp(y) >= 0; }
92
93
  /**
94
   * Returns true if this string is equal to y for their first n characters.
95
   * If n is larger than the length of this string or y, this method returns
96
   * true if and only if this string is equal to y.
97
   */
98
  bool strncmp(const String& y, std::size_t n) const;
99
  /**
100
   * Returns true if this string is equal to y for their last n characters.
101
   * Similar to strncmp, if n is larger than the length of this string or y,
102
   * this method returns true if and only if this string is equal to y.
103
   */
104
  bool rstrncmp(const String& y, std::size_t n) const;
105
106
  /* toString
107
   * Converts this string to a std::string.
108
   *
109
   * The unprintable characters are converted to unicode escape sequences as
110
   * described above.
111
   *
112
   * If useEscSequences is false, the string's printable characters are
113
   * printed as characters. Notice that for all std::string s having only
114
   * printable characters, we have that
115
   *    CVC4::String( s ).toString() = s.
116
   */
117
  std::string toString(bool useEscSequences = false) const;
118
  /* toWString
119
   * Converts this string to a std::wstring.
120
   *
121
   * Unlike toString(), this method uses no escape sequences as both this class
122
   * and std::wstring use 32bit characters.
123
   */
124
  std::wstring toWString() const;
125
  /** is this the empty string? */
126
190681
  bool empty() const { return d_str.empty(); }
127
  /** is less than or equal to string y */
128
  bool isLeq(const String& y) const;
129
  /** Return the length of the string */
130
18785292
  std::size_t size() const { return d_str.size(); }
131
132
  bool isRepeated() const;
133
  bool tailcmp(const String& y, int& c) const;
134
135
  /**
136
   * Return the first position y occurs in this string, or std::string::npos
137
   * otherwise.
138
   */
139
  std::size_t find(const String& y, const std::size_t start = 0) const;
140
  /**
141
   * Return the first position y occurs in this string searching from the end,
142
   * or std::string::npos otherwise.
143
   */
144
  std::size_t rfind(const String& y, const std::size_t start = 0) const;
145
  /** Returns true if y is a prefix of this */
146
  bool hasPrefix(const String& y) const;
147
  /** Returns true if y is a suffix of this */
148
  bool hasSuffix(const String& y) const;
149
  /** Replace the character at index i in this string with t */
150
  String update(std::size_t i, const String& t) const;
151
  /** Replace the first occurrence of s in this string with t */
152
  String replace(const String& s, const String& t) const;
153
  /** Return the substring of this string starting at index i */
154
  String substr(std::size_t i) const;
155
  /** Return the substring of this string starting at index i with size at most
156
   * j */
157
  String substr(std::size_t i, std::size_t j) const;
158
  /** Return the prefix of this string of size at most i */
159
6826
  String prefix(std::size_t i) const { return substr(0, i); }
160
  /** Return the suffix of this string of size at most i */
161
34749
  String suffix(std::size_t i) const { return substr(size() - i, i); }
162
163
  /**
164
   * Checks if there is any overlap between this string and another string. This
165
   * corresponds to checking whether one string contains the other and whether a
166
   * substring of one is a prefix of the other and vice-versa.
167
   *
168
   * @param y The other string
169
   * @return True if there is an overlap, false otherwise
170
   */
171
  bool noOverlapWith(const String& y) const;
172
173
  /** string overlap
174
  *
175
  * if overlap returns m>0,
176
  * then the maximal suffix of this string that is a prefix of y is of length m.
177
  *
178
  * For example, if x is "abcdef", then:
179
  * x.overlap("defg") = 3
180
  * x.overlap("ab") = 0
181
  * x.overlap("d") = 0
182
  * x.overlap("bcdefdef") = 5
183
  */
184
  std::size_t overlap(const String& y) const;
185
  /** string reverse overlap
186
  *
187
  * if roverlap returns m>0,
188
  * then the maximal prefix of this string that is a suffix of y is of length m.
189
  *
190
  * For example, if x is "abcdef", then:
191
  * x.roverlap("aaabc") = 3
192
  * x.roverlap("def") = 0
193
  * x.roverlap("d") = 0
194
  * x.roverlap("defabcde") = 5
195
  *
196
  * Notice that x.overlap(y) = y.roverlap(x)
197
  */
198
  std::size_t roverlap(const String& y) const;
199
200
  /**
201
   * Returns true if this string corresponds in text to a number, for example
202
   * this returns true for strings "7", "12", "004", "0" and false for strings
203
   * "abc", "4a", "-4", "".
204
   */
205
  bool isNumber() const;
206
  /** Returns the corresponding rational for the text of this string. */
207
  Rational toNumber() const;
208
  /** Get the unsigned representation (code points) of this string */
209
265929
  const std::vector<unsigned>& getVec() const { return d_str; }
210
  /**
211
   * Get the unsigned (code point) value of the first character in this string
212
   */
213
  unsigned front() const;
214
  /**
215
   * Get the unsigned (code point) value of the last character in this string
216
   */
217
  unsigned back() const;
218
  /** is the unsigned a digit?
219
   *
220
   * This is true for code points between 48 ('0') and 57 ('9').
221
   */
222
  static bool isDigit(unsigned character);
223
  /** is the unsigned a hexadecimal digit?
224
   *
225
   * This is true for code points between 48 ('0') and 57 ('9'), code points
226
   * between 65 ('A') and 70 ('F) and code points between 97 ('a') and 102 ('f).
227
   */
228
  static bool isHexDigit(unsigned character);
229
  /** is the unsigned a printable code point?
230
   *
231
   * This is true for Unicode 32 (' ') to 126 ('~').
232
   */
233
  static bool isPrintable(unsigned character);
234
235
  /**
236
   * Returns the maximum length of string representable by this class.
237
   * Corresponds to the maximum size of d_str.
238
   */
239
  static size_t maxSize();
240
 private:
241
  /**
242
   * Helper for toInternal: add character ch to vector vec, storing a string in
243
   * internal format. This throws an error if ch is not a printable character,
244
   * since non-printable characters must be escaped in SMT-LIB.
245
   */
246
  static void addCharToInternal(unsigned char ch, std::vector<unsigned>& vec);
247
  /**
248
   * Convert the string s to the internal format (vector of code points).
249
   * The argument useEscSequences is whether to process unicode escape
250
   * sequences.
251
   */
252
  static std::vector<unsigned> toInternal(const std::string& s,
253
                                          bool useEscSequences);
254
255
  /**
256
   * Returns a negative number if *this < y, 0 if *this and y are equal and a
257
   * positive number if *this > y.
258
   */
259
  int cmp(const String& y) const;
260
261
  std::vector<unsigned> d_str;
262
}; /* class String */
263
264
namespace strings {
265
266
struct StringHashFunction
267
{
268
2400503
  size_t operator()(const ::CVC4::String& s) const {
269
2400503
    return std::hash<std::string>()(s.toString());
270
  }
271
}; /* struct StringHashFunction */
272
273
}  // namespace strings
274
275
std::ostream& operator<<(std::ostream& os, const String& s);
276
277
}  // namespace CVC4
278
279
#endif /* CVC4__UTIL__STRING_H */