GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.cpp Lines: 254 265 95.8 %
Date: 2021-05-22 Branches: 283 486 58.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Tim King, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the string data type.
14
 */
15
16
#include "util/string.h"
17
18
#include <algorithm>
19
#include <climits>
20
#include <iomanip>
21
#include <iostream>
22
#include <sstream>
23
24
#include "base/check.h"
25
#include "base/exception.h"
26
27
using namespace std;
28
29
namespace cvc5 {
30
31
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
32
33
12956
String::String(const std::wstring& s)
34
{
35
12956
  d_str.resize(s.size());
36
141890
  for (size_t i = 0, n = s.size(); i < n; ++i)
37
  {
38
128934
    d_str[i] = static_cast<unsigned>(s[i]);
39
  }
40
12956
}
41
42
2070314
String::String(const std::vector<unsigned> &s) : d_str(s)
43
{
44
#ifdef CVC5_ASSERTIONS
45
5982878
  for (unsigned u : d_str)
46
  {
47
3912564
    Assert(u < num_codes());
48
  }
49
#endif
50
2070314
}
51
52
2076082
int String::cmp(const String &y) const {
53
2076082
  if (size() != y.size()) {
54
10970
    return size() < y.size() ? -1 : 1;
55
  }
56
3582762
  for (unsigned int i = 0; i < size(); ++i) {
57
1522640
    if (d_str[i] != y.d_str[i]) {
58
4990
      unsigned cp = d_str[i];
59
4990
      unsigned cpy = y.d_str[i];
60
4990
      return cp < cpy ? -1 : 1;
61
    }
62
  }
63
2060122
  return 0;
64
}
65
66
24460
String String::concat(const String &other) const {
67
48920
  std::vector<unsigned int> ret_vec(d_str);
68
24460
  ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
69
48920
  return String(ret_vec);
70
}
71
72
23176
bool String::strncmp(const String& y, std::size_t n) const
73
{
74
23176
  std::size_t b = (size() >= y.size()) ? size() : y.size();
75
23176
  std::size_t s = (size() <= y.size()) ? size() : y.size();
76
23176
  if (n > s) {
77
    if (b == s) {
78
      n = s;
79
    } else {
80
      return false;
81
    }
82
  }
83
108171
  for (std::size_t i = 0; i < n; ++i) {
84
88191
    if (d_str[i] != y.d_str[i]) return false;
85
  }
86
19980
  return true;
87
}
88
89
24898
bool String::rstrncmp(const String& y, std::size_t n) const
90
{
91
24898
  std::size_t b = (size() >= y.size()) ? size() : y.size();
92
24898
  std::size_t s = (size() <= y.size()) ? size() : y.size();
93
24898
  if (n > s) {
94
    if (b == s) {
95
      n = s;
96
    } else {
97
      return false;
98
    }
99
  }
100
71454
  for (std::size_t i = 0; i < n; ++i) {
101
50187
    if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
102
  }
103
21267
  return true;
104
}
105
106
17832
void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
107
{
108
  // if not a printable character
109
17832
  if (ch > 127 || ch < 32)
110
  {
111
2
    std::stringstream serr;
112
    serr << "Illegal string character: \"" << ch
113
1
         << "\", must use escape sequence";
114
1
    throw cvc5::Exception(serr.str());
115
  }
116
  else
117
  {
118
17831
    str.push_back(static_cast<unsigned>(ch));
119
  }
120
17831
}
121
122
21590
std::vector<unsigned> String::toInternal(const std::string& s,
123
                                         bool useEscSequences)
124
{
125
21590
  std::vector<unsigned> str;
126
21590
  unsigned i = 0;
127
55804
  while (i < s.size())
128
  {
129
    // get the current character
130
17108
    char si = s[i];
131
33883
    if (si != '\\' || !useEscSequences)
132
    {
133
16776
      addCharToInternal(si, str);
134
16775
      ++i;
135
16775
      continue;
136
    }
137
    // the vector of characters, in case we fail to read an escape sequence
138
664
    std::vector<unsigned> nonEscCache;
139
    // process the '\'
140
332
    addCharToInternal(si, nonEscCache);
141
332
    ++i;
142
    // are we an escape sequence?
143
332
    bool isEscapeSequence = true;
144
    // the string corresponding to the hexadecimal code point
145
664
    std::stringstream hexString;
146
    // is the slash followed by a 'u'? Could be last character.
147
332
    if (i >= s.size() || s[i] != 'u')
148
    {
149
180
      isEscapeSequence = false;
150
    }
151
    else
152
    {
153
      // process the 'u'
154
152
      addCharToInternal(s[i], nonEscCache);
155
152
      ++i;
156
152
      bool isStart = true;
157
152
      bool isEnd = false;
158
152
      bool hasBrace = false;
159
1028
      while (i < s.size())
160
      {
161
        // add the next character
162
578
        si = s[i];
163
578
        if (isStart)
164
        {
165
148
          isStart = false;
166
          // possibly read '{'
167
238
          if (si == '{')
168
          {
169
90
            hasBrace = true;
170
90
            addCharToInternal(si, nonEscCache);
171
90
            ++i;
172
90
            continue;
173
          }
174
        }
175
430
        else if (si == '}')
176
        {
177
          // can only end if we had an open brace and read at least one digit
178
78
          isEscapeSequence = hasBrace && !hexString.str().empty();
179
78
          isEnd = true;
180
78
          addCharToInternal(si, nonEscCache);
181
78
          ++i;
182
78
          break;
183
        }
184
        // must be a hex digit at this point
185
410
        if (!isHexDigit(static_cast<unsigned>(si)))
186
        {
187
6
          isEscapeSequence = false;
188
6
          break;
189
        }
190
404
        hexString << si;
191
404
        addCharToInternal(si, nonEscCache);
192
404
        ++i;
193
404
        if (!hasBrace && hexString.str().size() == 4)
194
        {
195
          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
196
54
          isEnd = true;
197
54
          break;
198
        }
199
350
        else if (hasBrace && hexString.str().size() > 5)
200
        {
201
          // too many digits enclosed in brace, not an escape sequence
202
2
          isEscapeSequence = false;
203
2
          break;
204
        }
205
      }
206
152
      if (!isEnd)
207
      {
208
        // if we were interrupted before ending, then this is not a valid
209
        // escape sequence
210
20
        isEscapeSequence = false;
211
      }
212
    }
213
332
    if (isEscapeSequence)
214
    {
215
130
      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
216
      // Otherwise, we add the escaped character.
217
      // This is guaranteed not to overflow due to the length of hstr.
218
      uint32_t val;
219
130
      hexString >> std::hex >> val;
220
130
      if (val > num_codes())
221
      {
222
        // Failed due to being out of range. This can happen for strings of
223
        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexadecimal not
224
        // in the range [0-2].
225
2
        isEscapeSequence = false;
226
      }
227
      else
228
      {
229
128
        str.push_back(val);
230
      }
231
    }
232
    // if we did not successfully parse an escape sequence, we add back all
233
    // characters that we cached
234
332
    if (!isEscapeSequence)
235
    {
236
204
      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
237
    }
238
  }
239
#ifdef CVC5_ASSERTIONS
240
38726
  for (unsigned u : str)
241
  {
242
17137
    Assert(u < num_codes());
243
  }
244
#endif
245
21589
  return str;
246
}
247
248
72158
unsigned String::front() const
249
{
250
72158
  Assert(!d_str.empty());
251
72158
  return d_str.front();
252
}
253
254
35596
unsigned String::back() const
255
{
256
35596
  Assert(!d_str.empty());
257
35596
  return d_str.back();
258
}
259
260
4378
std::size_t String::overlap(const String &y) const {
261
4378
  std::size_t i = size() < y.size() ? size() : y.size();
262
7850
  for (; i > 0; i--) {
263
6310
    String s = suffix(i);
264
6310
    String p = y.prefix(i);
265
4574
    if (s == p) {
266
2838
      return i;
267
    }
268
  }
269
1540
  return i;
270
}
271
272
1897
std::size_t String::roverlap(const String &y) const {
273
1897
  std::size_t i = size() < y.size() ? size() : y.size();
274
4233
  for (; i > 0; i--) {
275
2768
    String s = prefix(i);
276
2768
    String p = y.suffix(i);
277
1600
    if (s == p) {
278
432
      return i;
279
    }
280
  }
281
1465
  return i;
282
}
283
284
2162089
std::string String::toString(bool useEscSequences) const {
285
4324178
  std::stringstream str;
286
4280038
  for (unsigned int i = 0; i < size(); ++i) {
287
    // we always print backslash as a code point so that it cannot be
288
    // interpreted as specifying part of a code point, e.g. the string '\' +
289
    // 'u' + '0' of length three.
290
2117949
    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
291
    {
292
2070869
      str << static_cast<char>(d_str[i]);
293
    }
294
    else
295
    {
296
94160
      std::stringstream ss;
297
47080
      ss << std::hex << d_str[i];
298
47080
      str << "\\u{" << ss.str() << "}";
299
    }
300
  }
301
4324178
  return str.str();
302
}
303
304
12471
std::wstring String::toWString() const
305
{
306
12471
  std::wstring res(size(), static_cast<wchar_t>(0));
307
139202
  for (std::size_t i = 0; i < size(); ++i)
308
  {
309
126731
    res[i] = static_cast<wchar_t>(d_str[i]);
310
  }
311
12471
  return res;
312
}
313
314
81
bool String::isLeq(const String &y) const
315
{
316
130
  for (unsigned i = 0; i < size(); ++i)
317
  {
318
114
    if (i >= y.size())
319
    {
320
4
      return false;
321
    }
322
110
    unsigned ci = d_str[i];
323
110
    unsigned cyi = y.d_str[i];
324
110
    if (ci > cyi)
325
    {
326
42
      return false;
327
    }
328
68
    if (ci < cyi)
329
    {
330
19
      return true;
331
    }
332
  }
333
16
  return true;
334
}
335
336
20
bool String::isRepeated() const {
337
20
  if (size() > 1) {
338
20
    unsigned int f = d_str[0];
339
34
    for (unsigned i = 1; i < size(); ++i) {
340
20
      if (f != d_str[i]) return false;
341
    }
342
  }
343
14
  return true;
344
}
345
346
18
bool String::tailcmp(const String &y, int &c) const {
347
18
  int id_x = size() - 1;
348
18
  int id_y = y.size() - 1;
349
138
  while (id_x >= 0 && id_y >= 0) {
350
60
    if (d_str[id_x] != y.d_str[id_y]) {
351
      c = id_x;
352
      return false;
353
    }
354
60
    --id_x;
355
60
    --id_y;
356
  }
357
18
  c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
358
18
  return true;
359
}
360
361
73907
std::size_t String::find(const String &y, const std::size_t start) const {
362
73907
  if (size() < y.size() + start) return std::string::npos;
363
70091
  if (y.empty()) return start;
364
69813
  if (empty()) return std::string::npos;
365
366
  std::vector<unsigned>::const_iterator itr = std::search(
367
69813
      d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
368
69813
  if (itr != d_str.end()) {
369
65660
    return itr - d_str.begin();
370
  }
371
4153
  return std::string::npos;
372
}
373
374
9121
std::size_t String::rfind(const String &y, const std::size_t start) const {
375
9121
  if (size() < y.size() + start) return std::string::npos;
376
6464
  if (y.empty()) return start;
377
6462
  if (empty()) return std::string::npos;
378
379
  std::vector<unsigned>::const_reverse_iterator itr = std::search(
380
6462
      d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
381
6462
  if (itr != d_str.rend()) {
382
5810
    return itr - d_str.rbegin();
383
  }
384
652
  return std::string::npos;
385
}
386
387
4441
bool String::hasPrefix(const String& y) const
388
{
389
4441
  size_t s = size();
390
4441
  size_t ys = y.size();
391
4441
  if (ys > s)
392
  {
393
2
    return false;
394
  }
395
13661
  for (size_t i = 0; i < ys; i++)
396
  {
397
9292
    if (d_str[i] != y.d_str[i])
398
    {
399
70
      return false;
400
    }
401
  }
402
4369
  return true;
403
}
404
405
6986
bool String::hasSuffix(const String& y) const
406
{
407
6986
  size_t s = size();
408
6986
  size_t ys = y.size();
409
6986
  if (ys > s)
410
  {
411
2
    return false;
412
  }
413
6984
  size_t idiff = s - ys;
414
16297
  for (size_t i = 0; i < ys; i++)
415
  {
416
9363
    if (d_str[i + idiff] != y.d_str[i])
417
    {
418
50
      return false;
419
    }
420
  }
421
6934
  return true;
422
}
423
424
93
String String::update(std::size_t i, const String& t) const
425
{
426
93
  if (i < size())
427
  {
428
186
    std::vector<unsigned> vec(d_str.begin(), d_str.begin() + i);
429
93
    size_t remNum = size() - i;
430
93
    size_t tnum = t.d_str.size();
431
93
    if (tnum >= remNum)
432
    {
433
21
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.begin() + remNum);
434
    }
435
    else
436
    {
437
72
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
438
72
      vec.insert(vec.end(), d_str.begin() + i + tnum, d_str.end());
439
    }
440
93
    return String(vec);
441
  }
442
  return *this;
443
}
444
445
345
String String::replace(const String &s, const String &t) const {
446
345
  std::size_t ret = find(s);
447
345
  if (ret != std::string::npos) {
448
250
    std::vector<unsigned> vec;
449
125
    vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
450
125
    vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
451
125
    vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
452
125
    return String(vec);
453
  } else {
454
220
    return *this;
455
  }
456
}
457
458
13859
String String::substr(std::size_t i) const {
459
13859
  Assert(i <= size());
460
27718
  std::vector<unsigned> ret_vec;
461
13859
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
462
13859
  ret_vec.insert(ret_vec.end(), itr, d_str.end());
463
27718
  return String(ret_vec);
464
}
465
466
368589
String String::substr(std::size_t i, std::size_t j) const {
467
368589
  Assert(i + j <= size());
468
737178
  std::vector<unsigned> ret_vec;
469
368589
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
470
368589
  ret_vec.insert(ret_vec.end(), itr, itr + j);
471
737178
  return String(ret_vec);
472
}
473
474
413
bool String::noOverlapWith(const String& y) const
475
{
476
413
  return y.find(*this) == std::string::npos
477
152
         && this->find(y) == std::string::npos && this->overlap(y) == 0
478
524
         && y.overlap(*this) == 0;
479
}
480
481
224
bool String::isNumber() const {
482
224
  if (d_str.empty()) {
483
20
    return false;
484
  }
485
464
  for (unsigned character : d_str) {
486
349
    if (!isDigit(character))
487
    {
488
89
      return false;
489
    }
490
  }
491
115
  return true;
492
}
493
494
835
bool String::isDigit(unsigned character)
495
{
496
  // '0' to '9'
497
835
  return 48 <= character && character <= 57;
498
}
499
500
410
bool String::isHexDigit(unsigned character)
501
{
502
  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
503
508
  return isDigit(character) || (65 <= character && character <= 70)
504
494
         || (97 <= character && character <= 102);
505
}
506
507
2117949
bool String::isPrintable(unsigned character)
508
{
509
  // Unicode 0x00020 (' ') to 0x0007E ('~')
510
2117949
  return 32 <= character && character <= 126;
511
}
512
513
12034
size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
514
515
91
Rational String::toNumber() const
516
{
517
  // when smt2 standard for strings is set, this may change, based on the
518
  // semantics of str.from.int for leading zeros
519
91
  return Rational(toString());
520
}
521
522
std::ostream &operator<<(std::ostream &os, const String &s) {
523
  return os << "\"" << s.toString() << "\"";
524
}
525
526
28194
}  // namespace cvc5