GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.cpp Lines: 254 265 95.8 %
Date: 2021-08-17 Branches: 283 486 58.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Tim King, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the string data type.
14
 */
15
16
#include "util/string.h"
17
18
#include <algorithm>
19
#include <climits>
20
#include <iomanip>
21
#include <iostream>
22
#include <sstream>
23
24
#include "base/check.h"
25
#include "base/exception.h"
26
27
using namespace std;
28
29
namespace cvc5 {
30
31
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
32
33
13134
String::String(const std::wstring& s)
34
{
35
13134
  d_str.resize(s.size());
36
137654
  for (size_t i = 0, n = s.size(); i < n; ++i)
37
  {
38
124520
    d_str[i] = static_cast<unsigned>(s[i]);
39
  }
40
13134
}
41
42
2498023
String::String(const std::vector<unsigned> &s) : d_str(s)
43
{
44
#ifdef CVC5_ASSERTIONS
45
6765781
  for (unsigned u : d_str)
46
  {
47
4267758
    Assert(u < num_codes());
48
  }
49
#endif
50
2498023
}
51
52
2496757
int String::cmp(const String &y) const {
53
2496757
  if (size() != y.size()) {
54
18220
    return size() < y.size() ? -1 : 1;
55
  }
56
4343025
  for (unsigned int i = 0; i < size(); ++i) {
57
1871583
    if (d_str[i] != y.d_str[i]) {
58
7095
      unsigned cp = d_str[i];
59
7095
      unsigned cpy = y.d_str[i];
60
7095
      return cp < cpy ? -1 : 1;
61
    }
62
  }
63
2471442
  return 0;
64
}
65
66
24447
String String::concat(const String &other) const {
67
48894
  std::vector<unsigned int> ret_vec(d_str);
68
24447
  ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
69
48894
  return String(ret_vec);
70
}
71
72
22227
bool String::strncmp(const String& y, std::size_t n) const
73
{
74
22227
  std::size_t b = (size() >= y.size()) ? size() : y.size();
75
22227
  std::size_t s = (size() <= y.size()) ? size() : y.size();
76
22227
  if (n > s) {
77
    if (b == s) {
78
      n = s;
79
    } else {
80
      return false;
81
    }
82
  }
83
96353
  for (std::size_t i = 0; i < n; ++i) {
84
78341
    if (d_str[i] != y.d_str[i]) return false;
85
  }
86
18012
  return true;
87
}
88
89
25494
bool String::rstrncmp(const String& y, std::size_t n) const
90
{
91
25494
  std::size_t b = (size() >= y.size()) ? size() : y.size();
92
25494
  std::size_t s = (size() <= y.size()) ? size() : y.size();
93
25494
  if (n > s) {
94
    if (b == s) {
95
      n = s;
96
    } else {
97
      return false;
98
    }
99
  }
100
64358
  for (std::size_t i = 0; i < n; ++i) {
101
42837
    if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
102
  }
103
21521
  return true;
104
}
105
106
20637
void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
107
{
108
  // if not a printable character
109
20637
  if (ch > 127 || ch < 32)
110
  {
111
2
    std::stringstream serr;
112
    serr << "Illegal string character: \"" << ch
113
1
         << "\", must use escape sequence";
114
1
    throw cvc5::Exception(serr.str());
115
  }
116
  else
117
  {
118
20636
    str.push_back(static_cast<unsigned>(ch));
119
  }
120
20636
}
121
122
24088
std::vector<unsigned> String::toInternal(const std::string& s,
123
                                         bool useEscSequences)
124
{
125
24088
  std::vector<unsigned> str;
126
24088
  unsigned i = 0;
127
60132
  while (i < s.size())
128
  {
129
    // get the current character
130
18023
    char si = s[i];
131
35343
    if (si != '\\' || !useEscSequences)
132
    {
133
17321
      addCharToInternal(si, str);
134
17320
      ++i;
135
17320
      continue;
136
    }
137
    // the vector of characters, in case we fail to read an escape sequence
138
1404
    std::vector<unsigned> nonEscCache;
139
    // process the '\'
140
702
    addCharToInternal(si, nonEscCache);
141
702
    ++i;
142
    // are we an escape sequence?
143
702
    bool isEscapeSequence = true;
144
    // the string corresponding to the hexadecimal code point
145
1404
    std::stringstream hexString;
146
    // is the slash followed by a 'u'? Could be last character.
147
702
    if (i >= s.size() || s[i] != 'u')
148
    {
149
172
      isEscapeSequence = false;
150
    }
151
    else
152
    {
153
      // process the 'u'
154
530
      addCharToInternal(s[i], nonEscCache);
155
530
      ++i;
156
530
      bool isStart = true;
157
530
      bool isEnd = false;
158
530
      bool hasBrace = false;
159
3674
      while (i < s.size())
160
      {
161
        // add the next character
162
2090
        si = s[i];
163
2090
        if (isStart)
164
        {
165
526
          isStart = false;
166
          // possibly read '{'
167
994
          if (si == '{')
168
          {
169
468
            hasBrace = true;
170
468
            addCharToInternal(si, nonEscCache);
171
468
            ++i;
172
468
            continue;
173
          }
174
        }
175
1564
        else if (si == '}')
176
        {
177
          // can only end if we had an open brace and read at least one digit
178
456
          isEscapeSequence = hasBrace && !hexString.str().empty();
179
456
          isEnd = true;
180
456
          addCharToInternal(si, nonEscCache);
181
456
          ++i;
182
456
          break;
183
        }
184
        // must be a hex digit at this point
185
1166
        if (!isHexDigit(static_cast<unsigned>(si)))
186
        {
187
6
          isEscapeSequence = false;
188
6
          break;
189
        }
190
1160
        hexString << si;
191
1160
        addCharToInternal(si, nonEscCache);
192
1160
        ++i;
193
1160
        if (!hasBrace && hexString.str().size() == 4)
194
        {
195
          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
196
54
          isEnd = true;
197
54
          break;
198
        }
199
1106
        else if (hasBrace && hexString.str().size() > 5)
200
        {
201
          // too many digits enclosed in brace, not an escape sequence
202
2
          isEscapeSequence = false;
203
2
          break;
204
        }
205
      }
206
530
      if (!isEnd)
207
      {
208
        // if we were interrupted before ending, then this is not a valid
209
        // escape sequence
210
20
        isEscapeSequence = false;
211
      }
212
    }
213
702
    if (isEscapeSequence)
214
    {
215
508
      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
216
      // Otherwise, we add the escaped character.
217
      // This is guaranteed not to overflow due to the length of hstr.
218
      uint32_t val;
219
508
      hexString >> std::hex >> val;
220
508
      if (val > num_codes())
221
      {
222
        // Failed due to being out of range. This can happen for strings of
223
        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexadecimal not
224
        // in the range [0-2].
225
2
        isEscapeSequence = false;
226
      }
227
      else
228
      {
229
506
        str.push_back(val);
230
      }
231
    }
232
    // if we did not successfully parse an escape sequence, we add back all
233
    // characters that we cached
234
702
    if (!isEscapeSequence)
235
    {
236
196
      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
237
    }
238
  }
239
#ifdef CVC5_ASSERTIONS
240
42139
  for (unsigned u : str)
241
  {
242
18052
    Assert(u < num_codes());
243
  }
244
#endif
245
24087
  return str;
246
}
247
248
71621
unsigned String::front() const
249
{
250
71621
  Assert(!d_str.empty());
251
71621
  return d_str.front();
252
}
253
254
35832
unsigned String::back() const
255
{
256
35832
  Assert(!d_str.empty());
257
35832
  return d_str.back();
258
}
259
260
2319
std::size_t String::overlap(const String &y) const {
261
2319
  std::size_t i = size() < y.size() ? size() : y.size();
262
5895
  for (; i > 0; i--) {
263
4251
    String s = suffix(i);
264
4251
    String p = y.prefix(i);
265
2463
    if (s == p) {
266
675
      return i;
267
    }
268
  }
269
1644
  return i;
270
}
271
272
2177
std::size_t String::roverlap(const String &y) const {
273
2177
  std::size_t i = size() < y.size() ? size() : y.size();
274
4459
  for (; i > 0; i--) {
275
2911
    String s = prefix(i);
276
2911
    String p = y.suffix(i);
277
1770
    if (s == p) {
278
629
      return i;
279
    }
280
  }
281
1548
  return i;
282
}
283
284
2587546
std::string String::toString(bool useEscSequences) const {
285
5175092
  std::stringstream str;
286
5125201
  for (unsigned int i = 0; i < size(); ++i) {
287
    // we always print backslash as a code point so that it cannot be
288
    // interpreted as specifying part of a code point, e.g. the string '\' +
289
    // 'u' + '0' of length three.
290
2537655
    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
291
    {
292
2491602
      str << static_cast<char>(d_str[i]);
293
    }
294
    else
295
    {
296
92106
      std::stringstream ss;
297
46053
      ss << std::hex << d_str[i];
298
46053
      str << "\\u{" << ss.str() << "}";
299
    }
300
  }
301
5175092
  return str.str();
302
}
303
304
12651
std::wstring String::toWString() const
305
{
306
12651
  std::wstring res(size(), static_cast<wchar_t>(0));
307
134934
  for (std::size_t i = 0; i < size(); ++i)
308
  {
309
122283
    res[i] = static_cast<wchar_t>(d_str[i]);
310
  }
311
12651
  return res;
312
}
313
314
77
bool String::isLeq(const String &y) const
315
{
316
140
  for (unsigned i = 0; i < size(); ++i)
317
  {
318
121
    if (i >= y.size())
319
    {
320
6
      return false;
321
    }
322
115
    unsigned ci = d_str[i];
323
115
    unsigned cyi = y.d_str[i];
324
115
    if (ci > cyi)
325
    {
326
29
      return false;
327
    }
328
86
    if (ci < cyi)
329
    {
330
23
      return true;
331
    }
332
  }
333
19
  return true;
334
}
335
336
20
bool String::isRepeated() const {
337
20
  if (size() > 1) {
338
20
    unsigned int f = d_str[0];
339
34
    for (unsigned i = 1; i < size(); ++i) {
340
20
      if (f != d_str[i]) return false;
341
    }
342
  }
343
14
  return true;
344
}
345
346
18
bool String::tailcmp(const String &y, int &c) const {
347
18
  int id_x = size() - 1;
348
18
  int id_y = y.size() - 1;
349
138
  while (id_x >= 0 && id_y >= 0) {
350
60
    if (d_str[id_x] != y.d_str[id_y]) {
351
      c = id_x;
352
      return false;
353
    }
354
60
    --id_x;
355
60
    --id_y;
356
  }
357
18
  c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
358
18
  return true;
359
}
360
361
92304
std::size_t String::find(const String &y, const std::size_t start) const {
362
92304
  if (size() < y.size() + start) return std::string::npos;
363
88822
  if (y.empty()) return start;
364
88540
  if (empty()) return std::string::npos;
365
366
  std::vector<unsigned>::const_iterator itr = std::search(
367
88540
      d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
368
88540
  if (itr != d_str.end()) {
369
84617
    return itr - d_str.begin();
370
  }
371
3923
  return std::string::npos;
372
}
373
374
6500
std::size_t String::rfind(const String &y, const std::size_t start) const {
375
6500
  if (size() < y.size() + start) return std::string::npos;
376
5269
  if (y.empty()) return start;
377
5267
  if (empty()) return std::string::npos;
378
379
  std::vector<unsigned>::const_reverse_iterator itr = std::search(
380
5267
      d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
381
5267
  if (itr != d_str.rend()) {
382
4628
    return itr - d_str.rbegin();
383
  }
384
639
  return std::string::npos;
385
}
386
387
9530
bool String::hasPrefix(const String& y) const
388
{
389
9530
  size_t s = size();
390
9530
  size_t ys = y.size();
391
9530
  if (ys > s)
392
  {
393
2
    return false;
394
  }
395
25489
  for (size_t i = 0; i < ys; i++)
396
  {
397
16087
    if (d_str[i] != y.d_str[i])
398
    {
399
126
      return false;
400
    }
401
  }
402
9402
  return true;
403
}
404
405
11030
bool String::hasSuffix(const String& y) const
406
{
407
11030
  size_t s = size();
408
11030
  size_t ys = y.size();
409
11030
  if (ys > s)
410
  {
411
2
    return false;
412
  }
413
11028
  size_t idiff = s - ys;
414
32780
  for (size_t i = 0; i < ys; i++)
415
  {
416
21826
    if (d_str[i + idiff] != y.d_str[i])
417
    {
418
74
      return false;
419
    }
420
  }
421
10954
  return true;
422
}
423
424
92
String String::update(std::size_t i, const String& t) const
425
{
426
92
  if (i < size())
427
  {
428
184
    std::vector<unsigned> vec(d_str.begin(), d_str.begin() + i);
429
92
    size_t remNum = size() - i;
430
92
    size_t tnum = t.d_str.size();
431
92
    if (tnum >= remNum)
432
    {
433
22
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.begin() + remNum);
434
    }
435
    else
436
    {
437
70
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
438
70
      vec.insert(vec.end(), d_str.begin() + i + tnum, d_str.end());
439
    }
440
92
    return String(vec);
441
  }
442
  return *this;
443
}
444
445
345
String String::replace(const String &s, const String &t) const {
446
345
  std::size_t ret = find(s);
447
345
  if (ret != std::string::npos) {
448
250
    std::vector<unsigned> vec;
449
125
    vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
450
125
    vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
451
125
    vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
452
125
    return String(vec);
453
  } else {
454
220
    return *this;
455
  }
456
}
457
458
13478
String String::substr(std::size_t i) const {
459
13478
  Assert(i <= size());
460
26956
  std::vector<unsigned> ret_vec;
461
13478
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
462
13478
  ret_vec.insert(ret_vec.end(), itr, d_str.end());
463
26956
  return String(ret_vec);
464
}
465
466
392682
String String::substr(std::size_t i, std::size_t j) const {
467
392682
  Assert(i + j <= size());
468
785364
  std::vector<unsigned> ret_vec;
469
392682
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
470
392682
  ret_vec.insert(ret_vec.end(), itr, itr + j);
471
785364
  return String(ret_vec);
472
}
473
474
201
bool String::noOverlapWith(const String& y) const
475
{
476
201
  return y.find(*this) == std::string::npos
477
123
         && this->find(y) == std::string::npos && this->overlap(y) == 0
478
274
         && y.overlap(*this) == 0;
479
}
480
481
422
bool String::isNumber() const {
482
422
  if (d_str.empty()) {
483
19
    return false;
484
  }
485
1125
  for (unsigned character : d_str) {
486
860
    if (!isDigit(character))
487
    {
488
138
      return false;
489
    }
490
  }
491
265
  return true;
492
}
493
494
2096
bool String::isDigit(unsigned character)
495
{
496
  // '0' to '9'
497
2096
  return 48 <= character && character <= 57;
498
}
499
500
1166
bool String::isHexDigit(unsigned character)
501
{
502
  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
503
1396
  return isDigit(character) || (65 <= character && character <= 70)
504
1382
         || (97 <= character && character <= 102);
505
}
506
507
2537655
bool String::isPrintable(unsigned character)
508
{
509
  // Unicode 0x00020 (' ') to 0x0007E ('~')
510
2537655
  return 32 <= character && character <= 126;
511
}
512
513
12970
size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
514
515
85
Rational String::toNumber() const
516
{
517
  // when smt2 standard for strings is set, this may change, based on the
518
  // semantics of str.from.int for leading zeros
519
85
  return Rational(toString());
520
}
521
522
std::ostream &operator<<(std::ostream &os, const String &s) {
523
  return os << "\"" << s.toString() << "\"";
524
}
525
526
29337
}  // namespace cvc5