GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.cpp Lines: 254 265 95.8 %
Date: 2021-09-29 Branches: 283 486 58.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Andrew Reynolds, Tim King, Tianyi Liang
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Implementation of the string data type.
14
 */
15
16
#include "util/string.h"
17
18
#include <algorithm>
19
#include <climits>
20
#include <iomanip>
21
#include <iostream>
22
#include <sstream>
23
24
#include "base/check.h"
25
#include "base/exception.h"
26
27
using namespace std;
28
29
namespace cvc5 {
30
31
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
32
33
7223
String::String(const std::wstring& s)
34
{
35
7223
  d_str.resize(s.size());
36
71364
  for (size_t i = 0, n = s.size(); i < n; ++i)
37
  {
38
64141
    d_str[i] = static_cast<unsigned>(s[i]);
39
  }
40
7223
}
41
42
2305405
String::String(const std::vector<unsigned> &s) : d_str(s)
43
{
44
#ifdef CVC5_ASSERTIONS
45
6561765
  for (unsigned u : d_str)
46
  {
47
4256360
    Assert(u < num_codes());
48
  }
49
#endif
50
2305405
}
51
52
2284147
int String::cmp(const String &y) const {
53
2284147
  if (size() != y.size()) {
54
18195
    return size() < y.size() ? -1 : 1;
55
  }
56
3990733
  for (unsigned int i = 0; i < size(); ++i) {
57
1731605
    if (d_str[i] != y.d_str[i]) {
58
6824
      unsigned cp = d_str[i];
59
6824
      unsigned cpy = y.d_str[i];
60
6824
      return cp < cpy ? -1 : 1;
61
    }
62
  }
63
2259128
  return 0;
64
}
65
66
24437
String String::concat(const String &other) const {
67
48874
  std::vector<unsigned int> ret_vec(d_str);
68
24437
  ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
69
48874
  return String(ret_vec);
70
}
71
72
30250
bool String::strncmp(const String& y, std::size_t n) const
73
{
74
30250
  std::size_t b = (size() >= y.size()) ? size() : y.size();
75
30250
  std::size_t s = (size() <= y.size()) ? size() : y.size();
76
30250
  if (n > s) {
77
    if (b == s) {
78
      n = s;
79
    } else {
80
      return false;
81
    }
82
  }
83
115836
  for (std::size_t i = 0; i < n; ++i) {
84
89882
    if (d_str[i] != y.d_str[i]) return false;
85
  }
86
25954
  return true;
87
}
88
89
23450
bool String::rstrncmp(const String& y, std::size_t n) const
90
{
91
23450
  std::size_t b = (size() >= y.size()) ? size() : y.size();
92
23450
  std::size_t s = (size() <= y.size()) ? size() : y.size();
93
23450
  if (n > s) {
94
    if (b == s) {
95
      n = s;
96
    } else {
97
      return false;
98
    }
99
  }
100
62238
  for (std::size_t i = 0; i < n; ++i) {
101
42723
    if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
102
  }
103
19515
  return true;
104
}
105
106
19978
void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
107
{
108
  // if not a printable character
109
19978
  if (ch > 127 || ch < 32)
110
  {
111
2
    std::stringstream serr;
112
    serr << "Illegal string character: \"" << ch
113
1
         << "\", must use escape sequence";
114
1
    throw cvc5::Exception(serr.str());
115
  }
116
  else
117
  {
118
19977
    str.push_back(static_cast<unsigned>(ch));
119
  }
120
19977
}
121
122
18279
std::vector<unsigned> String::toInternal(const std::string& s,
123
                                         bool useEscSequences)
124
{
125
18279
  std::vector<unsigned> str;
126
18279
  unsigned i = 0;
127
53005
  while (i < s.size())
128
  {
129
    // get the current character
130
17364
    char si = s[i];
131
34049
    if (si != '\\' || !useEscSequences)
132
    {
133
16686
      addCharToInternal(si, str);
134
16685
      ++i;
135
16685
      continue;
136
    }
137
    // the vector of characters, in case we fail to read an escape sequence
138
1356
    std::vector<unsigned> nonEscCache;
139
    // process the '\'
140
678
    addCharToInternal(si, nonEscCache);
141
678
    ++i;
142
    // are we an escape sequence?
143
678
    bool isEscapeSequence = true;
144
    // the string corresponding to the hexadecimal code point
145
1356
    std::stringstream hexString;
146
    // is the slash followed by a 'u'? Could be last character.
147
678
    if (i >= s.size() || s[i] != 'u')
148
    {
149
148
      isEscapeSequence = false;
150
    }
151
    else
152
    {
153
      // process the 'u'
154
530
      addCharToInternal(s[i], nonEscCache);
155
530
      ++i;
156
530
      bool isStart = true;
157
530
      bool isEnd = false;
158
530
      bool hasBrace = false;
159
3674
      while (i < s.size())
160
      {
161
        // add the next character
162
2090
        si = s[i];
163
2090
        if (isStart)
164
        {
165
526
          isStart = false;
166
          // possibly read '{'
167
994
          if (si == '{')
168
          {
169
468
            hasBrace = true;
170
468
            addCharToInternal(si, nonEscCache);
171
468
            ++i;
172
468
            continue;
173
          }
174
        }
175
1564
        else if (si == '}')
176
        {
177
          // can only end if we had an open brace and read at least one digit
178
456
          isEscapeSequence = hasBrace && !hexString.str().empty();
179
456
          isEnd = true;
180
456
          addCharToInternal(si, nonEscCache);
181
456
          ++i;
182
456
          break;
183
        }
184
        // must be a hex digit at this point
185
1166
        if (!isHexDigit(static_cast<unsigned>(si)))
186
        {
187
6
          isEscapeSequence = false;
188
6
          break;
189
        }
190
1160
        hexString << si;
191
1160
        addCharToInternal(si, nonEscCache);
192
1160
        ++i;
193
1160
        if (!hasBrace && hexString.str().size() == 4)
194
        {
195
          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
196
54
          isEnd = true;
197
54
          break;
198
        }
199
1106
        else if (hasBrace && hexString.str().size() > 5)
200
        {
201
          // too many digits enclosed in brace, not an escape sequence
202
2
          isEscapeSequence = false;
203
2
          break;
204
        }
205
      }
206
530
      if (!isEnd)
207
      {
208
        // if we were interrupted before ending, then this is not a valid
209
        // escape sequence
210
20
        isEscapeSequence = false;
211
      }
212
    }
213
678
    if (isEscapeSequence)
214
    {
215
508
      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
216
      // Otherwise, we add the escaped character.
217
      // This is guaranteed not to overflow due to the length of hstr.
218
      uint32_t val;
219
508
      hexString >> std::hex >> val;
220
508
      if (val > num_codes())
221
      {
222
        // Failed due to being out of range. This can happen for strings of
223
        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexadecimal not
224
        // in the range [0-2].
225
2
        isEscapeSequence = false;
226
      }
227
      else
228
      {
229
506
        str.push_back(val);
230
      }
231
    }
232
    // if we did not successfully parse an escape sequence, we add back all
233
    // characters that we cached
234
678
    if (!isEscapeSequence)
235
    {
236
172
      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
237
    }
238
  }
239
#ifdef CVC5_ASSERTIONS
240
35671
  for (unsigned u : str)
241
  {
242
17393
    Assert(u < num_codes());
243
  }
244
#endif
245
18278
  return str;
246
}
247
248
71556
unsigned String::front() const
249
{
250
71556
  Assert(!d_str.empty());
251
71556
  return d_str.front();
252
}
253
254
35609
unsigned String::back() const
255
{
256
35609
  Assert(!d_str.empty());
257
35609
  return d_str.back();
258
}
259
260
1978
std::size_t String::overlap(const String &y) const {
261
1978
  std::size_t i = size() < y.size() ? size() : y.size();
262
5326
  for (; i > 0; i--) {
263
3823
    String s = suffix(i);
264
3823
    String p = y.prefix(i);
265
2149
    if (s == p) {
266
475
      return i;
267
    }
268
  }
269
1503
  return i;
270
}
271
272
1777
std::size_t String::roverlap(const String &y) const {
273
1777
  std::size_t i = size() < y.size() ? size() : y.size();
274
4047
  for (; i > 0; i--) {
275
2602
    String s = prefix(i);
276
2602
    String p = y.suffix(i);
277
1467
    if (s == p) {
278
332
      return i;
279
    }
280
  }
281
1445
  return i;
282
}
283
284
2344735
std::string String::toString(bool useEscSequences) const {
285
4689470
  std::stringstream str;
286
4561358
  for (unsigned int i = 0; i < size(); ++i) {
287
    // we always print backslash as a code point so that it cannot be
288
    // interpreted as specifying part of a code point, e.g. the string '\' +
289
    // 'u' + '0' of length three.
290
2216623
    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
291
    {
292
2180146
      str << static_cast<char>(d_str[i]);
293
    }
294
    else
295
    {
296
72954
      std::stringstream ss;
297
36477
      ss << std::hex << d_str[i];
298
36477
      str << "\\u{" << ss.str() << "}";
299
    }
300
  }
301
4689470
  return str.str();
302
}
303
304
6767
std::wstring String::toWString() const
305
{
306
6767
  std::wstring res(size(), static_cast<wchar_t>(0));
307
68704
  for (std::size_t i = 0; i < size(); ++i)
308
  {
309
61937
    res[i] = static_cast<wchar_t>(d_str[i]);
310
  }
311
6767
  return res;
312
}
313
314
49
bool String::isLeq(const String &y) const
315
{
316
114
  for (unsigned i = 0; i < size(); ++i)
317
  {
318
101
    if (i >= y.size())
319
    {
320
5
      return false;
321
    }
322
96
    unsigned ci = d_str[i];
323
96
    unsigned cyi = y.d_str[i];
324
96
    if (ci > cyi)
325
    {
326
13
      return false;
327
    }
328
83
    if (ci < cyi)
329
    {
330
18
      return true;
331
    }
332
  }
333
13
  return true;
334
}
335
336
20
bool String::isRepeated() const {
337
20
  if (size() > 1) {
338
20
    unsigned int f = d_str[0];
339
34
    for (unsigned i = 1; i < size(); ++i) {
340
20
      if (f != d_str[i]) return false;
341
    }
342
  }
343
14
  return true;
344
}
345
346
18
bool String::tailcmp(const String &y, int &c) const {
347
18
  int id_x = size() - 1;
348
18
  int id_y = y.size() - 1;
349
138
  while (id_x >= 0 && id_y >= 0) {
350
60
    if (d_str[id_x] != y.d_str[id_y]) {
351
      c = id_x;
352
      return false;
353
    }
354
60
    --id_x;
355
60
    --id_y;
356
  }
357
18
  c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
358
18
  return true;
359
}
360
361
70404
std::size_t String::find(const String &y, const std::size_t start) const {
362
70404
  if (size() < y.size() + start) return std::string::npos;
363
67280
  if (y.empty()) return start;
364
67033
  if (empty()) return std::string::npos;
365
366
  std::vector<unsigned>::const_iterator itr = std::search(
367
67033
      d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
368
67033
  if (itr != d_str.end()) {
369
63265
    return itr - d_str.begin();
370
  }
371
3768
  return std::string::npos;
372
}
373
374
5971
std::size_t String::rfind(const String &y, const std::size_t start) const {
375
5971
  if (size() < y.size() + start) return std::string::npos;
376
5086
  if (y.empty()) return start;
377
5084
  if (empty()) return std::string::npos;
378
379
  std::vector<unsigned>::const_reverse_iterator itr = std::search(
380
5084
      d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
381
5084
  if (itr != d_str.rend()) {
382
4430
    return itr - d_str.rbegin();
383
  }
384
654
  return std::string::npos;
385
}
386
387
6498
bool String::hasPrefix(const String& y) const
388
{
389
6498
  size_t s = size();
390
6498
  size_t ys = y.size();
391
6498
  if (ys > s)
392
  {
393
2
    return false;
394
  }
395
17264
  for (size_t i = 0; i < ys; i++)
396
  {
397
10889
    if (d_str[i] != y.d_str[i])
398
    {
399
121
      return false;
400
    }
401
  }
402
6375
  return true;
403
}
404
405
7510
bool String::hasSuffix(const String& y) const
406
{
407
7510
  size_t s = size();
408
7510
  size_t ys = y.size();
409
7510
  if (ys > s)
410
  {
411
2
    return false;
412
  }
413
7508
  size_t idiff = s - ys;
414
25285
  for (size_t i = 0; i < ys; i++)
415
  {
416
17847
    if (d_str[i + idiff] != y.d_str[i])
417
    {
418
70
      return false;
419
    }
420
  }
421
7438
  return true;
422
}
423
424
89
String String::update(std::size_t i, const String& t) const
425
{
426
89
  if (i < size())
427
  {
428
178
    std::vector<unsigned> vec(d_str.begin(), d_str.begin() + i);
429
89
    size_t remNum = size() - i;
430
89
    size_t tnum = t.d_str.size();
431
89
    if (tnum >= remNum)
432
    {
433
22
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.begin() + remNum);
434
    }
435
    else
436
    {
437
67
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
438
67
      vec.insert(vec.end(), d_str.begin() + i + tnum, d_str.end());
439
    }
440
89
    return String(vec);
441
  }
442
  return *this;
443
}
444
445
264
String String::replace(const String &s, const String &t) const {
446
264
  std::size_t ret = find(s);
447
264
  if (ret != std::string::npos) {
448
188
    std::vector<unsigned> vec;
449
94
    vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
450
94
    vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
451
94
    vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
452
94
    return String(vec);
453
  } else {
454
170
    return *this;
455
  }
456
}
457
458
21135
String String::substr(std::size_t i) const {
459
21135
  Assert(i <= size());
460
42270
  std::vector<unsigned> ret_vec;
461
21135
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
462
21135
  ret_vec.insert(ret_vec.end(), itr, d_str.end());
463
42270
  return String(ret_vec);
464
}
465
466
388055
String String::substr(std::size_t i, std::size_t j) const {
467
388055
  Assert(i + j <= size());
468
776110
  std::vector<unsigned> ret_vec;
469
388055
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
470
388055
  ret_vec.insert(ret_vec.end(), itr, itr + j);
471
776110
  return String(ret_vec);
472
}
473
474
140
bool String::noOverlapWith(const String& y) const
475
{
476
140
  return y.find(*this) == std::string::npos
477
103
         && this->find(y) == std::string::npos && this->overlap(y) == 0
478
202
         && y.overlap(*this) == 0;
479
}
480
481
624
bool String::isNumber() const {
482
624
  if (d_str.empty()) {
483
18
    return false;
484
  }
485
1419
  for (unsigned character : d_str) {
486
1042
    if (!isDigit(character))
487
    {
488
229
      return false;
489
    }
490
  }
491
377
  return true;
492
}
493
494
2230
bool String::isDigit(unsigned character)
495
{
496
  // '0' to '9'
497
2230
  return 48 <= character && character <= 57;
498
}
499
500
1166
bool String::isHexDigit(unsigned character)
501
{
502
  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
503
1396
  return isDigit(character) || (65 <= character && character <= 70)
504
1382
         || (97 <= character && character <= 102);
505
}
506
507
2216623
bool String::isPrintable(unsigned character)
508
{
509
  // Unicode 0x00020 (' ') to 0x0007E ('~')
510
2216623
  return 32 <= character && character <= 126;
511
}
512
513
13413
size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
514
515
99
Rational String::toNumber() const
516
{
517
  // when smt2 standard for strings is set, this may change, based on the
518
  // semantics of str.from.int for leading zeros
519
99
  return Rational(toString());
520
}
521
522
std::ostream &operator<<(std::ostream &os, const String &s) {
523
  return os << "\"" << s.toString() << "\"";
524
}
525
526
22746
}  // namespace cvc5