GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/string.cpp Lines: 249 260 95.8 %
Date: 2021-03-22 Branches: 280 482 58.1 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file string.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Andrew Reynolds, Tim King, Tianyi Liang
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Implementation of the string data type.
13
 **/
14
15
#include "util/string.h"
16
17
#include <algorithm>
18
#include <climits>
19
#include <iomanip>
20
#include <iostream>
21
#include <sstream>
22
23
#include "base/check.h"
24
#include "base/exception.h"
25
26
using namespace std;
27
28
namespace CVC4 {
29
30
static_assert(UCHAR_MAX == 255, "Unsigned char is assumed to have 256 values.");
31
32
2403959
String::String(const std::vector<unsigned> &s) : d_str(s)
33
{
34
#ifdef CVC4_ASSERTIONS
35
7699857
  for (unsigned u : d_str)
36
  {
37
5295898
    Assert(u < num_codes());
38
  }
39
#endif
40
2403959
}
41
42
2329568
int String::cmp(const String &y) const {
43
2329568
  if (size() != y.size()) {
44
16127
    return size() < y.size() ? -1 : 1;
45
  }
46
4400131
  for (unsigned int i = 0; i < size(); ++i) {
47
2092504
    if (d_str[i] != y.d_str[i]) {
48
5814
      unsigned cp = d_str[i];
49
5814
      unsigned cpy = y.d_str[i];
50
5814
      return cp < cpy ? -1 : 1;
51
    }
52
  }
53
2307627
  return 0;
54
}
55
56
24822
String String::concat(const String &other) const {
57
49644
  std::vector<unsigned int> ret_vec(d_str);
58
24822
  ret_vec.insert(ret_vec.end(), other.d_str.begin(), other.d_str.end());
59
49644
  return String(ret_vec);
60
}
61
62
35570
bool String::strncmp(const String& y, std::size_t n) const
63
{
64
35570
  std::size_t b = (size() >= y.size()) ? size() : y.size();
65
35570
  std::size_t s = (size() <= y.size()) ? size() : y.size();
66
35570
  if (n > s) {
67
    if (b == s) {
68
      n = s;
69
    } else {
70
      return false;
71
    }
72
  }
73
156825
  for (std::size_t i = 0; i < n; ++i) {
74
127277
    if (d_str[i] != y.d_str[i]) return false;
75
  }
76
29548
  return true;
77
}
78
79
35430
bool String::rstrncmp(const String& y, std::size_t n) const
80
{
81
35430
  std::size_t b = (size() >= y.size()) ? size() : y.size();
82
35430
  std::size_t s = (size() <= y.size()) ? size() : y.size();
83
35430
  if (n > s) {
84
    if (b == s) {
85
      n = s;
86
    } else {
87
      return false;
88
    }
89
  }
90
100554
  for (std::size_t i = 0; i < n; ++i) {
91
71954
    if (d_str[size() - i - 1] != y.d_str[y.size() - i - 1]) return false;
92
  }
93
28600
  return true;
94
}
95
96
16971
void String::addCharToInternal(unsigned char ch, std::vector<unsigned>& str)
97
{
98
  // if not a printable character
99
16971
  if (ch > 127 || ch < 32)
100
  {
101
2
    std::stringstream serr;
102
    serr << "Illegal string character: \"" << ch
103
1
         << "\", must use escape sequence";
104
1
    throw CVC4::Exception(serr.str());
105
  }
106
  else
107
  {
108
16970
    str.push_back(static_cast<unsigned>(ch));
109
  }
110
16970
}
111
112
21235
std::vector<unsigned> String::toInternal(const std::string& s,
113
                                         bool useEscSequences)
114
{
115
21235
  std::vector<unsigned> str;
116
21235
  unsigned i = 0;
117
53867
  while (i < s.size())
118
  {
119
    // get the current character
120
16317
    char si = s[i];
121
32291
    if (si != '\\' || !useEscSequences)
122
    {
123
15975
      addCharToInternal(si, str);
124
15974
      ++i;
125
15974
      continue;
126
    }
127
    // the vector of characters, in case we fail to read an escape sequence
128
684
    std::vector<unsigned> nonEscCache;
129
    // process the '\'
130
342
    addCharToInternal(si, nonEscCache);
131
342
    ++i;
132
    // are we an escape sequence?
133
342
    bool isEscapeSequence = true;
134
    // the string corresponding to the hexadecimal code point
135
684
    std::stringstream hexString;
136
    // is the slash followed by a 'u'? Could be last character.
137
342
    if (i >= s.size() || s[i] != 'u')
138
    {
139
206
      isEscapeSequence = false;
140
    }
141
    else
142
    {
143
      // process the 'u'
144
136
      addCharToInternal(s[i], nonEscCache);
145
136
      ++i;
146
136
      bool isStart = true;
147
136
      bool isEnd = false;
148
136
      bool hasBrace = false;
149
928
      while (i < s.size())
150
      {
151
        // add the next character
152
524
        si = s[i];
153
524
        if (isStart)
154
        {
155
132
          isStart = false;
156
          // possibly read '{'
157
206
          if (si == '{')
158
          {
159
74
            hasBrace = true;
160
74
            addCharToInternal(si, nonEscCache);
161
74
            ++i;
162
74
            continue;
163
          }
164
        }
165
392
        else if (si == '}')
166
        {
167
          // can only end if we had an open brace and read at least one digit
168
66
          isEscapeSequence = hasBrace && !hexString.str().empty();
169
66
          isEnd = true;
170
66
          addCharToInternal(si, nonEscCache);
171
66
          ++i;
172
66
          break;
173
        }
174
        // must be a hex digit at this point
175
384
        if (!isHexDigit(static_cast<unsigned>(si)))
176
        {
177
6
          isEscapeSequence = false;
178
6
          break;
179
        }
180
378
        hexString << si;
181
378
        addCharToInternal(si, nonEscCache);
182
378
        ++i;
183
378
        if (!hasBrace && hexString.str().size() == 4)
184
        {
185
          // will be finished reading \ u d_3 d_2 d_1 d_0 with no parens
186
54
          isEnd = true;
187
54
          break;
188
        }
189
324
        else if (hasBrace && hexString.str().size() > 5)
190
        {
191
          // too many digits enclosed in brace, not an escape sequence
192
2
          isEscapeSequence = false;
193
2
          break;
194
        }
195
      }
196
136
      if (!isEnd)
197
      {
198
        // if we were interrupted before ending, then this is not a valid
199
        // escape sequence
200
16
        isEscapeSequence = false;
201
      }
202
    }
203
342
    if (isEscapeSequence)
204
    {
205
118
      Assert(!hexString.str().empty() && hexString.str().size() <= 5);
206
      // Otherwise, we add the escaped character.
207
      // This is guaranteed not to overflow due to the length of hstr.
208
      uint32_t val;
209
118
      hexString >> std::hex >> val;
210
118
      if (val > num_codes())
211
      {
212
        // Failed due to being out of range. This can happen for strings of
213
        // the form \ u { d_4 d_3 d_2 d_1 d_0 } where d_4 is a hexadecimal not
214
        // in the range [0-2].
215
2
        isEscapeSequence = false;
216
      }
217
      else
218
      {
219
116
        str.push_back(val);
220
      }
221
    }
222
    // if we did not successfully parse an escape sequence, we add back all
223
    // characters that we cached
224
342
    if (!isEscapeSequence)
225
    {
226
226
      str.insert(str.end(), nonEscCache.begin(), nonEscCache.end());
227
    }
228
  }
229
#ifdef CVC4_ASSERTIONS
230
37570
  for (unsigned u : str)
231
  {
232
16336
    Assert(u < num_codes());
233
  }
234
#endif
235
21234
  return str;
236
}
237
238
87926
unsigned String::front() const
239
{
240
87926
  Assert(!d_str.empty());
241
87926
  return d_str.front();
242
}
243
244
43672
unsigned String::back() const
245
{
246
43672
  Assert(!d_str.empty());
247
43672
  return d_str.back();
248
}
249
250
4679
std::size_t String::overlap(const String &y) const {
251
4679
  std::size_t i = size() < y.size() ? size() : y.size();
252
8173
  for (; i > 0; i--) {
253
6621
    String s = suffix(i);
254
6621
    String p = y.prefix(i);
255
4874
    if (s == p) {
256
3127
      return i;
257
    }
258
  }
259
1552
  return i;
260
}
261
262
1979
std::size_t String::roverlap(const String &y) const {
263
1979
  std::size_t i = size() < y.size() ? size() : y.size();
264
4467
  for (; i > 0; i--) {
265
2976
    String s = prefix(i);
266
2976
    String p = y.suffix(i);
267
1732
    if (s == p) {
268
488
      return i;
269
    }
270
  }
271
1491
  return i;
272
}
273
274
2413077
std::string String::toString(bool useEscSequences) const {
275
4826154
  std::stringstream str;
276
5149100
  for (unsigned int i = 0; i < size(); ++i) {
277
    // we always print backslash as a code point so that it cannot be
278
    // interpreted as specifying part of a code point, e.g. the string '\' +
279
    // 'u' + '0' of length three.
280
2736023
    if (isPrintable(d_str[i]) && d_str[i] != '\\' && !useEscSequences)
281
    {
282
2689128
      str << static_cast<char>(d_str[i]);
283
    }
284
    else
285
    {
286
93790
      std::stringstream ss;
287
46895
      ss << std::hex << d_str[i];
288
46895
      str << "\\u{" << ss.str() << "}";
289
    }
290
  }
291
4826154
  return str.str();
292
}
293
294
12083
std::wstring String::toWString() const
295
{
296
12083
  std::wstring res(size(), static_cast<wchar_t>(0));
297
135066
  for (std::size_t i = 0; i < size(); ++i)
298
  {
299
122983
    res[i] = static_cast<wchar_t>(d_str[i]);
300
  }
301
12083
  return res;
302
}
303
304
90
bool String::isLeq(const String &y) const
305
{
306
140
  for (unsigned i = 0; i < size(); ++i)
307
  {
308
129
    if (i >= y.size())
309
    {
310
5
      return false;
311
    }
312
124
    unsigned ci = d_str[i];
313
124
    unsigned cyi = y.d_str[i];
314
124
    if (ci > cyi)
315
    {
316
56
      return false;
317
    }
318
68
    if (ci < cyi)
319
    {
320
18
      return true;
321
    }
322
  }
323
11
  return true;
324
}
325
326
20
bool String::isRepeated() const {
327
20
  if (size() > 1) {
328
20
    unsigned int f = d_str[0];
329
34
    for (unsigned i = 1; i < size(); ++i) {
330
20
      if (f != d_str[i]) return false;
331
    }
332
  }
333
14
  return true;
334
}
335
336
18
bool String::tailcmp(const String &y, int &c) const {
337
18
  int id_x = size() - 1;
338
18
  int id_y = y.size() - 1;
339
138
  while (id_x >= 0 && id_y >= 0) {
340
60
    if (d_str[id_x] != y.d_str[id_y]) {
341
      c = id_x;
342
      return false;
343
    }
344
60
    --id_x;
345
60
    --id_y;
346
  }
347
18
  c = id_x == -1 ? (-(id_y + 1)) : (id_x + 1);
348
18
  return true;
349
}
350
351
92098
std::size_t String::find(const String &y, const std::size_t start) const {
352
92098
  if (size() < y.size() + start) return std::string::npos;
353
87312
  if (y.empty()) return start;
354
86941
  if (empty()) return std::string::npos;
355
356
  std::vector<unsigned>::const_iterator itr = std::search(
357
86941
      d_str.begin() + start, d_str.end(), y.d_str.begin(), y.d_str.end());
358
86941
  if (itr != d_str.end()) {
359
80789
    return itr - d_str.begin();
360
  }
361
6152
  return std::string::npos;
362
}
363
364
10483
std::size_t String::rfind(const String &y, const std::size_t start) const {
365
10483
  if (size() < y.size() + start) return std::string::npos;
366
7657
  if (y.empty()) return start;
367
7655
  if (empty()) return std::string::npos;
368
369
  std::vector<unsigned>::const_reverse_iterator itr = std::search(
370
7655
      d_str.rbegin() + start, d_str.rend(), y.d_str.rbegin(), y.d_str.rend());
371
7655
  if (itr != d_str.rend()) {
372
6992
    return itr - d_str.rbegin();
373
  }
374
663
  return std::string::npos;
375
}
376
377
5581
bool String::hasPrefix(const String& y) const
378
{
379
5581
  size_t s = size();
380
5581
  size_t ys = y.size();
381
5581
  if (ys > s)
382
  {
383
2
    return false;
384
  }
385
15949
  for (size_t i = 0; i < ys; i++)
386
  {
387
10460
    if (d_str[i] != y.d_str[i])
388
    {
389
90
      return false;
390
    }
391
  }
392
5489
  return true;
393
}
394
395
9371
bool String::hasSuffix(const String& y) const
396
{
397
9371
  size_t s = size();
398
9371
  size_t ys = y.size();
399
9371
  if (ys > s)
400
  {
401
2
    return false;
402
  }
403
9369
  size_t idiff = s - ys;
404
21192
  for (size_t i = 0; i < ys; i++)
405
  {
406
11881
    if (d_str[i + idiff] != y.d_str[i])
407
    {
408
58
      return false;
409
    }
410
  }
411
9311
  return true;
412
}
413
414
93
String String::update(std::size_t i, const String& t) const
415
{
416
93
  if (i < size())
417
  {
418
186
    std::vector<unsigned> vec(d_str.begin(), d_str.begin() + i);
419
93
    size_t remNum = size() - i;
420
93
    size_t tnum = t.d_str.size();
421
93
    if (tnum >= remNum)
422
    {
423
21
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.begin() + remNum);
424
    }
425
    else
426
    {
427
72
      vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
428
72
      vec.insert(vec.end(), d_str.begin() + i + tnum, d_str.end());
429
    }
430
93
    return String(vec);
431
  }
432
  return *this;
433
}
434
435
345
String String::replace(const String &s, const String &t) const {
436
345
  std::size_t ret = find(s);
437
345
  if (ret != std::string::npos) {
438
250
    std::vector<unsigned> vec;
439
125
    vec.insert(vec.begin(), d_str.begin(), d_str.begin() + ret);
440
125
    vec.insert(vec.end(), t.d_str.begin(), t.d_str.end());
441
125
    vec.insert(vec.end(), d_str.begin() + ret + s.size(), d_str.end());
442
125
    return String(vec);
443
  } else {
444
220
    return *this;
445
  }
446
}
447
448
16527
String String::substr(std::size_t i) const {
449
16527
  Assert(i <= size());
450
33054
  std::vector<unsigned> ret_vec;
451
16527
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
452
16527
  ret_vec.insert(ret_vec.end(), itr, d_str.end());
453
33054
  return String(ret_vec);
454
}
455
456
477204
String String::substr(std::size_t i, std::size_t j) const {
457
477204
  Assert(i + j <= size());
458
954408
  std::vector<unsigned> ret_vec;
459
477204
  std::vector<unsigned>::const_iterator itr = d_str.begin() + i;
460
477204
  ret_vec.insert(ret_vec.end(), itr, itr + j);
461
954408
  return String(ret_vec);
462
}
463
464
363
bool String::noOverlapWith(const String& y) const
465
{
466
363
  return y.find(*this) == std::string::npos
467
146
         && this->find(y) == std::string::npos && this->overlap(y) == 0
468
472
         && y.overlap(*this) == 0;
469
}
470
471
221
bool String::isNumber() const {
472
221
  if (d_str.empty()) {
473
20
    return false;
474
  }
475
461
  for (unsigned character : d_str) {
476
346
    if (!isDigit(character))
477
    {
478
86
      return false;
479
    }
480
  }
481
115
  return true;
482
}
483
484
806
bool String::isDigit(unsigned character)
485
{
486
  // '0' to '9'
487
806
  return 48 <= character && character <= 57;
488
}
489
490
384
bool String::isHexDigit(unsigned character)
491
{
492
  // '0' to '9' or 'A' to 'F' or 'a' to 'f'
493
476
  return isDigit(character) || (65 <= character && character <= 70)
494
462
         || (97 <= character && character <= 102);
495
}
496
497
2736023
bool String::isPrintable(unsigned character)
498
{
499
  // Unicode 0x00020 (' ') to 0x0007E ('~')
500
2736023
  return 32 <= character && character <= 126;
501
}
502
503
16212
size_t String::maxSize() { return std::numeric_limits<uint32_t>::max(); }
504
505
91
Rational String::toNumber() const
506
{
507
  // when smt2 standard for strings is set, this may change, based on the
508
  // semantics of str.from.int for leading zeros
509
91
  return Rational(toString());
510
}
511
512
std::ostream &operator<<(std::ostream &os, const String &s) {
513
  return os << "\"" << s.toString() << "\"";
514
}
515
516
26676
}  // namespace CVC4