GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.cpp Lines: 199 213 93.4 %
Date: 2021-09-29 Branches: 221 462 47.8 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Kshitij Bansal, Tim King
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * A super-class for ANTLR-generated input language parsers.
14
 */
15
16
#include "parser/antlr_input.h"
17
18
#include <antlr3.h>
19
#include <limits>
20
21
#include "base/check.h"
22
#include "base/output.h"
23
#include "parser/antlr_line_buffered_input.h"
24
#include "parser/bounded_token_buffer.h"
25
#include "parser/bounded_token_factory.h"
26
#include "parser/input.h"
27
#include "parser/memory_mapped_input_buffer.h"
28
#include "parser/parser.h"
29
#include "parser/parser_exception.h"
30
#include "parser/smt2/smt2_input.h"
31
#include "parser/smt2/sygus_input.h"
32
#include "parser/tptp/tptp_input.h"
33
34
using namespace std;
35
using namespace cvc5;
36
using namespace cvc5::parser;
37
using namespace cvc5::kind;
38
39
namespace cvc5 {
40
namespace parser {
41
42
// These functions exactly wrap the antlr3 source inconsistencies.
43
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
44
// No other sanity checking happens;
45
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
46
                                             const std::string& name,
47
                                             LineBuffer* line_buffer);
48
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
49
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
50
                                            uint32_t size,
51
                                            const std::string& name);
52
53
4
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
54
                                             const std::string& name,
55
                                             LineBuffer* line_buffer) {
56
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
57
4
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
58
59
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
60
  inputStream =
61
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
62
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
63
4
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
64
                                            name_duplicate, line_buffer);
65
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
66
67
4
  free(name_duplicate);
68
4
  return inputStream;
69
}
70
71
3903
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
72
3903
  pANTLR3_INPUT_STREAM input = NULL;
73
3903
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
74
75
  // libantlr3c v3.2 isn't source-compatible with v3.4
76
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
77
  input = antlr3AsciiFileStreamNew(name_duplicate);
78
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
79
3903
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
80
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
81
82
3903
  free(name_duplicate);
83
3903
  return input;
84
}
85
86
87
163
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
88
                                            uint32_t size,
89
                                            const std::string& name){
90
163
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
91
163
  pANTLR3_INPUT_STREAM inputStream = NULL;
92
  /* Create an ANTLR input backed by the buffer. */
93
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
94
  inputStream =
95
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
96
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
97
163
  inputStream =
98
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
99
                          name_duplicate);
100
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
101
163
  free(name_duplicate);
102
163
  return inputStream;
103
}
104
105
4070
AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
106
                                   bool fileIsTemporary,
107
                                   pANTLR3_UINT8 inputString,
108
4070
                                   LineBuffer* line_buffer)
109
    : InputStream(name, fileIsTemporary),
110
      d_input(input),
111
      d_inputString(inputString),
112
4070
      d_line_buffer(line_buffer) {
113
4070
  Assert(input != NULL);
114
4070
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
115
4070
}
116
117
12210
AntlrInputStream::~AntlrInputStream() {
118
4070
  d_input->free(d_input);
119
4070
  if(d_inputString != NULL){
120
163
    free(d_inputString);
121
  }
122
4070
  if (d_line_buffer != NULL) {
123
4
    delete d_line_buffer;
124
  }
125
8140
}
126
127
8140
pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
128
8140
  return d_input;
129
}
130
131
132
133
AntlrInputStream*
134
3903
AntlrInputStream::newFileInputStream(const std::string& name,
135
                                     bool useMmap)
136
{
137
#ifdef _WIN32
138
  if(useMmap) {
139
    useMmap = false;
140
  }
141
#endif
142
3903
  pANTLR3_INPUT_STREAM input = NULL;
143
3903
  if(useMmap) {
144
    input = MemoryMappedInputBufferNew(name);
145
  } else {
146
3903
    input = newAntlr3FileStream(name);
147
  }
148
3903
  if(input == NULL) {
149
    throw InputStreamException("Couldn't open file: " + name);
150
  }
151
3903
  return new AntlrInputStream(name, input, false, NULL, NULL);
152
}
153
154
4
AntlrInputStream* AntlrInputStream::newStreamInputStream(
155
    std::istream& input, const std::string& name)
156
{
157
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
158
4
  pANTLR3_UINT8 inputStringCopy = NULL;
159
4
  LineBuffer* line_buffer = new LineBuffer(&input);
160
4
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
161
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
162
4
                              line_buffer);
163
}
164
165
166
AntlrInputStream*
167
163
AntlrInputStream::newStringInputStream(const std::string& input,
168
                                       const std::string& name)
169
{
170
163
  size_t input_size = input.size();
171
163
  Assert(input_size <= std::numeric_limits<uint32_t>::max());
172
173
  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
174
163
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
175
176
163
  if( input_duplicate == NULL ) {
177
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
178
  }
179
180
163
  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
181
163
  if( inputStream==NULL ) {
182
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
183
  }
184
163
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
185
}
186
187
4070
AntlrInput* AntlrInput::newInput(const std::string& lang,
188
                                 AntlrInputStream& inputStream)
189
{
190
4070
  if (lang == "LANG_SYGUS_V2")
191
  {
192
198
    return new SygusInput(inputStream);
193
  }
194
3872
  else if (lang == "LANG_TPTP")
195
  {
196
41
    return new TptpInput(inputStream);
197
  }
198
3831
  else if (lang == "LANG_SMTLIB_V2_6")
199
  {
200
3831
    return new Smt2Input(inputStream);
201
  }
202
  else
203
  {
204
    throw InputStreamException(
205
        "unable to detect input file format, try --lang ");
206
  }
207
}
208
209
4070
AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
210
    Input(inputStream),
211
    d_lookahead(lookahead),
212
    d_lexer(NULL),
213
    d_parser(NULL),
214
4070
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
215
8140
    d_tokenBuffer(NULL) {
216
4070
}
217
218
/*
219
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
220
  Parser(exprManager,name),
221
  d_lookahead(lookahead) {
222
223
}
224
*/
225
226
/*
227
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
228
  Input(exprManager,name),
229
  d_lookahead(lookahead),
230
  d_lexer(NULL),
231
  d_parser(NULL),
232
  d_tokenStream(NULL) {
233
234
  char* inputStr = strdup(input.c_str());
235
  char* nameStr = strdup(name.c_str());
236
  if( inputStr==NULL || nameStr==NULL ) {
237
    throw ParserException("Couldn't initialize string input: '" + input + "'");
238
  }
239
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
240
  if( d_inputStream == NULL ) {
241
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
242
  }
243
244
}
245
*/
246
247
8140
AntlrInput::~AntlrInput() {
248
4070
  BoundedTokenBufferFree(d_tokenBuffer);
249
4070
}
250
251
4070
pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
252
4070
  return d_tokenBuffer->commonTstream;
253
}
254
255
12
void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
256
12
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
257
12
  Assert(lexer != NULL);
258
12
  Parser *parser = (Parser*)(lexer->super);
259
12
  Assert(parser != NULL);
260
12
  AntlrInput *input = (AntlrInput*) parser->getInput();
261
12
  Assert(input != NULL);
262
263
  /* Call the error display routine *if* there's not already a
264
   * parse error pending.  If a parser error is pending, this
265
   * error is probably less important, so we just drop it. */
266
12
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
267
24
    input->parseError("Error finding next token.");
268
  }
269
}
270
271
235
void AntlrInput::warning(const std::string& message) {
272
235
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
273
235
}
274
275
276
277
/**
278
 * characters considered part of a simple symbol in SMTLIB.
279
 *
280
 * TODO: Ideally this code shouldn't be smtlib specific (should work
281
 * with CVC language too), but this per-language specialization has
282
 * been left for a later point.
283
 */
284
4367
inline bool isSimpleChar(char ch) {
285
4367
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
286
}
287
288
104
size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
289
104
  size_t st = 0;
290
104
  size_t N = input.size();
291
1136
  while(st < N) {
292
1159
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
293
555
    size_t en = st;
294
2745
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
295
555
    if(en - st + 1 == pattern.size()) {
296
93
      bool match = true;
297
258
      for(size_t i = 0; match && i < pattern.size(); ++i) {
298
165
        match &= (pattern[i] == input[st+i]);
299
      }
300
93
      if(match == true) {
301
39
        return st;
302
      }
303
    }
304
516
    st = en + 1;
305
  }
306
65
  return string::npos;
307
}
308
309
/**
310
 * Gets part of original input and tries to visually hint where the
311
 * error might be.
312
 *
313
 * Something like:
314
 *
315
 *   ...nd (= alpha beta) (= beta delta))
316
 *                                ^
317
 *
318
 * Implementation (as on 2014/04/24):
319
 *
320
 * > if suggested pointer by lexer is under a "simple char", move to
321
 *   start of the word and print pointer there.
322
 *
323
 * > in the other case, it tries to find the nearest word in the error
324
 *   message passed along. if it can't find it, we don't add this
325
 *   visual hint, as experimentally position suggested by lexer was
326
 *   found to be totally unhelpful. (TODO: fix this upstream to
327
 *   improve)
328
 */
329
76
std::string parseErrorHelper(const char* lineStart,
330
                             std::size_t lineLength,
331
                             std::size_t charPositionInLine,
332
                             const std::string& message)
333
{
334
  // Is it a multi-line message
335
76
  bool multilineMessage = (message.find('\n') != string::npos);
336
  // Useful only if it is a multi-line message
337
76
  int firstLineEnd = message.find('\n');
338
339
152
  std::ostringstream ss, slicess;
340
341
  // Keep first line of message
342
76
  if(multilineMessage) {
343
    ss << message.substr(0, firstLineEnd) << endl << endl;
344
  } else {
345
76
    ss << message << endl << endl;
346
  }
347
348
76
  std::size_t posSliceStart =
349
76
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
350
76
  std::size_t posSliceEnd = posSliceStart + 70;
351
76
  std::size_t caretPos = 0;
352
76
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.
353
354
76
  ss << "  "; caretPosExtra += 2;
355
76
  if(posSliceStart > 0) {
356
5
    ss << "..."; caretPosExtra += 3;
357
  }
358
359
1493
  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
360
       ++i)
361
  {
362
1417
    if(i == posSliceEnd) {
363
      ss << "...";
364
      break;
365
    }
366
1417
    if(i < charPositionInLine) { caretPos++; }
367
368
1417
    if(!isprint(lineStart[i])) {
369
      // non-printable character, something wrong, bail out
370
      return message;
371
    }
372
373
1417
    ss << (lineStart[i]);
374
1417
    slicess << (lineStart[i]);
375
  }
376
377
  // adjust position of caret, based on slice and message
378
  {
379
76
    int caretPosOrig = caretPos;
380
136
    string slice = slicess.str();
381
76
    if(isSimpleChar(slice[caretPos])) {
382
      // if alphanumeric, try to go to beginning of word/number
383
93
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
384
31
      if(caretPos == 0 && posSliceStart > 0) {
385
        // reached start and this is not really the start? bail out
386
        return message;
387
      } else {
388
        // likely it is also in original message? if so, very likely
389
        // we found the right place
390
62
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
391
31
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
392
31
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
393
31
        if( matchLoc != string::npos ) {
394
10
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
395
        }
396
      }
397
    } else {
398
45
      bool foundCaretPos = false;
399
400
122
      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
401
        // go to nearest alphanumeric string (before current position),
402
        // see if that word can be found in original message. If so,
403
        // point to that, else keep pointer where it was.
404
77
        int nearestWordEn = caretPos - 1;
405
165
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
406
44
          --nearestWordEn;
407
        }
408
77
        if(isSimpleChar(slice[nearestWordEn])) {
409
73
          int nearestWordSt = nearestWordEn;
410
281
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
411
104
            --nearestWordSt;
412
          }
413
146
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
414
73
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
415
73
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
416
73
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
417
73
          if( matchLoc != string::npos ) {
418
58
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
419
29
                                    << nearestWordSt << std::endl;
420
29
            foundCaretPos = true;
421
          }
422
73
          caretPos = nearestWordSt;
423
        }
424
      }
425
45
      if( !foundCaretPos) {
426
        // this doesn't look good. caret generally getting printed
427
        // at unhelpful positions. improve upstream?
428
16
        return message;
429
      }
430
    }
431
60
    caretPos += caretPosExtra;
432
  }// end of caret position computation/heuristics
433
434
60
  ss << std::endl;
435
1404
  while( caretPos-- > 0 ) {
436
672
    ss << ' ';
437
  }
438
60
  ss << '^' << endl;
439
60
  if(multilineMessage) {
440
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
441
  }
442
60
  return ss.str();
443
}
444
445
76
void AntlrInput::parseError(const std::string& message, bool eofException)
446
{
447
152
  auto lineLength = d_antlr3InputStream->sizeBuf
448
76
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
449
76
                       - static_cast<char*>(d_antlr3InputStream->data));
450
  std::string updatedMessage = parseErrorHelper(
451
76
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
452
      lineLength,
453
76
      d_lexer->getCharPositionInLine(d_lexer),
454
228
      message);
455
456
152
  Debug("parser") << "Throwing exception: "
457
152
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
458
152
      << d_lexer->getLine(d_lexer) << "."
459
152
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
460
76
      << updatedMessage << endl;
461
76
  if(eofException) {
462
    throw ParserEndOfFileException(message,
463
2
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
464
2
                                   d_lexer->getLine(d_lexer),
465
6
                                   d_lexer->getCharPositionInLine(d_lexer));
466
  } else {
467
    throw ParserException(updatedMessage,
468
74
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
469
74
                          d_lexer->getLine(d_lexer),
470
222
                          d_lexer->getCharPositionInLine(d_lexer));
471
  }
472
}
473
474
475
4070
void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
476
4070
  d_lexer = pLexer;
477
478
4070
  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
479
4070
  if( pTokenFactory != NULL ) {
480
4070
    pTokenFactory->close(pTokenFactory);
481
  }
482
483
  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
484
4070
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
485
4070
  if( pTokenFactory == NULL ) {
486
    throw InputStreamException("Couldn't create token factory.");
487
  }
488
4070
  d_lexer->rec->state->tokFactory = pTokenFactory;
489
490
4070
  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
491
4070
  if( buffer == NULL ) {
492
    throw InputStreamException("Couldn't create token buffer.");
493
  }
494
495
4070
  d_tokenBuffer = buffer;
496
497
  // Override default lexer error reporting
498
4070
  d_lexer->rec->reportError = &lexerError;
499
  // Override default nextToken function, just to prevent exceptions escaping.
500
4070
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
501
4070
}
502
503
4070
void AntlrInput::setParser(Parser& parser) {
504
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
505
  // We could also use @lexer/parser::context to add a field to the generated
506
  // objects, but then it would have to be declared separately in every
507
  // language's grammar and we'd have to in the address of the field anyway.
508
4070
  d_lexer->super = &parser;
509
4070
  d_parser->super = &parser;
510
4070
}
511
512
4070
void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
513
4070
  d_parser = pParser;
514
//  d_parser->rec->match = &match;
515
4070
  d_parser->rec->reportError = &reportError;
516
  /* Don't try to recover from a parse error. */
517
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
518
8140
  d_parser->rec->recoverFromMismatchedToken =
519
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
520
8140
      d_parser->rec->mismatch;
521
4070
}
522
523
}  // namespace parser
524
22731
}  // namespace cvc5