GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.cpp Lines: 205 215 95.3 %
Date: 2021-09-15 Branches: 236 466 50.6 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Kshitij Bansal, Tim King
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * A super-class for ANTLR-generated input language parsers.
14
 */
15
16
#include "parser/antlr_input.h"
17
18
#include <antlr3.h>
19
#include <limits>
20
21
#include "base/check.h"
22
#include "base/output.h"
23
#include "parser/antlr_line_buffered_input.h"
24
#include "parser/bounded_token_buffer.h"
25
#include "parser/bounded_token_factory.h"
26
#include "parser/cvc/cvc_input.h"
27
#include "parser/input.h"
28
#include "parser/memory_mapped_input_buffer.h"
29
#include "parser/parser.h"
30
#include "parser/parser_exception.h"
31
#include "parser/smt2/smt2_input.h"
32
#include "parser/smt2/sygus_input.h"
33
#include "parser/tptp/tptp_input.h"
34
35
using namespace std;
36
using namespace cvc5;
37
using namespace cvc5::parser;
38
using namespace cvc5::kind;
39
40
namespace cvc5 {
41
namespace parser {
42
43
// These functions exactly wrap the antlr3 source inconsistencies.
44
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
45
// No other sanity checking happens;
46
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
47
                                             const std::string& name,
48
                                             LineBuffer* line_buffer);
49
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
50
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
51
                                            uint32_t size,
52
                                            const std::string& name);
53
54
4
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
55
                                             const std::string& name,
56
                                             LineBuffer* line_buffer) {
57
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
58
4
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
59
60
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
61
  inputStream =
62
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
63
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
64
4
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
65
                                            name_duplicate, line_buffer);
66
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
67
68
4
  free(name_duplicate);
69
4
  return inputStream;
70
}
71
72
6177
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
73
6177
  pANTLR3_INPUT_STREAM input = NULL;
74
6177
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
75
76
  // libantlr3c v3.2 isn't source-compatible with v3.4
77
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
78
  input = antlr3AsciiFileStreamNew(name_duplicate);
79
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
80
6177
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
81
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
82
83
6177
  free(name_duplicate);
84
6177
  return input;
85
}
86
87
88
287
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
89
                                            uint32_t size,
90
                                            const std::string& name){
91
287
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
92
287
  pANTLR3_INPUT_STREAM inputStream = NULL;
93
  /* Create an ANTLR input backed by the buffer. */
94
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
95
  inputStream =
96
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
97
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
98
287
  inputStream =
99
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
100
                          name_duplicate);
101
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
102
287
  free(name_duplicate);
103
287
  return inputStream;
104
}
105
106
6468
AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
107
                                   bool fileIsTemporary,
108
                                   pANTLR3_UINT8 inputString,
109
6468
                                   LineBuffer* line_buffer)
110
    : InputStream(name, fileIsTemporary),
111
      d_input(input),
112
      d_inputString(inputString),
113
6468
      d_line_buffer(line_buffer) {
114
6468
  Assert(input != NULL);
115
6468
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
116
6468
}
117
118
19404
AntlrInputStream::~AntlrInputStream() {
119
6468
  d_input->free(d_input);
120
6468
  if(d_inputString != NULL){
121
287
    free(d_inputString);
122
  }
123
6468
  if (d_line_buffer != NULL) {
124
4
    delete d_line_buffer;
125
  }
126
12936
}
127
128
12936
pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
129
12936
  return d_input;
130
}
131
132
133
134
AntlrInputStream*
135
6177
AntlrInputStream::newFileInputStream(const std::string& name,
136
                                     bool useMmap)
137
{
138
#ifdef _WIN32
139
  if(useMmap) {
140
    useMmap = false;
141
  }
142
#endif
143
6177
  pANTLR3_INPUT_STREAM input = NULL;
144
6177
  if(useMmap) {
145
    input = MemoryMappedInputBufferNew(name);
146
  } else {
147
6177
    input = newAntlr3FileStream(name);
148
  }
149
6177
  if(input == NULL) {
150
    throw InputStreamException("Couldn't open file: " + name);
151
  }
152
6177
  return new AntlrInputStream(name, input, false, NULL, NULL);
153
}
154
155
4
AntlrInputStream* AntlrInputStream::newStreamInputStream(
156
    std::istream& input, const std::string& name)
157
{
158
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
159
4
  pANTLR3_UINT8 inputStringCopy = NULL;
160
4
  LineBuffer* line_buffer = new LineBuffer(&input);
161
4
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
162
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
163
4
                              line_buffer);
164
}
165
166
167
AntlrInputStream*
168
287
AntlrInputStream::newStringInputStream(const std::string& input,
169
                                       const std::string& name)
170
{
171
287
  size_t input_size = input.size();
172
287
  Assert(input_size <= std::numeric_limits<uint32_t>::max());
173
174
  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
175
287
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
176
177
287
  if( input_duplicate == NULL ) {
178
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
179
  }
180
181
287
  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
182
287
  if( inputStream==NULL ) {
183
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
184
  }
185
287
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
186
}
187
188
6468
AntlrInput* AntlrInput::newInput(const std::string& lang,
189
                                 AntlrInputStream& inputStream)
190
{
191
6468
  if (lang == "LANG_CVC")
192
  {
193
937
    return new CvcInput(inputStream);
194
  }
195
5531
  else if (lang == "LANG_SYGUS_V2")
196
  {
197
198
    return new SygusInput(inputStream);
198
  }
199
5333
  else if (lang == "LANG_TPTP")
200
  {
201
41
    return new TptpInput(inputStream);
202
  }
203
5292
  else if (lang == "LANG_SMTLIB_V2_6")
204
  {
205
5292
    return new Smt2Input(inputStream);
206
  }
207
  else
208
  {
209
    throw InputStreamException(
210
        "unable to detect input file format, try --lang ");
211
  }
212
}
213
214
6468
AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
215
    Input(inputStream),
216
    d_lookahead(lookahead),
217
    d_lexer(NULL),
218
    d_parser(NULL),
219
6468
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
220
12936
    d_tokenBuffer(NULL) {
221
6468
}
222
223
/*
224
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
225
  Parser(exprManager,name),
226
  d_lookahead(lookahead) {
227
228
}
229
*/
230
231
/*
232
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
233
  Input(exprManager,name),
234
  d_lookahead(lookahead),
235
  d_lexer(NULL),
236
  d_parser(NULL),
237
  d_tokenStream(NULL) {
238
239
  char* inputStr = strdup(input.c_str());
240
  char* nameStr = strdup(name.c_str());
241
  if( inputStr==NULL || nameStr==NULL ) {
242
    throw ParserException("Couldn't initialize string input: '" + input + "'");
243
  }
244
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
245
  if( d_inputStream == NULL ) {
246
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
247
  }
248
249
}
250
*/
251
252
12936
AntlrInput::~AntlrInput() {
253
6468
  BoundedTokenBufferFree(d_tokenBuffer);
254
6468
}
255
256
6468
pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
257
6468
  return d_tokenBuffer->commonTstream;
258
}
259
260
12
void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
261
12
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
262
12
  Assert(lexer != NULL);
263
12
  Parser *parser = (Parser*)(lexer->super);
264
12
  Assert(parser != NULL);
265
12
  AntlrInput *input = (AntlrInput*) parser->getInput();
266
12
  Assert(input != NULL);
267
268
  /* Call the error display routine *if* there's not already a
269
   * parse error pending.  If a parser error is pending, this
270
   * error is probably less important, so we just drop it. */
271
12
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
272
24
    input->parseError("Error finding next token.");
273
  }
274
}
275
276
287
void AntlrInput::warning(const std::string& message) {
277
287
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
278
287
}
279
280
281
282
/**
283
 * characters considered part of a simple symbol in SMTLIB.
284
 *
285
 * TODO: Ideally this code shouldn't be smtlib specific (should work
286
 * with CVC language too), but this per-language specialization has
287
 * been left for a later point.
288
 */
289
6524
inline bool isSimpleChar(char ch) {
290
6524
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
291
}
292
293
159
size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
294
159
  size_t st = 0;
295
159
  size_t N = input.size();
296
1677
  while(st < N) {
297
1759
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
298
819
    size_t en = st;
299
4063
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
300
819
    if(en - st + 1 == pattern.size()) {
301
146
      bool match = true;
302
393
      for(size_t i = 0; match && i < pattern.size(); ++i) {
303
247
        match &= (pattern[i] == input[st+i]);
304
      }
305
146
      if(match == true) {
306
60
        return st;
307
      }
308
    }
309
759
    st = en + 1;
310
  }
311
99
  return string::npos;
312
}
313
314
/**
315
 * Gets part of original input and tries to visually hint where the
316
 * error might be.
317
 *
318
 * Something like:
319
 *
320
 *   ...nd (= alpha beta) (= beta delta))
321
 *                                ^
322
 *
323
 * Implementation (as on 2014/04/24):
324
 *
325
 * > if suggested pointer by lexer is under a "simple char", move to
326
 *   start of the word and print pointer there.
327
 *
328
 * > in the other case, it tries to find the nearest word in the error
329
 *   message passed along. if it can't find it, we don't add this
330
 *   visual hint, as experimentally position suggested by lexer was
331
 *   found to be totally unhelpful. (TODO: fix this upstream to
332
 *   improve)
333
 */
334
121
std::string parseErrorHelper(const char* lineStart,
335
                             std::size_t lineLength,
336
                             std::size_t charPositionInLine,
337
                             const std::string& message)
338
{
339
  // Is it a multi-line message
340
121
  bool multilineMessage = (message.find('\n') != string::npos);
341
  // Useful only if it is a multi-line message
342
121
  int firstLineEnd = message.find('\n');
343
344
242
  std::ostringstream ss, slicess;
345
346
  // Keep first line of message
347
121
  if(multilineMessage) {
348
6
    ss << message.substr(0, firstLineEnd) << endl << endl;
349
  } else {
350
115
    ss << message << endl << endl;
351
  }
352
353
121
  std::size_t posSliceStart =
354
121
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
355
121
  std::size_t posSliceEnd = posSliceStart + 70;
356
121
  std::size_t caretPos = 0;
357
121
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.
358
359
121
  ss << "  "; caretPosExtra += 2;
360
121
  if(posSliceStart > 0) {
361
11
    ss << "..."; caretPosExtra += 3;
362
  }
363
364
2483
  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
365
       ++i)
366
  {
367
2366
    if(i == posSliceEnd) {
368
4
      ss << "...";
369
4
      break;
370
    }
371
2362
    if(i < charPositionInLine) { caretPos++; }
372
373
2362
    if(!isprint(lineStart[i])) {
374
      // non-printable character, something wrong, bail out
375
      return message;
376
    }
377
378
2362
    ss << (lineStart[i]);
379
2362
    slicess << (lineStart[i]);
380
  }
381
382
  // adjust position of caret, based on slice and message
383
  {
384
121
    int caretPosOrig = caretPos;
385
222
    string slice = slicess.str();
386
121
    if(isSimpleChar(slice[caretPos])) {
387
      // if alphanumeric, try to go to beginning of word/number
388
187
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
389
57
      if(caretPos == 0 && posSliceStart > 0) {
390
        // reached start and this is not really the start? bail out
391
        return message;
392
      } else {
393
        // likely it is also in original message? if so, very likely
394
        // we found the right place
395
114
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
396
57
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
397
57
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
398
57
        if( matchLoc != string::npos ) {
399
16
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
400
        }
401
      }
402
    } else {
403
64
      bool foundCaretPos = false;
404
405
170
      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
406
        // go to nearest alphanumeric string (before current position),
407
        // see if that word can be found in original message. If so,
408
        // point to that, else keep pointer where it was.
409
106
        int nearestWordEn = caretPos - 1;
410
234
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
411
64
          --nearestWordEn;
412
        }
413
106
        if(isSimpleChar(slice[nearestWordEn])) {
414
102
          int nearestWordSt = nearestWordEn;
415
396
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
416
147
            --nearestWordSt;
417
          }
418
204
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
419
102
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
420
102
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
421
102
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
422
102
          if( matchLoc != string::npos ) {
423
88
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
424
44
                                    << nearestWordSt << std::endl;
425
44
            foundCaretPos = true;
426
          }
427
102
          caretPos = nearestWordSt;
428
        }
429
      }
430
64
      if( !foundCaretPos) {
431
        // this doesn't look good. caret generally getting printed
432
        // at unhelpful positions. improve upstream?
433
20
        return message;
434
      }
435
    }
436
101
    caretPos += caretPosExtra;
437
  }// end of caret position computation/heuristics
438
439
101
  ss << std::endl;
440
2637
  while( caretPos-- > 0 ) {
441
1268
    ss << ' ';
442
  }
443
101
  ss << '^' << endl;
444
101
  if(multilineMessage) {
445
6
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
446
  }
447
101
  return ss.str();
448
}
449
450
121
void AntlrInput::parseError(const std::string& message, bool eofException)
451
{
452
242
  auto lineLength = d_antlr3InputStream->sizeBuf
453
121
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
454
121
                       - static_cast<char*>(d_antlr3InputStream->data));
455
  std::string updatedMessage = parseErrorHelper(
456
121
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
457
      lineLength,
458
121
      d_lexer->getCharPositionInLine(d_lexer),
459
363
      message);
460
461
242
  Debug("parser") << "Throwing exception: "
462
242
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
463
242
      << d_lexer->getLine(d_lexer) << "."
464
242
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
465
121
      << updatedMessage << endl;
466
121
  if(eofException) {
467
    throw ParserEndOfFileException(message,
468
6
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
469
6
                                   d_lexer->getLine(d_lexer),
470
18
                                   d_lexer->getCharPositionInLine(d_lexer));
471
  } else {
472
    throw ParserException(updatedMessage,
473
115
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
474
115
                          d_lexer->getLine(d_lexer),
475
345
                          d_lexer->getCharPositionInLine(d_lexer));
476
  }
477
}
478
479
480
6468
void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
481
6468
  d_lexer = pLexer;
482
483
6468
  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
484
6468
  if( pTokenFactory != NULL ) {
485
6468
    pTokenFactory->close(pTokenFactory);
486
  }
487
488
  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
489
6468
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
490
6468
  if( pTokenFactory == NULL ) {
491
    throw InputStreamException("Couldn't create token factory.");
492
  }
493
6468
  d_lexer->rec->state->tokFactory = pTokenFactory;
494
495
6468
  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
496
6468
  if( buffer == NULL ) {
497
    throw InputStreamException("Couldn't create token buffer.");
498
  }
499
500
6468
  d_tokenBuffer = buffer;
501
502
  // Override default lexer error reporting
503
6468
  d_lexer->rec->reportError = &lexerError;
504
  // Override default nextToken function, just to prevent exceptions escaping.
505
6468
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
506
6468
}
507
508
6468
void AntlrInput::setParser(Parser& parser) {
509
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
510
  // We could also use @lexer/parser::context to add a field to the generated
511
  // objects, but then it would have to be declared separately in every
512
  // language's grammar and we'd have to in the address of the field anyway.
513
6468
  d_lexer->super = &parser;
514
6468
  d_parser->super = &parser;
515
6468
}
516
517
6468
void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
518
6468
  d_parser = pParser;
519
//  d_parser->rec->match = &match;
520
6468
  d_parser->rec->reportError = &reportError;
521
  /* Don't try to recover from a parse error. */
522
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
523
12936
  d_parser->rec->recoverFromMismatchedToken =
524
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
525
12936
      d_parser->rec->mismatch;
526
6468
}
527
528
}  // namespace parser
529
29562
}  // namespace cvc5