GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.cpp Lines: 209 221 94.6 %
Date: 2021-08-14 Branches: 234 468 50.0 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Kshitij Bansal, Tim King
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * A super-class for ANTLR-generated input language parsers.
14
 */
15
16
#include "parser/antlr_input.h"
17
18
#include <antlr3.h>
19
#include <limits>
20
21
#include "base/check.h"
22
#include "base/output.h"
23
#include "parser/antlr_line_buffered_input.h"
24
#include "parser/bounded_token_buffer.h"
25
#include "parser/bounded_token_factory.h"
26
#include "parser/cvc/cvc_input.h"
27
#include "parser/input.h"
28
#include "parser/memory_mapped_input_buffer.h"
29
#include "parser/parser.h"
30
#include "parser/parser_exception.h"
31
#include "parser/smt2/smt2_input.h"
32
#include "parser/smt2/sygus_input.h"
33
#include "parser/tptp/tptp_input.h"
34
35
using namespace std;
36
using namespace cvc5;
37
using namespace cvc5::parser;
38
using namespace cvc5::kind;
39
40
namespace cvc5 {
41
namespace parser {
42
43
// These functions exactly wrap the antlr3 source inconsistencies.
44
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
45
// No other sanity checking happens;
46
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
47
                                             const std::string& name,
48
                                             LineBuffer* line_buffer);
49
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
50
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
51
                                            uint32_t size,
52
                                            const std::string& name);
53
54
4
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
55
                                             const std::string& name,
56
                                             LineBuffer* line_buffer) {
57
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
58
4
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
59
60
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
61
  inputStream =
62
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
63
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
64
4
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
65
                                            name_duplicate, line_buffer);
66
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
67
68
4
  free(name_duplicate);
69
4
  return inputStream;
70
}
71
72
6135
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
73
6135
  pANTLR3_INPUT_STREAM input = NULL;
74
6135
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
75
76
  // libantlr3c v3.2 isn't source-compatible with v3.4
77
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
78
  input = antlr3AsciiFileStreamNew(name_duplicate);
79
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
80
6135
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
81
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
82
83
6135
  free(name_duplicate);
84
6135
  return input;
85
}
86
87
88
287
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
89
                                            uint32_t size,
90
                                            const std::string& name){
91
287
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
92
287
  pANTLR3_INPUT_STREAM inputStream = NULL;
93
  /* Create an ANTLR input backed by the buffer. */
94
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
95
  inputStream =
96
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
97
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
98
287
  inputStream =
99
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
100
                          name_duplicate);
101
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
102
287
  free(name_duplicate);
103
287
  return inputStream;
104
}
105
106
6426
AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
107
                                   bool fileIsTemporary,
108
                                   pANTLR3_UINT8 inputString,
109
6426
                                   LineBuffer* line_buffer)
110
    : InputStream(name, fileIsTemporary),
111
      d_input(input),
112
      d_inputString(inputString),
113
6426
      d_line_buffer(line_buffer) {
114
6426
  Assert(input != NULL);
115
6426
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
116
6426
}
117
118
19278
AntlrInputStream::~AntlrInputStream() {
119
6426
  d_input->free(d_input);
120
6426
  if(d_inputString != NULL){
121
287
    free(d_inputString);
122
  }
123
6426
  if (d_line_buffer != NULL) {
124
4
    delete d_line_buffer;
125
  }
126
12852
}
127
128
12852
pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
129
12852
  return d_input;
130
}
131
132
133
134
AntlrInputStream*
135
6135
AntlrInputStream::newFileInputStream(const std::string& name,
136
                                     bool useMmap)
137
{
138
#ifdef _WIN32
139
  if(useMmap) {
140
    useMmap = false;
141
  }
142
#endif
143
6135
  pANTLR3_INPUT_STREAM input = NULL;
144
6135
  if(useMmap) {
145
    input = MemoryMappedInputBufferNew(name);
146
  } else {
147
6135
    input = newAntlr3FileStream(name);
148
  }
149
6135
  if(input == NULL) {
150
    throw InputStreamException("Couldn't open file: " + name);
151
  }
152
6135
  return new AntlrInputStream(name, input, false, NULL, NULL);
153
}
154
155
4
AntlrInputStream* AntlrInputStream::newStreamInputStream(
156
    std::istream& input, const std::string& name)
157
{
158
4
  pANTLR3_INPUT_STREAM inputStream = NULL;
159
4
  pANTLR3_UINT8 inputStringCopy = NULL;
160
4
  LineBuffer* line_buffer = new LineBuffer(&input);
161
4
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
162
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
163
4
                              line_buffer);
164
}
165
166
167
AntlrInputStream*
168
287
AntlrInputStream::newStringInputStream(const std::string& input,
169
                                       const std::string& name)
170
{
171
287
  size_t input_size = input.size();
172
287
  Assert(input_size <= std::numeric_limits<uint32_t>::max());
173
174
  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
175
287
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
176
177
287
  if( input_duplicate == NULL ) {
178
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
179
  }
180
181
287
  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
182
287
  if( inputStream==NULL ) {
183
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
184
  }
185
287
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
186
}
187
188
6426
AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
189
  using namespace language::input;
190
191
  AntlrInput* input;
192
193
6426
  switch(lang) {
194
952
    case LANG_CVC:
195
    {
196
952
      input = new CvcInput(inputStream);
197
952
      break;
198
    }
199
200
194
  case LANG_SYGUS_V2: input = new SygusInput(inputStream); break;
201
202
41
  case LANG_TPTP:
203
41
    input = new TptpInput(inputStream);
204
41
    break;
205
206
5239
  default:
207
5239
    if (language::isInputLang_smt2(lang))
208
    {
209
5239
      input = new Smt2Input(inputStream);
210
    }
211
    else
212
    {
213
      std::stringstream ss;
214
      ss << "unable to detect input file format, try --lang ";
215
      throw InputStreamException(ss.str());
216
    }
217
  }
218
219
6426
  return input;
220
}
221
222
6426
AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
223
    Input(inputStream),
224
    d_lookahead(lookahead),
225
    d_lexer(NULL),
226
    d_parser(NULL),
227
6426
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
228
12852
    d_tokenBuffer(NULL) {
229
6426
}
230
231
/*
232
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
233
  Parser(exprManager,name),
234
  d_lookahead(lookahead) {
235
236
}
237
*/
238
239
/*
240
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
241
  Input(exprManager,name),
242
  d_lookahead(lookahead),
243
  d_lexer(NULL),
244
  d_parser(NULL),
245
  d_tokenStream(NULL) {
246
247
  char* inputStr = strdup(input.c_str());
248
  char* nameStr = strdup(name.c_str());
249
  if( inputStr==NULL || nameStr==NULL ) {
250
    throw ParserException("Couldn't initialize string input: '" + input + "'");
251
  }
252
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
253
  if( d_inputStream == NULL ) {
254
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
255
  }
256
257
}
258
*/
259
260
12852
AntlrInput::~AntlrInput() {
261
6426
  BoundedTokenBufferFree(d_tokenBuffer);
262
6426
}
263
264
6426
pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
265
6426
  return d_tokenBuffer->commonTstream;
266
}
267
268
12
void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
269
12
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
270
12
  Assert(lexer != NULL);
271
12
  Parser *parser = (Parser*)(lexer->super);
272
12
  Assert(parser != NULL);
273
12
  AntlrInput *input = (AntlrInput*) parser->getInput();
274
12
  Assert(input != NULL);
275
276
  /* Call the error display routine *if* there's not already a
277
   * parse error pending.  If a parser error is pending, this
278
   * error is probably less important, so we just drop it. */
279
12
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
280
24
    input->parseError("Error finding next token.");
281
  }
282
}
283
284
254
void AntlrInput::warning(const std::string& message) {
285
254
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
286
254
}
287
288
289
290
/**
291
 * characters considered part of a simple symbol in SMTLIB.
292
 *
293
 * TODO: Ideally this code shouldn't be smtlib specific (should work
294
 * with CVC language too), but this per-language specialization has
295
 * been left for a later point.
296
 */
297
6524
inline bool isSimpleChar(char ch) {
298
6524
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
299
}
300
301
159
size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
302
159
  size_t st = 0;
303
159
  size_t N = input.size();
304
1677
  while(st < N) {
305
1759
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
306
819
    size_t en = st;
307
4063
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
308
819
    if(en - st + 1 == pattern.size()) {
309
146
      bool match = true;
310
393
      for(size_t i = 0; match && i < pattern.size(); ++i) {
311
247
        match &= (pattern[i] == input[st+i]);
312
      }
313
146
      if(match == true) {
314
60
        return st;
315
      }
316
    }
317
759
    st = en + 1;
318
  }
319
99
  return string::npos;
320
}
321
322
/**
323
 * Gets part of original input and tries to visually hint where the
324
 * error might be.
325
 *
326
 * Something like:
327
 *
328
 *   ...nd (= alpha beta) (= beta delta))
329
 *                                ^
330
 *
331
 * Implementation (as on 2014/04/24):
332
 *
333
 * > if suggested pointer by lexer is under a "simple char", move to
334
 *   start of the word and print pointer there.
335
 *
336
 * > in the other case, it tries to find the nearest word in the error
337
 *   message passed along. if it can't find it, we don't add this
338
 *   visual hint, as experimentally position suggested by lexer was
339
 *   found to be totally unhelpful. (TODO: fix this upstream to
340
 *   improve)
341
 */
342
121
std::string parseErrorHelper(const char* lineStart,
343
                             std::size_t lineLength,
344
                             std::size_t charPositionInLine,
345
                             const std::string& message)
346
{
347
  // Is it a multi-line message
348
121
  bool multilineMessage = (message.find('\n') != string::npos);
349
  // Useful only if it is a multi-line message
350
121
  int firstLineEnd = message.find('\n');
351
352
242
  std::ostringstream ss, slicess;
353
354
  // Keep first line of message
355
121
  if(multilineMessage) {
356
6
    ss << message.substr(0, firstLineEnd) << endl << endl;
357
  } else {
358
115
    ss << message << endl << endl;
359
  }
360
361
121
  std::size_t posSliceStart =
362
121
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
363
121
  std::size_t posSliceEnd = posSliceStart + 70;
364
121
  std::size_t caretPos = 0;
365
121
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.
366
367
121
  ss << "  "; caretPosExtra += 2;
368
121
  if(posSliceStart > 0) {
369
11
    ss << "..."; caretPosExtra += 3;
370
  }
371
372
2483
  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
373
       ++i)
374
  {
375
2366
    if(i == posSliceEnd) {
376
4
      ss << "...";
377
4
      break;
378
    }
379
2362
    if(i < charPositionInLine) { caretPos++; }
380
381
2362
    if(!isprint(lineStart[i])) {
382
      // non-printable character, something wrong, bail out
383
      return message;
384
    }
385
386
2362
    ss << (lineStart[i]);
387
2362
    slicess << (lineStart[i]);
388
  }
389
390
  // adjust position of caret, based on slice and message
391
  {
392
121
    int caretPosOrig = caretPos;
393
222
    string slice = slicess.str();
394
121
    if(isSimpleChar(slice[caretPos])) {
395
      // if alphanumeric, try to go to beginning of word/number
396
187
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
397
57
      if(caretPos == 0 && posSliceStart > 0) {
398
        // reached start and this is not really the start? bail out
399
        return message;
400
      } else {
401
        // likely it is also in original message? if so, very likely
402
        // we found the right place
403
114
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
404
57
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
405
57
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
406
57
        if( matchLoc != string::npos ) {
407
16
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
408
        }
409
      }
410
    } else {
411
64
      bool foundCaretPos = false;
412
413
170
      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
414
        // go to nearest alphanumeric string (before current position),
415
        // see if that word can be found in original message. If so,
416
        // point to that, else keep pointer where it was.
417
106
        int nearestWordEn = caretPos - 1;
418
234
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
419
64
          --nearestWordEn;
420
        }
421
106
        if(isSimpleChar(slice[nearestWordEn])) {
422
102
          int nearestWordSt = nearestWordEn;
423
396
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
424
147
            --nearestWordSt;
425
          }
426
204
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
427
102
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
428
102
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
429
102
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
430
102
          if( matchLoc != string::npos ) {
431
88
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
432
44
                                    << nearestWordSt << std::endl;
433
44
            foundCaretPos = true;
434
          }
435
102
          caretPos = nearestWordSt;
436
        }
437
      }
438
64
      if( !foundCaretPos) {
439
        // this doesn't look good. caret generally getting printed
440
        // at unhelpful positions. improve upstream?
441
20
        return message;
442
      }
443
    }
444
101
    caretPos += caretPosExtra;
445
  }// end of caret position computation/heuristics
446
447
101
  ss << std::endl;
448
2637
  while( caretPos-- > 0 ) {
449
1268
    ss << ' ';
450
  }
451
101
  ss << '^' << endl;
452
101
  if(multilineMessage) {
453
6
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
454
  }
455
101
  return ss.str();
456
}
457
458
121
void AntlrInput::parseError(const std::string& message, bool eofException)
459
{
460
242
  auto lineLength = d_antlr3InputStream->sizeBuf
461
121
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
462
121
                       - static_cast<char*>(d_antlr3InputStream->data));
463
  std::string updatedMessage = parseErrorHelper(
464
121
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
465
      lineLength,
466
121
      d_lexer->getCharPositionInLine(d_lexer),
467
363
      message);
468
469
242
  Debug("parser") << "Throwing exception: "
470
242
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
471
242
      << d_lexer->getLine(d_lexer) << "."
472
242
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
473
121
      << updatedMessage << endl;
474
121
  if(eofException) {
475
    throw ParserEndOfFileException(message,
476
6
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
477
6
                                   d_lexer->getLine(d_lexer),
478
18
                                   d_lexer->getCharPositionInLine(d_lexer));
479
  } else {
480
    throw ParserException(updatedMessage,
481
115
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
482
115
                          d_lexer->getLine(d_lexer),
483
345
                          d_lexer->getCharPositionInLine(d_lexer));
484
  }
485
}
486
487
488
6426
void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
489
6426
  d_lexer = pLexer;
490
491
6426
  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
492
6426
  if( pTokenFactory != NULL ) {
493
6426
    pTokenFactory->close(pTokenFactory);
494
  }
495
496
  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
497
6426
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
498
6426
  if( pTokenFactory == NULL ) {
499
    throw InputStreamException("Couldn't create token factory.");
500
  }
501
6426
  d_lexer->rec->state->tokFactory = pTokenFactory;
502
503
6426
  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
504
6426
  if( buffer == NULL ) {
505
    throw InputStreamException("Couldn't create token buffer.");
506
  }
507
508
6426
  d_tokenBuffer = buffer;
509
510
  // Override default lexer error reporting
511
6426
  d_lexer->rec->reportError = &lexerError;
512
  // Override default nextToken function, just to prevent exceptions escaping.
513
6426
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
514
6426
}
515
516
6426
void AntlrInput::setParser(Parser& parser) {
517
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
518
  // We could also use @lexer/parser::context to add a field to the generated
519
  // objects, but then it would have to be declared separately in every
520
  // language's grammar and we'd have to in the address of the field anyway.
521
6426
  d_lexer->super = &parser;
522
6426
  d_parser->super = &parser;
523
6426
}
524
525
6426
void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
526
6426
  d_parser = pParser;
527
//  d_parser->rec->match = &match;
528
6426
  d_parser->rec->reportError = &reportError;
529
  /* Don't try to recover from a parse error. */
530
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
531
12852
  d_parser->rec->recoverFromMismatchedToken =
532
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
533
12852
      d_parser->rec->mismatch;
534
6426
}
535
536
}  // namespace parser
537
29325
}  // namespace cvc5