GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input_imports.cpp Lines: 73 111 65.8 %
Date: 2021-05-22 Branches: 55 238 23.1 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Francois Bobot, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * [[ Add one-line brief description here ]]
14
 *
15
 * [[ Add lengthier description here ]]
16
 * \todo document this file
17
 */
18
19
/*
20
 * The functions in this file are based on implementations in libantlr3c,
21
 * with only minor cvc5-specific changes.
22
 */
23
24
// [The "BSD licence"]
25
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
26
// http://www.temporal-wave.com
27
// http://www.linkedin.com/in/jimidle
28
//
29
// All rights reserved.
30
//
31
// Redistribution and use in source and binary forms, with or without
32
// modification, are permitted provided that the following conditions
33
// are met:
34
// 1. Redistributions of source code must retain the above copyright
35
//    notice, this list of conditions and the following disclaimer.
36
// 2. Redistributions in binary form must reproduce the above copyright
37
//    notice, this list of conditions and the following disclaimer in the
38
//    documentation and/or other materials provided with the distribution.
39
// 3. The name of the author may not be used to endorse or promote products
40
//    derived from this software without specific prior written permission.
41
//
42
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52
53
#include <antlr3.h>
54
55
#include <sstream>
56
57
#include "base/check.h"
58
#include "parser/antlr_input.h"
59
#include "parser/parser.h"
60
#include "parser/parser_exception.h"
61
62
using namespace std;
63
64
namespace cvc5 {
65
namespace parser {
66
67
/// Report a recognition problem.
68
///
69
/// This method sets errorRecovery to indicate the parser is recovering
70
/// not parsing.  Once in recovery mode, no errors are generated.
71
/// To get out of recovery mode, the parser must successfully match
72
/// a token (after a resync).  So it will go:
73
///
74
///             1. error occurs
75
///             2. enter recovery mode, report error
76
///             3. consume until token found in resynch set
77
///             4. try to resume parsing
78
///             5. next match() will reset errorRecovery mode
79
///
80
/// If you override, make sure to update errorCount if you care about that.
81
///
82
/* *** cvc5 NOTE ***
83
 * This function is has been modified in not-completely-trivial ways from its
84
 * libantlr3c implementation to support more informative error messages and to
85
 * invoke the error reporting mechanism of the Input class instead of the
86
 * default error printer.
87
 */
88
36
void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
89
36
  pANTLR3_EXCEPTION ex = recognizer->state->exception;
90
36
  pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
91
72
  stringstream ss;
92
93
  // Dig the cvc5 objects out of the ANTLR3 mess
94
36
  pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super);
95
36
  Assert(antlr3Parser != NULL);
96
36
  Parser *parser = (Parser*)(antlr3Parser->super);
97
36
  Assert(parser != NULL);
98
36
  AntlrInput *input = (AntlrInput*) parser->getInput() ;
99
36
  Assert(input != NULL);
100
101
  // Signal we are in error recovery now
102
36
  recognizer->state->errorRecovery = ANTLR3_TRUE;
103
104
  // Indicate this recognizer had an error while processing.
105
36
  recognizer->state->errorCount++;
106
107
  // Call the builtin error formatter
108
  // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
109
110
  /* TODO: Make error messages more useful, maybe by including more expected tokens and information
111
   * about the current token. */
112
36
  switch(ex->type) {
113
  case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
114
115
    // Indicates that the recognizer was fed a token which seems to be
116
    // spurious input. We can detect this when the token that follows
117
    // this unwanted token would normally be part of the syntactically
118
    // correct stream. Then we can see that the token we are looking at
119
    // is just something that should not be there and throw this exception.
120
    //
121
    if(tokenNames == NULL) {
122
      ss << "Unexpected token." ;
123
    } else {
124
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
125
        ss << "Expected end of file.";
126
      } else {
127
        ss << "Expected " << tokenNames[ex->expecting]
128
           << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
129
      }
130
    }
131
    break;
132
133
12
  case ANTLR3_MISSING_TOKEN_EXCEPTION:
134
135
    // Indicates that the recognizer detected that the token we just
136
    // hit would be valid syntactically if preceded by a particular
137
    // token. Perhaps a missing ';' at line end or a missing ',' in an
138
    // expression list, and such like.
139
    //
140
12
    if(tokenNames == NULL) {
141
      ss << "Missing token (" << ex->expecting << ").";
142
    } else {
143
12
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
144
        ss << "Missing end of file marker.";
145
12
      } else if( ex->expecting == 0 ) {
146
12
        ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
147
12
        if( std::string(tokenText((pANTLR3_COMMON_TOKEN)ex->token)) == std::string("IN") ) {
148
          ss << " Did you mean: `IS_IN'?";
149
        }
150
      } else {
151
        ss << "Missing " << tokenNames[ex->expecting] << ".";
152
      }
153
    }
154
12
    break;
155
156
  case ANTLR3_RECOGNITION_EXCEPTION:
157
158
    // Indicates that the recognizer received a token
159
    // in the input that was not predicted. This is the basic exception type
160
    // from which all others are derived. So we assume it was a syntax error.
161
    // You may get this if there are not more tokens and more are needed
162
    // to complete a parse for instance.
163
    //
164
    ss <<"Syntax error.";
165
    break;
166
167
4
  case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
168
169
    // We were expecting to see one thing and got another. This is the
170
    // most common error if we could not detect a missing or unwanted token.
171
    // Here you can spend your efforts to
172
    // derive more useful error messages based on the expected
173
    // token set and the last token and so on. The error following
174
    // bitmaps do a good job of reducing the set that we were looking
175
    // for down to something small. Knowing what you are parsing may be
176
    // able to allow you to be even more specific about an error.
177
    //
178
4
    if(tokenNames == NULL) {
179
      ss << "Syntax error.";
180
    } else {
181
4
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
182
        ss << "Expected end of file.";
183
4
      } else if( ex->expecting == 0 ) {
184
4
        ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
185
      } else {
186
        ss << "Expected " << tokenNames[ex->expecting] << ".";
187
      }
188
    }
189
4
    break;
190
191
18
  case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
192
    // We could not pick any alt decision from the input given
193
    // so god knows what happened - however when you examine your grammar,
194
    // you should. It means that at the point where the current token occurred
195
    // that the DFA indicates nowhere to go from here.
196
    //
197
18
    ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
198
18
    break;
199
200
  case ANTLR3_MISMATCHED_SET_EXCEPTION:
201
202
  {
203
    ANTLR3_UINT32 count;
204
    ANTLR3_UINT32 bit;
205
    ANTLR3_UINT32 size;
206
    ANTLR3_UINT32 numbits;
207
    pANTLR3_BITSET errBits;
208
209
    // This means we were able to deal with one of a set of
210
    // possible tokens at this point, but we did not see any
211
    // member of that set.
212
    //
213
    ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token)
214
       << "'. Expected one of: ";
215
216
    // What tokens could we have accepted at this point in the
217
    // parse?
218
    //
219
    count = 0;
220
    errBits = antlr3BitsetLoad(ex->expectingSet);
221
    numbits = errBits->numBits(errBits);
222
    size = errBits->size(errBits);
223
224
    if(size > 0) {
225
      // However many tokens we could have dealt with here, it is usually
226
      // not useful to print ALL of the set here. I arbitrarily chose 8
227
      // here, but you should do whatever makes sense for you of course.
228
      // No token number 0, so look for bit 1 and on.
229
      //
230
      for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
231
        // TODO: This doesn;t look right - should be asking if the bit is set!!
232
        //
233
        if(tokenNames[bit]) {
234
          if( count++ > 0 ) {
235
            ss << ", ";
236
          }
237
          ss << tokenNames[bit];
238
        }
239
      }
240
    } else {
241
      Assert(false);  //("Parse error with empty set of expected tokens.");
242
    }
243
  }
244
    break;
245
246
2
  case ANTLR3_EARLY_EXIT_EXCEPTION:
247
248
    // We entered a loop requiring a number of token sequences
249
    // but found a token that ended that sequence earlier than
250
    // we should have done.
251
    //
252
    ss << "Sequence terminated early by token: '"
253
2
       << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
254
2
    break;
255
256
  default:
257
258
    // We don't handle any other exceptions here, but you can
259
    // if you wish. If we get an exception that hits this point
260
    // then we are just going to report what we know about the
261
    // token.
262
    //
263
    Assert(false);  //("Unexpected exception in parser.");
264
    break;
265
  }
266
267
  // Call the error display routine
268
72
  input->parseError(ss.str(), ((pANTLR3_COMMON_TOKEN)ex->token)->type == ANTLR3_TOKEN_EOF);
269
}
270
271
///
272
/// \brief
273
/// Returns the next available token from the current input stream.
274
///
275
/// \param toksource
276
/// Points to the implementation of a token source. The lexer is
277
/// addressed by the super structure pointer.
278
///
279
/// \returns
280
/// The next token in the current input stream or the EOF token
281
/// if there are no more tokens.
282
///
283
/// \remarks
284
/// Write remarks for nextToken here.
285
///
286
/// \see nextToken
287
///
288
/* *** cvc5 NOTE ***
289
 * This is copied, largely unmodified, from antlr3lexer.c
290
 *
291
 */
292
pANTLR3_COMMON_TOKEN
293
17925781
AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
294
{
295
  pANTLR3_LEXER lexer;
296
297
17925781
  lexer = (pANTLR3_LEXER)(toksource->super);
298
299
  /// Loop until we get a non skipped token or EOF
300
  ///
301
  for (;;)
302
  {
303
    // Get rid of any previous token (token factory takes care of
304
    // any de-allocation when this token is finally used up.
305
    //
306
17925781
    lexer->rec->state->token = NULL;
307
17925781
    lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
308
17925781
    lexer->rec->state->failed = ANTLR3_FALSE;
309
310
    // Now call the matching rules and see if we can generate a new token
311
    //
312
    for (;;)
313
    {
314
      // Record the start of the token in our input stream.
315
      //
316
27212306
      lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
317
27212306
      lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream);
318
27212306
      lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input);
319
27212306
      lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
320
27212306
      lexer->rec->state->text = NULL;
321
322
27212306
      if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
323
      {
324
        // Reached the end of the current stream, nothing more to do if this is
325
        // the last in the stack.
326
        //
327
5301
        pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken);
328
329
5301
        teof->setStartIndex (teof, lexer->getCharIndex(lexer));
330
5301
        teof->setStopIndex (teof, lexer->getCharIndex(lexer));
331
5301
        teof->setLine (teof, lexer->getLine(lexer));
332
5301
        teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it
333
5301
        return teof;
334
      }
335
336
27207005
      lexer->rec->state->token = NULL;
337
27207005
      lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception
338
27207005
      lexer->rec->state->failed = ANTLR3_FALSE;
339
340
      // Call the generated lexer, see if it can get a new token together.
341
      //
342
27207005
      lexer->mTokens(lexer->ctx);
343
344
27207005
      if (lexer->rec->state->error == ANTLR3_TRUE)
345
      {
346
        // Recognition exception, report it and try to recover.
347
        //
348
12
        lexer->rec->state->failed = ANTLR3_TRUE;
349
        // *** cvc5 EDIT: Just call the AntlrInput error routine
350
12
        lexerError(lexer->rec);
351
        lexer->recover(lexer);
352
      }
353
      else
354
      {
355
27206993
        if (lexer->rec->state->token == NULL)
356
        {
357
          // Emit the real token, which adds it in to the token stream basically
358
          //
359
          // *** cvc5 Edit: call emit on the lexer object
360
17920468
          lexer->emit(lexer);
361
        }
362
9286525
        else if (lexer->rec->state->token == &(toksource->skipToken))
363
        {
364
          // A real token could have been generated, but "Computer say's naaaaah" and it
365
          // it is just something we need to skip altogether.
366
          //
367
9286525
          continue;
368
        }
369
370
        // Good token, not skipped, not EOF token
371
        //
372
17920468
        return lexer->rec->state->token;
373
      }
374
9286525
    }
375
  }
376
}
377
378
/* *** cvc5 NOTE ***
379
 * This is copied, totaly unmodified, from antlr3lexer.c
380
 * in order to use nextTokenStr previously defined.
381
 *
382
 */
383
pANTLR3_COMMON_TOKEN
384
17925774
AntlrInput::nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
385
{
386
	pANTLR3_COMMON_TOKEN tok;
387
388
	// Find the next token in the current stream
389
	//
390
17925774
	tok = nextTokenStr(toksource);
391
392
	// If we got to the EOF token then switch to the previous
393
	// input stream if there were any and just return the
394
	// EOF if there are none. We must check the next token
395
	// in any outstanding input stream we pop into the active
396
	// role to see if it was sitting at EOF after PUSHing the
397
	// stream we just consumed, otherwise we will return EOF
398
	// on the reinstalled input stream, when in actual fact
399
	// there might be more input streams to POP before the
400
	// real EOF of the whole logical inptu stream. Hence we
401
	// use a while loop here until we find somethign in the stream
402
	// that isn't EOF or we reach the actual end of the last input
403
	// stream on the stack.
404
	//
405
17925776
	while	(tok->type == ANTLR3_TOKEN_EOF)
406
	{
407
		pANTLR3_LEXER   lexer;
408
409
5301
		lexer   = (pANTLR3_LEXER)(toksource->super);
410
411
5301
		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
412
		{
413
			// We have another input stream in the stack so we
414
			// need to revert to it, then resume the loop to check
415
			// it wasn't sitting at EOF itself.
416
			//
417
7
			lexer->popCharStream(lexer);
418
7
			tok = nextTokenStr(toksource);
419
		}
420
		else
421
		{
422
			// There were no more streams on the input stack
423
			// so this EOF is the 'real' logical EOF for
424
			// the input stream. So we just exit the loop and
425
			// return the EOF we have found.
426
			//
427
5294
			break;
428
		}
429
430
	}
431
432
	// return whatever token we have, which may be EOF
433
	//
434
17925762
	return  tok;
435
}
436
437
438
439
} // namespace parser
440
28182
}  // namespace cvc5