GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input_imports.cpp Lines: 73 111 65.8 %
Date: 2021-03-23 Branches: 55 238 23.1 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file antlr_input_imports.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Christopher L. Conway, Francois Bobot, Morgan Deters
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief [[ Add one-line brief description here ]]
13
 **
14
 ** [[ Add lengthier description here ]]
15
 ** \todo document this file
16
 **/
17
18
/*
19
 * The functions in this file are based on implementations in libantlr3c,
20
 * with only minor CVC4-specific changes.
21
 */
22
23
// [The "BSD licence"]
24
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
25
// http://www.temporal-wave.com
26
// http://www.linkedin.com/in/jimidle
27
//
28
// All rights reserved.
29
//
30
// Redistribution and use in source and binary forms, with or without
31
// modification, are permitted provided that the following conditions
32
// are met:
33
// 1. Redistributions of source code must retain the above copyright
34
//    notice, this list of conditions and the following disclaimer.
35
// 2. Redistributions in binary form must reproduce the above copyright
36
//    notice, this list of conditions and the following disclaimer in the
37
//    documentation and/or other materials provided with the distribution.
38
// 3. The name of the author may not be used to endorse or promote products
39
//    derived from this software without specific prior written permission.
40
//
41
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51
52
#include <antlr3.h>
53
54
#include <sstream>
55
56
#include "base/check.h"
57
#include "parser/antlr_input.h"
58
#include "parser/parser.h"
59
#include "parser/parser_exception.h"
60
61
using namespace std;
62
63
namespace CVC4 {
64
namespace parser {
65
66
/// Report a recognition problem.
67
///
68
/// This method sets errorRecovery to indicate the parser is recovering
69
/// not parsing.  Once in recovery mode, no errors are generated.
70
/// To get out of recovery mode, the parser must successfully match
71
/// a token (after a resync).  So it will go:
72
///
73
///             1. error occurs
74
///             2. enter recovery mode, report error
75
///             3. consume until token found in resynch set
76
///             4. try to resume parsing
77
///             5. next match() will reset errorRecovery mode
78
///
79
/// If you override, make sure to update errorCount if you care about that.
80
///
81
/* *** CVC4 NOTE ***
82
 * This function has been modified in not-completely-trivial ways from its
83
 * libantlr3c implementation to support more informative error messages and to
84
 * invoke the error reporting mechanism of the Input class instead of the
85
 * default error printer.
86
 */
87
36
void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
88
36
  pANTLR3_EXCEPTION ex = recognizer->state->exception;
89
36
  pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
90
72
  stringstream ss;
91
92
  // Dig the CVC4 objects out of the ANTLR3 mess
93
36
  pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super);
94
36
  Assert(antlr3Parser != NULL);
95
36
  Parser *parser = (Parser*)(antlr3Parser->super);
96
36
  Assert(parser != NULL);
97
36
  AntlrInput *input = (AntlrInput*) parser->getInput() ;
98
36
  Assert(input != NULL);
99
100
  // Signal we are in error recovery now
101
36
  recognizer->state->errorRecovery = ANTLR3_TRUE;
102
103
  // Indicate this recognizer had an error while processing.
104
36
  recognizer->state->errorCount++;
105
106
  // Call the builtin error formatter
107
  // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
108
109
  /* TODO: Make error messages more useful, maybe by including more expected tokens and information
110
   * about the current token. */
111
36
  switch(ex->type) {
112
  case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
113
114
    // Indicates that the recognizer was fed a token which seems to be
115
    // spurious input. We can detect this when the token that follows
116
    // this unwanted token would normally be part of the syntactically
117
    // correct stream. Then we can see that the token we are looking at
118
    // is just something that should not be there and throw this exception.
119
    //
120
    if(tokenNames == NULL) {
121
      ss << "Unexpected token." ;
122
    } else {
123
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
124
        ss << "Expected end of file.";
125
      } else {
126
        ss << "Expected " << tokenNames[ex->expecting]
127
           << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
128
      }
129
    }
130
    break;
131
132
12
  case ANTLR3_MISSING_TOKEN_EXCEPTION:
133
134
    // Indicates that the recognizer detected that the token we just
135
    // hit would be valid syntactically if preceded by a particular
136
    // token. Perhaps a missing ';' at line end or a missing ',' in an
137
    // expression list, and such like.
138
    //
139
12
    if(tokenNames == NULL) {
140
      ss << "Missing token (" << ex->expecting << ").";
141
    } else {
142
12
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
143
        ss << "Missing end of file marker.";
144
12
      } else if( ex->expecting == 0 ) {
145
12
        ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
146
12
        if( std::string(tokenText((pANTLR3_COMMON_TOKEN)ex->token)) == std::string("IN") ) {
147
          ss << " Did you mean: `IS_IN'?";
148
        }
149
      } else {
150
        ss << "Missing " << tokenNames[ex->expecting] << ".";
151
      }
152
    }
153
12
    break;
154
155
  case ANTLR3_RECOGNITION_EXCEPTION:
156
157
    // Indicates that the recognizer received a token
158
    // in the input that was not predicted. This is the basic exception type
159
    // from which all others are derived. So we assume it was a syntax error.
160
    // You may get this if there are not more tokens and more are needed
161
    // to complete a parse for instance.
162
    //
163
    ss <<"Syntax error.";
164
    break;
165
166
4
  case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
167
168
    // We were expecting to see one thing and got another. This is the
169
    // most common error if we could not detect a missing or unwanted token.
170
    // Here you can spend your efforts to
171
    // derive more useful error messages based on the expected
172
    // token set and the last token and so on. The error following
173
    // bitmaps do a good job of reducing the set that we were looking
174
    // for down to something small. Knowing what you are parsing may be
175
    // able to allow you to be even more specific about an error.
176
    //
177
4
    if(tokenNames == NULL) {
178
      ss << "Syntax error.";
179
    } else {
180
4
      if(ex->expecting == ANTLR3_TOKEN_EOF) {
181
        ss << "Expected end of file.";
182
4
      } else if( ex->expecting == 0 ) {
183
4
        ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
184
      } else {
185
        ss << "Expected " << tokenNames[ex->expecting] << ".";
186
      }
187
    }
188
4
    break;
189
190
18
  case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
191
    // We could not pick any alt decision from the input given
192
    // so god knows what happened - however when you examine your grammar,
193
    // you should. It means that at the point where the current token occurred
194
    // that the DFA indicates nowhere to go from here.
195
    //
196
18
    ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
197
18
    break;
198
199
  case ANTLR3_MISMATCHED_SET_EXCEPTION:
200
201
  {
202
    ANTLR3_UINT32 count;
203
    ANTLR3_UINT32 bit;
204
    ANTLR3_UINT32 size;
205
    ANTLR3_UINT32 numbits;
206
    pANTLR3_BITSET errBits;
207
208
    // This means we were able to deal with one of a set of
209
    // possible tokens at this point, but we did not see any
210
    // member of that set.
211
    //
212
    ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token)
213
       << "'. Expected one of: ";
214
215
    // What tokens could we have accepted at this point in the
216
    // parse?
217
    //
218
    count = 0;
219
    errBits = antlr3BitsetLoad(ex->expectingSet);
220
    numbits = errBits->numBits(errBits);
221
    size = errBits->size(errBits);
222
223
    if(size > 0) {
224
      // However many tokens we could have dealt with here, it is usually
225
      // not useful to print ALL of the set here. I arbitrarily chose 8
226
      // here, but you should do whatever makes sense for you of course.
227
      // No token number 0, so look for bit 1 and on.
228
      //
229
      for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
230
        // TODO: This doesn;t look right - should be asking if the bit is set!!
231
        //
232
        if(tokenNames[bit]) {
233
          if( count++ > 0 ) {
234
            ss << ", ";
235
          }
236
          ss << tokenNames[bit];
237
        }
238
      }
239
    } else {
240
      Assert(false);  //("Parse error with empty set of expected tokens.");
241
    }
242
  }
243
    break;
244
245
2
  case ANTLR3_EARLY_EXIT_EXCEPTION:
246
247
    // We entered a loop requiring a number of token sequences
248
    // but found a token that ended that sequence earlier than
249
    // we should have done.
250
    //
251
    ss << "Sequence terminated early by token: '"
252
2
       << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
253
2
    break;
254
255
  default:
256
257
    // We don't handle any other exceptions here, but you can
258
    // if you wish. If we get an exception that hits this point
259
    // then we are just going to report what we know about the
260
    // token.
261
    //
262
    Assert(false);  //("Unexpected exception in parser.");
263
    break;
264
  }
265
266
  // Call the error display routine
267
72
  input->parseError(ss.str(), ((pANTLR3_COMMON_TOKEN)ex->token)->type == ANTLR3_TOKEN_EOF);
268
}
269
270
///
271
/// \brief
272
/// Returns the next available token from the current input stream.
273
///
274
/// \param toksource
275
/// Points to the implementation of a token source. The lexer is
276
/// addressed by the super structure pointer.
277
///
278
/// \returns
279
/// The next token in the current input stream or the EOF token
280
/// if there are no more tokens.
281
///
282
/// \remarks
283
/// Write remarks for nextToken here.
284
///
285
/// \see nextToken
286
///
287
/* *** CVC4 NOTE ***
288
 * This is copied, largely unmodified, from antlr3lexer.c
289
 *
290
 */
291
pANTLR3_COMMON_TOKEN
AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
{
  // The lexer is reached through the token source's super pointer.
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(toksource->super);

  // Drop any previous token and start out without an exception.
  //
  lexer->rec->state->token = NULL;
  lexer->rec->state->error = ANTLR3_FALSE;
  lexer->rec->state->failed = ANTLR3_FALSE;

  // Keep lexing until a non-skipped token or EOF can be returned.
  //
  while (true)
  {
    // Record where the upcoming token starts in the input stream.
    //
    lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
    lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream);
    lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input);
    lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
    lexer->rec->state->text = NULL;

    if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
    {
      // End of the current stream: stamp the token source's EOF token
      // with the current position and hand it back.
      //
      pANTLR3_COMMON_TOKEN eofTok = &(toksource->eofToken);

      eofTok->setStartIndex (eofTok, lexer->getCharIndex(lexer));
      eofTok->setStopIndex (eofTok, lexer->getCharIndex(lexer));
      eofTok->setLine (eofTok, lexer->getLine(lexer));
      // Not really manufactured, but this stops things from trying to free it
      eofTok->factoryMade = ANTLR3_TRUE;
      return eofTok;
    }

    lexer->rec->state->token = NULL;
    lexer->rec->state->error = ANTLR3_FALSE;
    lexer->rec->state->failed = ANTLR3_FALSE;

    // Let the generated lexer try to put a new token together.
    //
    lexer->mTokens(lexer->ctx);

    if (lexer->rec->state->error == ANTLR3_TRUE)
    {
      // Recognition exception: report it, recover, and try again.
      //
      lexer->rec->state->failed = ANTLR3_TRUE;
      // *** CVC4 EDIT: Just call the AntlrInput error routine
      lexerError(lexer->rec);
      lexer->recover(lexer);
      continue;
    }

    if (lexer->rec->state->token == &(toksource->skipToken))
    {
      // The rule asked for this input to be skipped altogether.
      //
      continue;
    }

    if (lexer->rec->state->token == NULL)
    {
      // No token was set by the rule, so emit the real token now.
      //
      // *** CVC4 Edit: call emit on the lexer object
      lexer->emit(lexer);
    }

    // Good token, not skipped, not EOF token
    //
    return lexer->rec->state->token;
  }
}
376
377
/* *** CVC4 NOTE ***
378
 * This is copied, totally unmodified, from antlr3lexer.c
379
 * in order to use nextTokenStr previously defined.
380
 *
381
 */
382
pANTLR3_COMMON_TOKEN
AntlrInput::nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
{
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(toksource->super);

  // Fetch the next token from the current stream.
  //
  pANTLR3_COMMON_TOKEN tok = nextTokenStr(toksource);

  // An EOF only ends the logical input when no other input stream is
  // waiting on the stack. Otherwise pop back to the previous stream and
  // ask again; that stream may itself be sitting at EOF, hence the loop.
  //
  while (tok->type == ANTLR3_TOKEN_EOF)
  {
    const bool streamsPending =
        lexer->rec->state->streams != NULL
        && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0;

    if (!streamsPending)
    {
      // No more streams on the input stack: this is the 'real' EOF.
      //
      break;
    }

    // Revert to the stacked stream and read its next token.
    //
    lexer->popCharStream(lexer);
    tok = nextTokenStr(toksource);
  }

  // Return whatever token we have, which may be EOF.
  //
  return tok;
}
435
436
437
438
} // namespace parser
439
26673
} // namespace CVC4