1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Christopher L. Conway, Francois Bobot, Mathias Preiner |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* [[ Add one-line brief description here ]] |
14 |
|
* |
15 |
|
* [[ Add lengthier description here ]] |
16 |
|
* \todo document this file |
17 |
|
*/ |
18 |
|
|
19 |
|
/* |
20 |
|
* The functions in this file are based on implementations in libantlr3c, |
21 |
|
* with only minor cvc5-specific changes. |
22 |
|
*/ |
23 |
|
|
24 |
|
// [The "BSD licence"] |
25 |
|
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC |
26 |
|
// http://www.temporal-wave.com |
27 |
|
// http://www.linkedin.com/in/jimidle |
28 |
|
// |
29 |
|
// All rights reserved. |
30 |
|
// |
31 |
|
// Redistribution and use in source and binary forms, with or without |
32 |
|
// modification, are permitted provided that the following conditions |
33 |
|
// are met: |
34 |
|
// 1. Redistributions of source code must retain the above copyright |
35 |
|
// notice, this list of conditions and the following disclaimer. |
36 |
|
// 2. Redistributions in binary form must reproduce the above copyright |
37 |
|
// notice, this list of conditions and the following disclaimer in the |
38 |
|
// documentation and/or other materials provided with the distribution. |
39 |
|
// 3. The name of the author may not be used to endorse or promote products |
40 |
|
// derived from this software without specific prior written permission. |
41 |
|
// |
42 |
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
43 |
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
44 |
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
45 |
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
46 |
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
47 |
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
48 |
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
49 |
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
50 |
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
51 |
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
52 |
|
|
53 |
|
#include <antlr3.h> |
54 |
|
|
55 |
|
#include <sstream> |
56 |
|
|
57 |
|
#include "base/check.h" |
58 |
|
#include "parser/antlr_input.h" |
59 |
|
#include "parser/parser.h" |
60 |
|
#include "parser/parser_exception.h" |
61 |
|
|
62 |
|
using namespace std; |
63 |
|
|
64 |
|
namespace cvc5 { |
65 |
|
namespace parser { |
66 |
|
|
67 |
|
/// Report a recognition problem. |
68 |
|
/// |
69 |
|
/// This method sets errorRecovery to indicate the parser is recovering |
70 |
|
/// not parsing. Once in recovery mode, no errors are generated. |
71 |
|
/// To get out of recovery mode, the parser must successfully match |
72 |
|
/// a token (after a resync). So it will go: |
73 |
|
/// |
74 |
|
/// 1. error occurs |
75 |
|
/// 2. enter recovery mode, report error |
76 |
|
/// 3. consume until token found in resynch set |
77 |
|
/// 4. try to resume parsing |
78 |
|
/// 5. next match() will reset errorRecovery mode |
79 |
|
/// |
80 |
|
/// If you override, make sure to update errorCount if you care about that. |
81 |
|
/// |
82 |
|
/* *** cvc5 NOTE *** |
83 |
|
* This function is has been modified in not-completely-trivial ways from its |
84 |
|
* libantlr3c implementation to support more informative error messages and to |
85 |
|
* invoke the error reporting mechanism of the Input class instead of the |
86 |
|
* default error printer. |
87 |
|
*/ |
88 |
36 |
void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) { |
89 |
36 |
pANTLR3_EXCEPTION ex = recognizer->state->exception; |
90 |
36 |
pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames; |
91 |
72 |
stringstream ss; |
92 |
|
|
93 |
|
// Dig the cvc5 objects out of the ANTLR3 mess |
94 |
36 |
pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super); |
95 |
36 |
Assert(antlr3Parser != NULL); |
96 |
36 |
Parser *parser = (Parser*)(antlr3Parser->super); |
97 |
36 |
Assert(parser != NULL); |
98 |
36 |
AntlrInput *input = (AntlrInput*) parser->getInput() ; |
99 |
36 |
Assert(input != NULL); |
100 |
|
|
101 |
|
// Signal we are in error recovery now |
102 |
36 |
recognizer->state->errorRecovery = ANTLR3_TRUE; |
103 |
|
|
104 |
|
// Indicate this recognizer had an error while processing. |
105 |
36 |
recognizer->state->errorCount++; |
106 |
|
|
107 |
|
// Call the builtin error formatter |
108 |
|
// recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); |
109 |
|
|
110 |
|
/* TODO: Make error messages more useful, maybe by including more expected tokens and information |
111 |
|
* about the current token. */ |
112 |
36 |
switch(ex->type) { |
113 |
|
case ANTLR3_UNWANTED_TOKEN_EXCEPTION: |
114 |
|
|
115 |
|
// Indicates that the recognizer was fed a token which seems to be |
116 |
|
// spurious input. We can detect this when the token that follows |
117 |
|
// this unwanted token would normally be part of the syntactically |
118 |
|
// correct stream. Then we can see that the token we are looking at |
119 |
|
// is just something that should not be there and throw this exception. |
120 |
|
// |
121 |
|
if(tokenNames == NULL) { |
122 |
|
ss << "Unexpected token." ; |
123 |
|
} else { |
124 |
|
if(ex->expecting == ANTLR3_TOKEN_EOF) { |
125 |
|
ss << "Expected end of file."; |
126 |
|
} else { |
127 |
|
ss << "Expected " << tokenNames[ex->expecting] |
128 |
|
<< ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; |
129 |
|
} |
130 |
|
} |
131 |
|
break; |
132 |
|
|
133 |
12 |
case ANTLR3_MISSING_TOKEN_EXCEPTION: |
134 |
|
|
135 |
|
// Indicates that the recognizer detected that the token we just |
136 |
|
// hit would be valid syntactically if preceded by a particular |
137 |
|
// token. Perhaps a missing ';' at line end or a missing ',' in an |
138 |
|
// expression list, and such like. |
139 |
|
// |
140 |
12 |
if(tokenNames == NULL) { |
141 |
|
ss << "Missing token (" << ex->expecting << ")."; |
142 |
|
} else { |
143 |
12 |
if(ex->expecting == ANTLR3_TOKEN_EOF) { |
144 |
|
ss << "Missing end of file marker."; |
145 |
12 |
} else if( ex->expecting == 0 ) { |
146 |
12 |
ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; |
147 |
12 |
if( std::string(tokenText((pANTLR3_COMMON_TOKEN)ex->token)) == std::string("IN") ) { |
148 |
|
ss << " Did you mean: `IS_IN'?"; |
149 |
|
} |
150 |
|
} else { |
151 |
|
ss << "Missing " << tokenNames[ex->expecting] << "."; |
152 |
|
} |
153 |
|
} |
154 |
12 |
break; |
155 |
|
|
156 |
|
case ANTLR3_RECOGNITION_EXCEPTION: |
157 |
|
|
158 |
|
// Indicates that the recognizer received a token |
159 |
|
// in the input that was not predicted. This is the basic exception type |
160 |
|
// from which all others are derived. So we assume it was a syntax error. |
161 |
|
// You may get this if there are not more tokens and more are needed |
162 |
|
// to complete a parse for instance. |
163 |
|
// |
164 |
|
ss <<"Syntax error."; |
165 |
|
break; |
166 |
|
|
167 |
4 |
case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: |
168 |
|
|
169 |
|
// We were expecting to see one thing and got another. This is the |
170 |
|
// most common error if we could not detect a missing or unwanted token. |
171 |
|
// Here you can spend your efforts to |
172 |
|
// derive more useful error messages based on the expected |
173 |
|
// token set and the last token and so on. The error following |
174 |
|
// bitmaps do a good job of reducing the set that we were looking |
175 |
|
// for down to something small. Knowing what you are parsing may be |
176 |
|
// able to allow you to be even more specific about an error. |
177 |
|
// |
178 |
4 |
if(tokenNames == NULL) { |
179 |
|
ss << "Syntax error."; |
180 |
|
} else { |
181 |
4 |
if(ex->expecting == ANTLR3_TOKEN_EOF) { |
182 |
|
ss << "Expected end of file."; |
183 |
4 |
} else if( ex->expecting == 0 ) { |
184 |
4 |
ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; |
185 |
|
} else { |
186 |
|
ss << "Expected " << tokenNames[ex->expecting] << "."; |
187 |
|
} |
188 |
|
} |
189 |
4 |
break; |
190 |
|
|
191 |
18 |
case ANTLR3_NO_VIABLE_ALT_EXCEPTION: |
192 |
|
// We could not pick any alt decision from the input given |
193 |
|
// so god knows what happened - however when you examine your grammar, |
194 |
|
// you should. It means that at the point where the current token occurred |
195 |
|
// that the DFA indicates nowhere to go from here. |
196 |
|
// |
197 |
18 |
ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; |
198 |
18 |
break; |
199 |
|
|
200 |
|
case ANTLR3_MISMATCHED_SET_EXCEPTION: |
201 |
|
|
202 |
|
{ |
203 |
|
ANTLR3_UINT32 count; |
204 |
|
ANTLR3_UINT32 bit; |
205 |
|
ANTLR3_UINT32 size; |
206 |
|
ANTLR3_UINT32 numbits; |
207 |
|
pANTLR3_BITSET errBits; |
208 |
|
|
209 |
|
// This means we were able to deal with one of a set of |
210 |
|
// possible tokens at this point, but we did not see any |
211 |
|
// member of that set. |
212 |
|
// |
213 |
|
ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) |
214 |
|
<< "'. Expected one of: "; |
215 |
|
|
216 |
|
// What tokens could we have accepted at this point in the |
217 |
|
// parse? |
218 |
|
// |
219 |
|
count = 0; |
220 |
|
errBits = antlr3BitsetLoad(ex->expectingSet); |
221 |
|
numbits = errBits->numBits(errBits); |
222 |
|
size = errBits->size(errBits); |
223 |
|
|
224 |
|
if(size > 0) { |
225 |
|
// However many tokens we could have dealt with here, it is usually |
226 |
|
// not useful to print ALL of the set here. I arbitrarily chose 8 |
227 |
|
// here, but you should do whatever makes sense for you of course. |
228 |
|
// No token number 0, so look for bit 1 and on. |
229 |
|
// |
230 |
|
for(bit = 1; bit < numbits && count < 8 && count < size; bit++) { |
231 |
|
// TODO: This doesn;t look right - should be asking if the bit is set!! |
232 |
|
// |
233 |
|
if(tokenNames[bit]) { |
234 |
|
if( count++ > 0 ) { |
235 |
|
ss << ", "; |
236 |
|
} |
237 |
|
ss << tokenNames[bit]; |
238 |
|
} |
239 |
|
} |
240 |
|
} else { |
241 |
|
Assert(false); //("Parse error with empty set of expected tokens."); |
242 |
|
} |
243 |
|
} |
244 |
|
break; |
245 |
|
|
246 |
2 |
case ANTLR3_EARLY_EXIT_EXCEPTION: |
247 |
|
|
248 |
|
// We entered a loop requiring a number of token sequences |
249 |
|
// but found a token that ended that sequence earlier than |
250 |
|
// we should have done. |
251 |
|
// |
252 |
|
ss << "Sequence terminated early by token: '" |
253 |
2 |
<< tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'."; |
254 |
2 |
break; |
255 |
|
|
256 |
|
default: |
257 |
|
|
258 |
|
// We don't handle any other exceptions here, but you can |
259 |
|
// if you wish. If we get an exception that hits this point |
260 |
|
// then we are just going to report what we know about the |
261 |
|
// token. |
262 |
|
// |
263 |
|
Assert(false); //("Unexpected exception in parser."); |
264 |
|
break; |
265 |
|
} |
266 |
|
|
267 |
|
// Call the error display routine |
268 |
72 |
input->parseError(ss.str(), ((pANTLR3_COMMON_TOKEN)ex->token)->type == ANTLR3_TOKEN_EOF); |
269 |
|
} |
270 |
|
|
271 |
|
/// |
272 |
|
/// \brief |
273 |
|
/// Returns the next available token from the current input stream. |
274 |
|
/// |
275 |
|
/// \param toksource |
276 |
|
/// Points to the implementation of a token source. The lexer is |
277 |
|
/// addressed by the super structure pointer. |
278 |
|
/// |
279 |
|
/// \returns |
280 |
|
/// The next token in the current input stream or the EOF token |
281 |
|
/// if there are no more tokens. |
282 |
|
/// |
283 |
|
/// \remarks |
284 |
|
/// Write remarks for nextToken here. |
285 |
|
/// |
286 |
|
/// \see nextToken |
287 |
|
/// |
288 |
|
/* *** cvc5 NOTE *** |
289 |
|
* This is copied, largely unmodified, from antlr3lexer.c |
290 |
|
* |
291 |
|
*/ |
292 |
|
pANTLR3_COMMON_TOKEN |
293 |
17531766 |
AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource) |
294 |
|
{ |
295 |
|
pANTLR3_LEXER lexer; |
296 |
|
|
297 |
17531766 |
lexer = (pANTLR3_LEXER)(toksource->super); |
298 |
|
|
299 |
|
/// Loop until we get a non skipped token or EOF |
300 |
|
/// |
301 |
|
for (;;) |
302 |
|
{ |
303 |
|
// Get rid of any previous token (token factory takes care of |
304 |
|
// any de-allocation when this token is finally used up. |
305 |
|
// |
306 |
17531766 |
lexer->rec->state->token = NULL; |
307 |
17531766 |
lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception |
308 |
17531766 |
lexer->rec->state->failed = ANTLR3_FALSE; |
309 |
|
|
310 |
|
// Now call the matching rules and see if we can generate a new token |
311 |
|
// |
312 |
|
for (;;) |
313 |
|
{ |
314 |
|
// Record the start of the token in our input stream. |
315 |
|
// |
316 |
26620708 |
lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; |
317 |
26620708 |
lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream); |
318 |
26620708 |
lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input); |
319 |
26620708 |
lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input); |
320 |
26620708 |
lexer->rec->state->text = NULL; |
321 |
|
|
322 |
26620708 |
if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF) |
323 |
|
{ |
324 |
|
// Reached the end of the current stream, nothing more to do if this is |
325 |
|
// the last in the stack. |
326 |
|
// |
327 |
5584 |
pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); |
328 |
|
|
329 |
5584 |
teof->setStartIndex (teof, lexer->getCharIndex(lexer)); |
330 |
5584 |
teof->setStopIndex (teof, lexer->getCharIndex(lexer)); |
331 |
5584 |
teof->setLine (teof, lexer->getLine(lexer)); |
332 |
5584 |
teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it |
333 |
5584 |
return teof; |
334 |
|
} |
335 |
|
|
336 |
26615124 |
lexer->rec->state->token = NULL; |
337 |
26615124 |
lexer->rec->state->error = ANTLR3_FALSE; // Start out without an exception |
338 |
26615124 |
lexer->rec->state->failed = ANTLR3_FALSE; |
339 |
|
|
340 |
|
// Call the generated lexer, see if it can get a new token together. |
341 |
|
// |
342 |
26615124 |
lexer->mTokens(lexer->ctx); |
343 |
|
|
344 |
26615124 |
if (lexer->rec->state->error == ANTLR3_TRUE) |
345 |
|
{ |
346 |
|
// Recognition exception, report it and try to recover. |
347 |
|
// |
348 |
12 |
lexer->rec->state->failed = ANTLR3_TRUE; |
349 |
|
// *** cvc5 EDIT: Just call the AntlrInput error routine |
350 |
12 |
lexerError(lexer->rec); |
351 |
|
lexer->recover(lexer); |
352 |
|
} |
353 |
|
else |
354 |
|
{ |
355 |
26615112 |
if (lexer->rec->state->token == NULL) |
356 |
|
{ |
357 |
|
// Emit the real token, which adds it in to the token stream basically |
358 |
|
// |
359 |
|
// *** cvc5 Edit: call emit on the lexer object |
360 |
17526170 |
lexer->emit(lexer); |
361 |
|
} |
362 |
9088942 |
else if (lexer->rec->state->token == &(toksource->skipToken)) |
363 |
|
{ |
364 |
|
// A real token could have been generated, but "Computer say's naaaaah" and it |
365 |
|
// it is just something we need to skip altogether. |
366 |
|
// |
367 |
9088942 |
continue; |
368 |
|
} |
369 |
|
|
370 |
|
// Good token, not skipped, not EOF token |
371 |
|
// |
372 |
17526170 |
return lexer->rec->state->token; |
373 |
|
} |
374 |
9088942 |
} |
375 |
|
} |
376 |
|
} |
377 |
|
|
378 |
|
/* *** cvc5 NOTE *** |
379 |
|
* This is copied, totaly unmodified, from antlr3lexer.c |
380 |
|
* in order to use nextTokenStr previously defined. |
381 |
|
* |
382 |
|
*/ |
383 |
|
pANTLR3_COMMON_TOKEN |
384 |
17531759 |
AntlrInput::nextToken (pANTLR3_TOKEN_SOURCE toksource) |
385 |
|
{ |
386 |
|
pANTLR3_COMMON_TOKEN tok; |
387 |
|
|
388 |
|
// Find the next token in the current stream |
389 |
|
// |
390 |
17531759 |
tok = nextTokenStr(toksource); |
391 |
|
|
392 |
|
// If we got to the EOF token then switch to the previous |
393 |
|
// input stream if there were any and just return the |
394 |
|
// EOF if there are none. We must check the next token |
395 |
|
// in any outstanding input stream we pop into the active |
396 |
|
// role to see if it was sitting at EOF after PUSHing the |
397 |
|
// stream we just consumed, otherwise we will return EOF |
398 |
|
// on the reinstalled input stream, when in actual fact |
399 |
|
// there might be more input streams to POP before the |
400 |
|
// real EOF of the whole logical inptu stream. Hence we |
401 |
|
// use a while loop here until we find somethign in the stream |
402 |
|
// that isn't EOF or we reach the actual end of the last input |
403 |
|
// stream on the stack. |
404 |
|
// |
405 |
17531761 |
while (tok->type == ANTLR3_TOKEN_EOF) |
406 |
|
{ |
407 |
|
pANTLR3_LEXER lexer; |
408 |
|
|
409 |
5584 |
lexer = (pANTLR3_LEXER)(toksource->super); |
410 |
|
|
411 |
5584 |
if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) |
412 |
|
{ |
413 |
|
// We have another input stream in the stack so we |
414 |
|
// need to revert to it, then resume the loop to check |
415 |
|
// it wasn't sitting at EOF itself. |
416 |
|
// |
417 |
7 |
lexer->popCharStream(lexer); |
418 |
7 |
tok = nextTokenStr(toksource); |
419 |
|
} |
420 |
|
else |
421 |
|
{ |
422 |
|
// There were no more streams on the input stack |
423 |
|
// so this EOF is the 'real' logical EOF for |
424 |
|
// the input stream. So we just exit the loop and |
425 |
|
// return the EOF we have found. |
426 |
|
// |
427 |
5577 |
break; |
428 |
|
} |
429 |
|
|
430 |
|
} |
431 |
|
|
432 |
|
// return whatever token we have, which may be EOF |
433 |
|
// |
434 |
17531747 |
return tok; |
435 |
|
} |
436 |
|
|
437 |
|
|
438 |
|
|
439 |
|
} // namespace parser |
440 |
29496 |
} // namespace cvc5 |