1 |
|
/****************************************************************************** |
2 |
|
* Top contributors (to current version): |
3 |
|
* Christopher L. Conway, Tim King, Morgan Deters |
4 |
|
* |
5 |
|
* This file is part of the cvc5 project. |
6 |
|
* |
7 |
|
* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS |
8 |
|
* in the top-level source directory and their institutional affiliations. |
9 |
|
* All rights reserved. See the file COPYING in the top-level source |
10 |
|
* directory for licensing information. |
11 |
|
* **************************************************************************** |
12 |
|
* |
13 |
|
* Base for ANTLR parser classes. |
14 |
|
*/ |
15 |
|
|
16 |
|
#ifndef CVC5__PARSER__ANTLR_INPUT_H |
17 |
|
#define CVC5__PARSER__ANTLR_INPUT_H |
18 |
|
|
19 |
|
#include <antlr3.h> |
20 |
|
|
21 |
|
#include <iostream> |
22 |
|
#include <sstream> |
23 |
|
#include <stdexcept> |
24 |
|
#include <string> |
25 |
|
#include <vector> |
26 |
|
|
27 |
|
#include "base/check.h" |
28 |
|
#include "base/output.h" |
29 |
|
#include "cvc5parser_private.h" |
30 |
|
#include "parser/bounded_token_buffer.h" |
31 |
|
#include "parser/input.h" |
32 |
|
#include "parser/line_buffer.h" |
33 |
|
#include "parser/parser_exception.h" |
34 |
|
|
35 |
|
namespace cvc5 { |
36 |
|
namespace parser { |
37 |
|
|
38 |
|
/** Wrapper around an ANTLR3 input stream. */ |
39 |
|
class AntlrInputStream : public InputStream { |
40 |
|
private: |
41 |
|
pANTLR3_INPUT_STREAM d_input; |
42 |
|
|
43 |
|
/** |
44 |
|
* If the AntlrInputStream corresponds to reading from a string, |
45 |
|
* this is the string literal. The memory is owned by the Antlr3Input. It is |
46 |
|
* assumed to be copied from malloc, and can be free'd at destruction time. |
47 |
|
* It is otherwise NULL. |
48 |
|
*/ |
49 |
|
pANTLR3_UINT8 d_inputString; |
50 |
|
|
51 |
|
LineBuffer* d_line_buffer; |
52 |
|
|
53 |
|
AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, |
54 |
|
bool fileIsTemporary, pANTLR3_UINT8 inputString, |
55 |
|
LineBuffer* line_buffer); |
56 |
|
|
57 |
|
/* This is private and unimplemented, because you should never use it. */ |
58 |
|
AntlrInputStream(const AntlrInputStream& inputStream) = delete; |
59 |
|
|
60 |
|
/* This is private and unimplemented, because you should never use it. */ |
61 |
|
AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete; |
62 |
|
|
63 |
|
public: |
64 |
|
|
65 |
|
virtual ~AntlrInputStream(); |
66 |
|
|
67 |
|
pANTLR3_INPUT_STREAM getAntlr3InputStream() const; |
68 |
|
|
69 |
|
/** Create a file input. |
70 |
|
* |
71 |
|
* @param name the path of the file to read |
72 |
|
* @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the |
73 |
|
* input will use the standard ANTLR3 I/O implementation. |
74 |
|
*/ |
75 |
|
static AntlrInputStream* newFileInputStream(const std::string& name, |
76 |
|
bool useMmap = false); |
77 |
|
|
78 |
|
/** Create an input from an istream. */ |
79 |
|
static AntlrInputStream* newStreamInputStream(std::istream& input, |
80 |
|
const std::string& name); |
81 |
|
|
82 |
|
/** Create a string input. |
83 |
|
* NOTE: the new AntlrInputStream will take ownership of input over |
84 |
|
* and free it at destruction time. |
85 |
|
* |
86 |
|
* @param input the string to read |
87 |
|
* @param name the "filename" to use when reporting errors |
88 |
|
*/ |
89 |
|
static AntlrInputStream* newStringInputStream(const std::string& input, |
90 |
|
const std::string& name); |
91 |
|
};/* class AntlrInputStream */ |
92 |
|
|
93 |
|
class Parser; |
94 |
|
|
95 |
|
/** |
96 |
|
* An input to be parsed. The static factory methods in this class (e.g., |
97 |
|
* <code>newFileInput</code>, <code>newStringInput</code>) create a parser |
98 |
|
* for the given input language and attach it to an input source of the |
99 |
|
* appropriate type. |
100 |
|
*/ |
101 |
|
class AntlrInput : public Input { |
102 |
|
/** The token lookahead used to lex and parse the input. This should usually be equal to |
103 |
|
* <code>K</code> for an LL(k) grammar. */ |
104 |
|
unsigned int d_lookahead; |
105 |
|
|
106 |
|
/** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It |
107 |
|
* must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */ |
108 |
|
pANTLR3_LEXER d_lexer; |
109 |
|
|
110 |
|
/** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It |
111 |
|
* must be set by a call to <code>setParser</code>, preferably in the subclass constructor. |
112 |
|
* The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and |
113 |
|
* <code>reportError</code> will be set to <code>Input::reportError</code>. */ |
114 |
|
pANTLR3_PARSER d_parser; |
115 |
|
|
116 |
|
/** The ANTLR3 input stream associated with this input. */ |
117 |
|
pANTLR3_INPUT_STREAM d_antlr3InputStream; |
118 |
|
|
119 |
|
/** The ANTLR3 bounded token buffer associated with this input. |
120 |
|
* We only need this so we can free it on exit. |
121 |
|
* This is set by <code>setLexer</code>. |
122 |
|
* NOTE: We assume that we <em>can</em> free it on exit. No sharing! */ |
123 |
|
pBOUNDED_TOKEN_BUFFER d_tokenBuffer; |
124 |
|
|
125 |
|
/** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */ |
126 |
|
static void reportError(pANTLR3_BASE_RECOGNIZER recognizer); |
127 |
|
|
128 |
|
/** Builds a message for a lexer error and calls <code>parseError</code>. */ |
129 |
|
static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer); |
130 |
|
|
131 |
|
/** Returns the next available lexer token from the current input stream. */ |
132 |
|
/* - auxillary function */ |
133 |
|
static pANTLR3_COMMON_TOKEN |
134 |
|
nextTokenStr (pANTLR3_TOKEN_SOURCE toksource); |
135 |
|
/* - main function */ |
136 |
|
static pANTLR3_COMMON_TOKEN |
137 |
|
nextToken (pANTLR3_TOKEN_SOURCE toksource); |
138 |
|
|
139 |
|
/* Since we own d_tokenStream and it needs to be freed, we need to prevent |
140 |
|
* copy construction and assignment. |
141 |
|
*/ |
142 |
|
AntlrInput(const AntlrInput& input); |
143 |
|
AntlrInput& operator=(const AntlrInput& input); |
144 |
|
|
145 |
|
public: |
146 |
|
|
147 |
|
/** Destructor. Frees the token stream and closes the input. */ |
148 |
|
virtual ~AntlrInput(); |
149 |
|
|
150 |
|
/** Create an input for the given AntlrInputStream. |
151 |
|
* NOTE: the new Input will take ownership of the input stream and delete it |
152 |
|
* at destruction time. |
153 |
|
* |
154 |
|
* @param lang the input language |
155 |
|
* @param inputStream the input stream |
156 |
|
* |
157 |
|
* */ |
158 |
|
static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream); |
159 |
|
|
160 |
|
/** Retrieve the text associated with a token. */ |
161 |
|
static std::string tokenText(pANTLR3_COMMON_TOKEN token); |
162 |
|
|
163 |
|
/** Retrieve a substring of the text associated with a token. |
164 |
|
* |
165 |
|
* @param token the token |
166 |
|
* @param index the index of the starting character of the substring |
167 |
|
* @param n the size of the substring. If <code>n</code> is 0, then all of the |
168 |
|
* characters up to the end of the token text will be included. If <code>n</code> |
169 |
|
* would make the substring span past the end of the token text, only those |
170 |
|
* characters up to the end of the token text will be included. |
171 |
|
*/ |
172 |
|
static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0); |
173 |
|
|
174 |
|
/** Retrieve an unsigned from the text of a token */ |
175 |
|
static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token ); |
176 |
|
|
177 |
|
/** Get the ANTLR3 lexer for this input. */ |
178 |
7 |
pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; } |
179 |
|
|
180 |
|
pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; } |
181 |
|
protected: |
182 |
|
/** Create an input. This input takes ownership of the given input stream, |
183 |
|
* and will delete it at destruction time. |
184 |
|
* |
185 |
|
* @param inputStream the input stream to use |
186 |
|
* @param lookahead the lookahead needed to parse the input (i.e., k for |
187 |
|
* an LL(k) grammar) |
188 |
|
*/ |
189 |
|
AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead); |
190 |
|
|
191 |
|
/** Retrieve the token stream for this parser. Must not be called before |
192 |
|
* <code>setLexer()</code>. */ |
193 |
|
pANTLR3_COMMON_TOKEN_STREAM getTokenStream(); |
194 |
|
|
195 |
|
/** |
196 |
|
* Issue a non-fatal warning to the user with file, line, and column info. |
197 |
|
*/ |
198 |
|
void warning(const std::string& msg) override; |
199 |
|
|
200 |
|
/** |
201 |
|
* Throws a <code>ParserException</code> with the given message. |
202 |
|
*/ |
203 |
|
void parseError(const std::string& msg, bool eofException = false) override; |
204 |
|
|
205 |
|
/** Set the ANTLR3 lexer for this input. */ |
206 |
|
void setAntlr3Lexer(pANTLR3_LEXER pLexer); |
207 |
|
|
208 |
|
/** Set the ANTLR3 parser implementation for this input. */ |
209 |
|
void setAntlr3Parser(pANTLR3_PARSER pParser); |
210 |
|
|
211 |
|
/** Set the Parser object for this input. */ |
212 |
|
void setParser(Parser& parser) override; |
213 |
|
};/* class AntlrInput */ |
214 |
|
|
215 |
12286375 |
inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { |
216 |
12286375 |
if( token->type == ANTLR3_TOKEN_EOF ) { |
217 |
12 |
return "<<EOF>>"; |
218 |
|
} |
219 |
|
|
220 |
12286363 |
ANTLR3_MARKER start = token->getStartIndex(token); |
221 |
12286363 |
ANTLR3_MARKER end = token->getStopIndex(token); |
222 |
|
/* start and end are boundary pointers. The text is a string |
223 |
|
* of (end-start+1) bytes beginning at start. */ |
224 |
24572726 |
std::string txt( (const char *)start, end-start+1 ); |
225 |
24572726 |
Debug("parser-extra") << "tokenText: start=" << start << std::endl |
226 |
12286363 |
<< "end=" << end << std::endl |
227 |
12286363 |
<< "txt='" << txt << "'" << std::endl; |
228 |
12286363 |
return txt; |
229 |
|
} |
230 |
|
|
231 |
5633 |
inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token, |
232 |
|
size_t index, |
233 |
|
size_t n) { |
234 |
|
|
235 |
5633 |
ANTLR3_MARKER start = token->getStartIndex(token); |
236 |
|
// Its the last character of the token (not the one just after) |
237 |
5633 |
ANTLR3_MARKER end = token->getStopIndex(token); |
238 |
5633 |
Assert(start < end); |
239 |
5633 |
if( index > (size_t) end - start ) { |
240 |
|
std::stringstream ss; |
241 |
|
ss << "Out-of-bounds substring index: " << index; |
242 |
|
throw std::invalid_argument(ss.str()); |
243 |
|
} |
244 |
5633 |
start += index; |
245 |
5633 |
if( n==0 || n > (size_t) end - start ) { |
246 |
5628 |
return std::string( (const char *)start, end-start+1 ); |
247 |
|
} else { |
248 |
5 |
return std::string( (const char *)start, n ); |
249 |
|
} |
250 |
|
} |
251 |
|
|
252 |
204944 |
inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) { |
253 |
|
unsigned result; |
254 |
409888 |
std::stringstream ss; |
255 |
204944 |
ss << tokenText(token); |
256 |
204944 |
ss >> result; |
257 |
409888 |
return result; |
258 |
|
} |
259 |
|
|
260 |
|
} // namespace parser |
261 |
|
} // namespace cvc5 |
262 |
|
|
263 |
|
#endif /* CVC5__PARSER__ANTLR_INPUT_H */ |