GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.h Lines: 27 32 84.4 %
Date: 2021-03-22 Branches: 31 82 37.8 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file antlr_input.h
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Christopher L. Conway, Tim King, Morgan Deters
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Base for ANTLR parser classes.
13
 **
14
 ** Base for ANTLR parser classes.
15
 **/
16
17
#ifndef CVC4__PARSER__ANTLR_INPUT_H
18
#define CVC4__PARSER__ANTLR_INPUT_H
19
20
#include <antlr3.h>
21
22
#include <iostream>
23
#include <sstream>
24
#include <stdexcept>
25
#include <string>
26
#include <vector>
27
28
#include "base/check.h"
29
#include "base/output.h"
30
#include "cvc4parser_private.h"
31
#include "parser/bounded_token_buffer.h"
32
#include "parser/input.h"
33
#include "parser/line_buffer.h"
34
#include "parser/parser_exception.h"
35
#include "util/bitvector.h"
36
#include "util/integer.h"
37
#include "util/rational.h"
38
39
namespace CVC4 {
40
namespace parser {
41
42
/** Wrapper around an ANTLR3 input stream. */
43
class AntlrInputStream : public InputStream {
44
private:
45
  pANTLR3_INPUT_STREAM d_input;
46
47
  /**
48
   * If the AntlrInputStream corresponds to reading from a string,
49
   * this is the string literal. The memory is owned by the Antlr3Input. It is
50
   * assumed to be copied from malloc, and can be free'd at destruction time.
51
   * It is otherwise NULL.
52
   */
53
  pANTLR3_UINT8 d_inputString;
54
55
  LineBuffer* d_line_buffer;
56
57
  AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
58
                   bool fileIsTemporary, pANTLR3_UINT8 inputString,
59
                   LineBuffer* line_buffer);
60
61
  /* This is private and unimplemented, because you should never use it. */
62
  AntlrInputStream(const AntlrInputStream& inputStream) = delete;
63
64
  /* This is private and unimplemented, because you should never use it. */
65
  AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete;
66
67
public:
68
69
  virtual ~AntlrInputStream();
70
71
  pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
72
73
  /** Create a file input.
74
   *
75
   * @param name the path of the file to read
76
   * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
77
   * input will use the standard ANTLR3 I/O implementation.
78
   */
79
  static AntlrInputStream* newFileInputStream(const std::string& name,
80
                                              bool useMmap = false);
81
82
  /** Create an input from an istream. */
83
  static AntlrInputStream* newStreamInputStream(std::istream& input,
84
                                                const std::string& name,
85
                                                bool lineBuffered = false);
86
87
  /** Create a string input.
88
   * NOTE: the new AntlrInputStream will take ownership of input over
89
   * and free it at destruction time.
90
   *
91
   * @param input the string to read
92
   * @param name the "filename" to use when reporting errors
93
   */
94
  static AntlrInputStream* newStringInputStream(const std::string& input,
95
                                                const std::string& name);
96
};/* class AntlrInputStream */
97
98
class Parser;
99
100
/**
101
 * An input to be parsed. The static factory methods in this class (e.g.,
102
 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
103
 * for the given input language and attach it to an input source of the
104
 * appropriate type.
105
 */
106
class AntlrInput : public Input {
107
  /** The token lookahead used to lex and parse the input. This should usually be equal to
108
   * <code>K</code> for an LL(k) grammar. */
109
  unsigned int d_lookahead;
110
111
  /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
112
   *  must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
113
  pANTLR3_LEXER d_lexer;
114
115
  /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
116
   *  must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
117
   *  The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
118
   *  <code>reportError</code> will be set to <code>Input::reportError</code>. */
119
  pANTLR3_PARSER d_parser;
120
121
  /** The ANTLR3 input stream associated with this input. */
122
  pANTLR3_INPUT_STREAM d_antlr3InputStream;
123
124
  /** The ANTLR3 bounded token buffer associated with this input.
125
   *  We only need this so we can free it on exit.
126
   *  This is set by <code>setLexer</code>.
127
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
128
  pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
129
130
  /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
131
  static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
132
133
  /** Builds a message for a lexer error and calls <code>parseError</code>. */
134
  static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
135
136
  /** Returns the next available lexer token from the current input stream. */
137
  /* - auxillary function */
138
  static pANTLR3_COMMON_TOKEN
139
  nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
140
  /* - main function */
141
  static pANTLR3_COMMON_TOKEN
142
  nextToken (pANTLR3_TOKEN_SOURCE toksource);
143
144
  /* Since we own d_tokenStream and it needs to be freed, we need to prevent
145
   * copy construction and assignment.
146
   */
147
  AntlrInput(const AntlrInput& input);
148
  AntlrInput& operator=(const AntlrInput& input);
149
150
public:
151
152
  /** Destructor. Frees the token stream and closes the input. */
153
  virtual ~AntlrInput();
154
155
  /** Create an input for the given AntlrInputStream.
156
   * NOTE: the new Input will take ownership of the input stream and delete it
157
   * at destruction time.
158
   *
159
   * @param lang the input language
160
   * @param inputStream the input stream
161
   *
162
   * */
163
  static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);
164
165
  /** Retrieve the text associated with a token. */
166
  static std::string tokenText(pANTLR3_COMMON_TOKEN token);
167
168
  /** Retrieve a substring of the text associated with a token.
169
   *
170
   * @param token the token
171
   * @param index the index of the starting character of the substring
172
   * @param n the size of the substring. If <code>n</code> is 0, then all of the
173
   * characters up to the end of the token text will be included. If <code>n</code>
174
   * would make the substring span past the end of the token text, only those
175
   * characters up to the end of the token text will be included.
176
   */
177
  static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
178
179
  /** Retrieve an unsigned from the text of a token */
180
  static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
181
182
  /** Retrieve an Integer from the text of a token */
183
  static Rational tokenToInteger( pANTLR3_COMMON_TOKEN token );
184
185
  /** Retrieve a Rational from the text of a token */
186
  static Rational tokenToRational(pANTLR3_COMMON_TOKEN token);
187
188
  /** Get a bitvector constant from the text of the number and the size token */
189
  static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size);
190
191
  /** Get the ANTLR3 lexer for this input. */
192
7
  pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; }
193
194
  pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; }
195
protected:
196
  /** Create an input. This input takes ownership of the given input stream,
197
   * and will delete it at destruction time.
198
   *
199
   * @param inputStream the input stream to use
200
   * @param lookahead the lookahead needed to parse the input (i.e., k for
201
   * an LL(k) grammar)
202
   */
203
  AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
204
205
  /** Retrieve the token stream for this parser. Must not be called before
206
   * <code>setLexer()</code>. */
207
  pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
208
209
  /**
210
   * Issue a non-fatal warning to the user with file, line, and column info.
211
   */
212
  void warning(const std::string& msg) override;
213
214
  /**
215
   * Throws a <code>ParserException</code> with the given message.
216
   */
217
  void parseError(const std::string& msg, bool eofException = false) override;
218
219
  /** Set the ANTLR3 lexer for this input. */
220
  void setAntlr3Lexer(pANTLR3_LEXER pLexer);
221
222
  /** Set the ANTLR3 parser implementation for this input. */
223
  void setAntlr3Parser(pANTLR3_PARSER pParser);
224
225
  /** Set the Parser object for this input. */
226
  void setParser(Parser& parser) override;
227
};/* class AntlrInput */
228
229
12022749
inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
230
12022749
  if( token->type == ANTLR3_TOKEN_EOF ) {
231
12
    return "<<EOF>>";
232
  }
233
234
12022737
  ANTLR3_MARKER start = token->getStartIndex(token);
235
12022737
  ANTLR3_MARKER end = token->getStopIndex(token);
236
  /* start and end are boundary pointers. The text is a string
237
   * of (end-start+1) bytes beginning at start. */
238
24045474
  std::string txt( (const char *)start, end-start+1 );
239
24045474
  Debug("parser-extra") << "tokenText: start=" << start << std::endl
240
12022737
                        <<  "end=" << end << std::endl
241
12022737
                        <<  "txt='" << txt << "'" << std::endl;
242
12022737
  return txt;
243
}
244
245
5730
inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
246
                                               size_t index,
247
                                               size_t n) {
248
249
5730
  ANTLR3_MARKER start = token->getStartIndex(token);
250
  // Its the last character of the token (not the one just after)
251
5730
  ANTLR3_MARKER end = token->getStopIndex(token);
252
5730
  Assert(start < end);
253
5730
  if( index > (size_t) end - start ) {
254
    std::stringstream ss;
255
    ss << "Out-of-bounds substring index: " << index;
256
    throw std::invalid_argument(ss.str());
257
  }
258
5730
  start += index;
259
5730
  if( n==0 || n > (size_t) end - start ) {
260
5725
    return std::string( (const char *)start, end-start+1 );
261
  } else {
262
5
    return std::string( (const char *)start, n );
263
  }
264
}
265
266
214305
inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
267
  unsigned result;
268
428610
  std::stringstream ss;
269
214305
  ss << tokenText(token);
270
214305
  ss >> result;
271
428610
  return result;
272
}
273
274
inline Rational AntlrInput::tokenToInteger(pANTLR3_COMMON_TOKEN token) {
275
  return Rational( tokenText(token) );
276
}
277
278
3901
inline Rational AntlrInput::tokenToRational(pANTLR3_COMMON_TOKEN token) {
279
3901
  return Rational::fromDecimal( tokenText(token) );
280
}
281
282
inline BitVector AntlrInput::tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size) {
283
  std::string number_str = tokenTextSubstr(number, 2);
284
  unsigned sz = tokenToUnsigned(size);
285
  Integer val(number_str);
286
  if(val.modByPow2(sz) != val) {
287
    std::stringstream ss;
288
    ss << "Overflow in bitvector construction (specified bitvector size " << sz << " too small to hold value " << tokenText(number) << ")";
289
    throw std::invalid_argument(ss.str());
290
  }
291
  return BitVector(sz, val);
292
}
293
294
}/* CVC4::parser namespace */
295
}/* CVC4 namespace */
296
297
#endif /* CVC4__PARSER__ANTLR_INPUT_H */