GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.h Lines: 25 28 89.3 %
Date: 2021-05-22 Branches: 30 78 38.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Tim King, Morgan Deters
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Base for ANTLR parser classes.
14
 */
15
16
#ifndef CVC5__PARSER__ANTLR_INPUT_H
17
#define CVC5__PARSER__ANTLR_INPUT_H
18
19
#include <antlr3.h>
20
21
#include <iostream>
22
#include <sstream>
23
#include <stdexcept>
24
#include <string>
25
#include <vector>
26
27
#include "base/check.h"
28
#include "base/output.h"
29
#include "cvc5parser_private.h"
30
#include "parser/bounded_token_buffer.h"
31
#include "parser/input.h"
32
#include "parser/line_buffer.h"
33
#include "parser/parser_exception.h"
34
35
namespace cvc5 {
36
namespace parser {
37
38
/** Wrapper around an ANTLR3 input stream. */
39
class AntlrInputStream : public InputStream {
40
private:
41
  pANTLR3_INPUT_STREAM d_input;
42
43
  /**
44
   * If the AntlrInputStream corresponds to reading from a string,
45
   * this is the string literal. The memory is owned by the Antlr3Input. It is
46
   * assumed to be copied from malloc, and can be free'd at destruction time.
47
   * It is otherwise NULL.
48
   */
49
  pANTLR3_UINT8 d_inputString;
50
51
  LineBuffer* d_line_buffer;
52
53
  AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
54
                   bool fileIsTemporary, pANTLR3_UINT8 inputString,
55
                   LineBuffer* line_buffer);
56
57
  /* This is private and unimplemented, because you should never use it. */
58
  AntlrInputStream(const AntlrInputStream& inputStream) = delete;
59
60
  /* This is private and unimplemented, because you should never use it. */
61
  AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete;
62
63
public:
64
65
  virtual ~AntlrInputStream();
66
67
  pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
68
69
  /** Create a file input.
70
   *
71
   * @param name the path of the file to read
72
   * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
73
   * input will use the standard ANTLR3 I/O implementation.
74
   */
75
  static AntlrInputStream* newFileInputStream(const std::string& name,
76
                                              bool useMmap = false);
77
78
  /** Create an input from an istream. */
79
  static AntlrInputStream* newStreamInputStream(std::istream& input,
80
                                                const std::string& name);
81
82
  /** Create a string input.
83
   * NOTE: the new AntlrInputStream will take ownership of input over
84
   * and free it at destruction time.
85
   *
86
   * @param input the string to read
87
   * @param name the "filename" to use when reporting errors
88
   */
89
  static AntlrInputStream* newStringInputStream(const std::string& input,
90
                                                const std::string& name);
91
};/* class AntlrInputStream */
92
93
class Parser;
94
95
/**
96
 * An input to be parsed. The static factory methods in this class (e.g.,
97
 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
98
 * for the given input language and attach it to an input source of the
99
 * appropriate type.
100
 */
101
class AntlrInput : public Input {
102
  /** The token lookahead used to lex and parse the input. This should usually be equal to
103
   * <code>K</code> for an LL(k) grammar. */
104
  unsigned int d_lookahead;
105
106
  /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
107
   *  must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
108
  pANTLR3_LEXER d_lexer;
109
110
  /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
111
   *  must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
112
   *  The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
113
   *  <code>reportError</code> will be set to <code>Input::reportError</code>. */
114
  pANTLR3_PARSER d_parser;
115
116
  /** The ANTLR3 input stream associated with this input. */
117
  pANTLR3_INPUT_STREAM d_antlr3InputStream;
118
119
  /** The ANTLR3 bounded token buffer associated with this input.
120
   *  We only need this so we can free it on exit.
121
   *  This is set by <code>setLexer</code>.
122
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
123
  pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
124
125
  /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
126
  static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
127
128
  /** Builds a message for a lexer error and calls <code>parseError</code>. */
129
  static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
130
131
  /** Returns the next available lexer token from the current input stream. */
132
  /* - auxillary function */
133
  static pANTLR3_COMMON_TOKEN
134
  nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
135
  /* - main function */
136
  static pANTLR3_COMMON_TOKEN
137
  nextToken (pANTLR3_TOKEN_SOURCE toksource);
138
139
  /* Since we own d_tokenStream and it needs to be freed, we need to prevent
140
   * copy construction and assignment.
141
   */
142
  AntlrInput(const AntlrInput& input);
143
  AntlrInput& operator=(const AntlrInput& input);
144
145
public:
146
147
  /** Destructor. Frees the token stream and closes the input. */
148
  virtual ~AntlrInput();
149
150
  /** Create an input for the given AntlrInputStream.
151
   * NOTE: the new Input will take ownership of the input stream and delete it
152
   * at destruction time.
153
   *
154
   * @param lang the input language
155
   * @param inputStream the input stream
156
   *
157
   * */
158
  static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);
159
160
  /** Retrieve the text associated with a token. */
161
  static std::string tokenText(pANTLR3_COMMON_TOKEN token);
162
163
  /** Retrieve a substring of the text associated with a token.
164
   *
165
   * @param token the token
166
   * @param index the index of the starting character of the substring
167
   * @param n the size of the substring. If <code>n</code> is 0, then all of the
168
   * characters up to the end of the token text will be included. If <code>n</code>
169
   * would make the substring span past the end of the token text, only those
170
   * characters up to the end of the token text will be included.
171
   */
172
  static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
173
174
  /** Retrieve an unsigned from the text of a token */
175
  static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
176
177
  /** Get the ANTLR3 lexer for this input. */
178
7
  pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; }
179
180
  pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; }
181
protected:
182
  /** Create an input. This input takes ownership of the given input stream,
183
   * and will delete it at destruction time.
184
   *
185
   * @param inputStream the input stream to use
186
   * @param lookahead the lookahead needed to parse the input (i.e., k for
187
   * an LL(k) grammar)
188
   */
189
  AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
190
191
  /** Retrieve the token stream for this parser. Must not be called before
192
   * <code>setLexer()</code>. */
193
  pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
194
195
  /**
196
   * Issue a non-fatal warning to the user with file, line, and column info.
197
   */
198
  void warning(const std::string& msg) override;
199
200
  /**
201
   * Throws a <code>ParserException</code> with the given message.
202
   */
203
  void parseError(const std::string& msg, bool eofException = false) override;
204
205
  /** Set the ANTLR3 lexer for this input. */
206
  void setAntlr3Lexer(pANTLR3_LEXER pLexer);
207
208
  /** Set the ANTLR3 parser implementation for this input. */
209
  void setAntlr3Parser(pANTLR3_PARSER pParser);
210
211
  /** Set the Parser object for this input. */
212
  void setParser(Parser& parser) override;
213
};/* class AntlrInput */
214
215
12503693
inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
216
12503693
  if( token->type == ANTLR3_TOKEN_EOF ) {
217
12
    return "<<EOF>>";
218
  }
219
220
12503681
  ANTLR3_MARKER start = token->getStartIndex(token);
221
12503681
  ANTLR3_MARKER end = token->getStopIndex(token);
222
  /* start and end are boundary pointers. The text is a string
223
   * of (end-start+1) bytes beginning at start. */
224
25007362
  std::string txt( (const char *)start, end-start+1 );
225
25007362
  Debug("parser-extra") << "tokenText: start=" << start << std::endl
226
12503681
                        <<  "end=" << end << std::endl
227
12503681
                        <<  "txt='" << txt << "'" << std::endl;
228
12503681
  return txt;
229
}
230
231
6900
inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
232
                                               size_t index,
233
                                               size_t n) {
234
235
6900
  ANTLR3_MARKER start = token->getStartIndex(token);
236
  // Its the last character of the token (not the one just after)
237
6900
  ANTLR3_MARKER end = token->getStopIndex(token);
238
6900
  Assert(start < end);
239
6900
  if( index > (size_t) end - start ) {
240
    std::stringstream ss;
241
    ss << "Out-of-bounds substring index: " << index;
242
    throw std::invalid_argument(ss.str());
243
  }
244
6900
  start += index;
245
6900
  if( n==0 || n > (size_t) end - start ) {
246
6895
    return std::string( (const char *)start, end-start+1 );
247
  } else {
248
5
    return std::string( (const char *)start, n );
249
  }
250
}
251
252
219821
inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
253
  unsigned result;
254
439642
  std::stringstream ss;
255
219821
  ss << tokenText(token);
256
219821
  ss >> result;
257
439642
  return result;
258
}
259
260
}  // namespace parser
261
}  // namespace cvc5
262
263
#endif /* CVC5__PARSER__ANTLR_INPUT_H */