GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_input.h Lines: 25 28 89.3 %
Date: 2021-09-07 Branches: 30 78 38.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Christopher L. Conway, Tim King, Morgan Deters
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Base for ANTLR parser classes.
14
 */
15
16
#ifndef CVC5__PARSER__ANTLR_INPUT_H
17
#define CVC5__PARSER__ANTLR_INPUT_H
18
19
#include <antlr3.h>
20
21
#include <iostream>
22
#include <sstream>
23
#include <stdexcept>
24
#include <string>
25
#include <vector>
26
27
#include "base/check.h"
28
#include "base/output.h"
29
#include "cvc5parser_private.h"
30
#include "parser/bounded_token_buffer.h"
31
#include "parser/input.h"
32
#include "parser/line_buffer.h"
33
#include "parser/parser_exception.h"
34
35
namespace cvc5 {
36
namespace parser {
37
38
/** Wrapper around an ANTLR3 input stream. */
39
class AntlrInputStream : public InputStream {
40
private:
41
  pANTLR3_INPUT_STREAM d_input;
42
43
  /**
44
   * If the AntlrInputStream corresponds to reading from a string,
45
   * this is the string literal. The memory is owned by the Antlr3Input. It is
46
   * assumed to be copied from malloc, and can be free'd at destruction time.
47
   * It is otherwise NULL.
48
   */
49
  pANTLR3_UINT8 d_inputString;
50
51
  LineBuffer* d_line_buffer;
52
53
  AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
54
                   bool fileIsTemporary, pANTLR3_UINT8 inputString,
55
                   LineBuffer* line_buffer);
56
57
  /* This is private and unimplemented, because you should never use it. */
58
  AntlrInputStream(const AntlrInputStream& inputStream) = delete;
59
60
  /* This is private and unimplemented, because you should never use it. */
61
  AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete;
62
63
public:
64
65
  virtual ~AntlrInputStream();
66
67
  pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
68
69
  /** Create a file input.
70
   *
71
   * @param name the path of the file to read
72
   * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
73
   * input will use the standard ANTLR3 I/O implementation.
74
   */
75
  static AntlrInputStream* newFileInputStream(const std::string& name,
76
                                              bool useMmap = false);
77
78
  /** Create an input from an istream. */
79
  static AntlrInputStream* newStreamInputStream(std::istream& input,
80
                                                const std::string& name);
81
82
  /** Create a string input.
83
   * NOTE: the new AntlrInputStream will take ownership of input over
84
   * and free it at destruction time.
85
   *
86
   * @param input the string to read
87
   * @param name the "filename" to use when reporting errors
88
   */
89
  static AntlrInputStream* newStringInputStream(const std::string& input,
90
                                                const std::string& name);
91
};/* class AntlrInputStream */
92
93
/* Forward declaration: this header only names Parser by reference
 * (see AntlrInput::setParser), so the full definition is not needed here. */
class Parser;
94
95
/**
96
 * An input to be parsed. The static factory methods in this class (e.g.,
97
 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
98
 * for the given input language and attach it to an input source of the
99
 * appropriate type.
100
 */
101
class AntlrInput : public Input {
102
  /** The token lookahead used to lex and parse the input. This should usually be equal to
103
   * <code>K</code> for an LL(k) grammar. */
104
  unsigned int d_lookahead;
105
106
  /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
107
   *  must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
108
  pANTLR3_LEXER d_lexer;
109
110
  /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
111
   *  must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
112
   *  The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
113
   *  <code>reportError</code> will be set to <code>Input::reportError</code>. */
114
  pANTLR3_PARSER d_parser;
115
116
  /** The ANTLR3 input stream associated with this input. */
117
  pANTLR3_INPUT_STREAM d_antlr3InputStream;
118
119
  /** The ANTLR3 bounded token buffer associated with this input.
120
   *  We only need this so we can free it on exit.
121
   *  This is set by <code>setLexer</code>.
122
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
123
  pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
124
125
  /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
126
  static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
127
128
  /** Builds a message for a lexer error and calls <code>parseError</code>. */
129
  static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
130
131
  /** Returns the next available lexer token from the current input stream. */
132
  /* - auxillary function */
133
  static pANTLR3_COMMON_TOKEN
134
  nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
135
  /* - main function */
136
  static pANTLR3_COMMON_TOKEN
137
  nextToken (pANTLR3_TOKEN_SOURCE toksource);
138
139
  /* Since we own d_tokenStream and it needs to be freed, we need to prevent
140
   * copy construction and assignment.
141
   */
142
  AntlrInput(const AntlrInput& input);
143
  AntlrInput& operator=(const AntlrInput& input);
144
145
public:
146
147
  /** Destructor. Frees the token stream and closes the input. */
148
  virtual ~AntlrInput();
149
150
  /** Create an input for the given AntlrInputStream.
151
   * NOTE: the new Input will take ownership of the input stream and delete it
152
   * at destruction time.
153
   *
154
   * @param lang the input language
155
   * @param inputStream the input stream
156
   *
157
   * */
158
  static AntlrInput* newInput(const std::string& lang,
159
                              AntlrInputStream& inputStream);
160
161
  /** Retrieve the text associated with a token. */
162
  static std::string tokenText(pANTLR3_COMMON_TOKEN token);
163
164
  /** Retrieve a substring of the text associated with a token.
165
   *
166
   * @param token the token
167
   * @param index the index of the starting character of the substring
168
   * @param n the size of the substring. If <code>n</code> is 0, then all of the
169
   * characters up to the end of the token text will be included. If <code>n</code>
170
   * would make the substring span past the end of the token text, only those
171
   * characters up to the end of the token text will be included.
172
   */
173
  static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
174
175
  /** Retrieve an unsigned from the text of a token */
176
  static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
177
178
  /** Get the ANTLR3 lexer for this input. */
179
7
  pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; }
180
181
  pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; }
182
protected:
183
  /** Create an input. This input takes ownership of the given input stream,
184
   * and will delete it at destruction time.
185
   *
186
   * @param inputStream the input stream to use
187
   * @param lookahead the lookahead needed to parse the input (i.e., k for
188
   * an LL(k) grammar)
189
   */
190
  AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
191
192
  /** Retrieve the token stream for this parser. Must not be called before
193
   * <code>setLexer()</code>. */
194
  pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
195
196
  /**
197
   * Issue a non-fatal warning to the user with file, line, and column info.
198
   */
199
  void warning(const std::string& msg) override;
200
201
  /**
202
   * Throws a <code>ParserException</code> with the given message.
203
   */
204
  void parseError(const std::string& msg, bool eofException = false) override;
205
206
  /** Set the ANTLR3 lexer for this input. */
207
  void setAntlr3Lexer(pANTLR3_LEXER pLexer);
208
209
  /** Set the ANTLR3 parser implementation for this input. */
210
  void setAntlr3Parser(pANTLR3_PARSER pParser);
211
212
  /** Set the Parser object for this input. */
213
  void setParser(Parser& parser) override;
214
};/* class AntlrInput */
215
216
12247362
/**
 * Retrieve the text associated with a token.
 *
 * An EOF token yields the fixed text "<<EOF>>". For every other token the
 * text is the run of bytes between the token's start and stop indices, which
 * are interpreted here as addresses (both ends inclusive).
 */
inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
  if (token->type != ANTLR3_TOKEN_EOF) {
    const ANTLR3_MARKER first = token->getStartIndex(token);
    const ANTLR3_MARKER last = token->getStopIndex(token);
    /* first and last are boundary pointers: the text occupies
     * (last-first+1) bytes beginning at first. */
    std::string text(reinterpret_cast<const char*>(first), last - first + 1);
    Debug("parser-extra") << "tokenText: start=" << first << std::endl
                          <<  "end=" << last << std::endl
                          <<  "txt='" << text << "'" << std::endl;
    return text;
  }
  return "<<EOF>>";
}
231
232
5633
inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
233
                                               size_t index,
234
                                               size_t n) {
235
236
5633
  ANTLR3_MARKER start = token->getStartIndex(token);
237
  // Its the last character of the token (not the one just after)
238
5633
  ANTLR3_MARKER end = token->getStopIndex(token);
239
5633
  Assert(start < end);
240
5633
  if( index > (size_t) end - start ) {
241
    std::stringstream ss;
242
    ss << "Out-of-bounds substring index: " << index;
243
    throw std::invalid_argument(ss.str());
244
  }
245
5633
  start += index;
246
5633
  if( n==0 || n > (size_t) end - start ) {
247
5628
    return std::string( (const char *)start, end-start+1 );
248
  } else {
249
5
    return std::string( (const char *)start, n );
250
  }
251
}
252
253
204973
/**
 * Retrieve an unsigned from the text of a token.
 *
 * The token text (see tokenText) is parsed with stream extraction. If no
 * number can be read, 0 is returned.
 */
inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
  // Start from 0: when the text is empty the extraction fails before storing
  // anything, and returning an uninitialized value would be undefined.
  unsigned result = 0;
  std::stringstream ss;
  ss << tokenText(token);
  ss >> result;
  return result;
}
260
261
}  // namespace parser
262
}  // namespace cvc5
263
264
#endif /* CVC5__PARSER__ANTLR_INPUT_H */