Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/parser/antlr_input.h	Lines:	25	28	89.3 %
Date:	2021-08-14	Branches:	30	78	38.5 %


/******************************************************************************
 * Top contributors (to current version):
 *   Christopher L. Conway, Tim King, Morgan Deters
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Base for ANTLR parser classes.
 */

#ifndef CVC5__PARSER__ANTLR_INPUT_H
#define CVC5__PARSER__ANTLR_INPUT_H

#include <antlr3.h>

#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "base/check.h"
#include "base/output.h"
#include "cvc5parser_private.h"
#include "parser/bounded_token_buffer.h"
#include "parser/input.h"
#include "parser/line_buffer.h"
#include "parser/parser_exception.h"

namespace cvc5 {
namespace parser {

/** Wrapper around an ANTLR3 input stream. */
class AntlrInputStream : public InputStream {
private:
  pANTLR3_INPUT_STREAM d_input;

  /**
   * If the AntlrInputStream corresponds to reading from a string,
   * this is the string literal. The memory is owned by the Antlr3Input. It is
   * assumed to be copied from malloc, and can be free'd at destruction time.
   * It is otherwise NULL.
   */
  pANTLR3_UINT8 d_inputString;

  LineBuffer* d_line_buffer;

  AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
                   bool fileIsTemporary, pANTLR3_UINT8 inputString,
                   LineBuffer* line_buffer);

  /* This is private and unimplemented, because you should never use it. */
  AntlrInputStream(const AntlrInputStream& inputStream) = delete;

  /* This is private and unimplemented, because you should never use it. */
  AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete;

public:

  virtual ~AntlrInputStream();

  pANTLR3_INPUT_STREAM getAntlr3InputStream() const;

  /** Create a file input.
   *
   * @param name the path of the file to read
   * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
   * input will use the standard ANTLR3 I/O implementation.
   */
  static AntlrInputStream* newFileInputStream(const std::string& name,
                                              bool useMmap = false);

  /** Create an input from an istream. */
  static AntlrInputStream* newStreamInputStream(std::istream& input,
                                                const std::string& name);

  /** Create a string input.
   * NOTE: the new AntlrInputStream will take ownership of input over
   * and free it at destruction time.
   *
   * @param input the string to read
   * @param name the "filename" to use when reporting errors
   */
  static AntlrInputStream* newStringInputStream(const std::string& input,
                                                const std::string& name);
};/* class AntlrInputStream */

class Parser;

/**
 * An input to be parsed. The static factory methods in this class (e.g.,
 * <code>newFileInput</code>, <code>newStringInput</code>) create a parser
 * for the given input language and attach it to an input source of the
 * appropriate type.
 */
class AntlrInput : public Input {
  /** The token lookahead used to lex and parse the input. This should usually be equal to
   * <code>K</code> for an LL(k) grammar. */
  unsigned int d_lookahead;

  /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
   *  must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
  pANTLR3_LEXER d_lexer;

  /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
   *  must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
   *  The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
   *  <code>reportError</code> will be set to <code>Input::reportError</code>. */
  pANTLR3_PARSER d_parser;

  /** The ANTLR3 input stream associated with this input. */
  pANTLR3_INPUT_STREAM d_antlr3InputStream;

  /** The ANTLR3 bounded token buffer associated with this input.
   *  We only need this so we can free it on exit.
   *  This is set by <code>setLexer</code>.
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
  pBOUNDED_TOKEN_BUFFER d_tokenBuffer;

  /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
  static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);

  /** Builds a message for a lexer error and calls <code>parseError</code>. */
  static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);

  /** Returns the next available lexer token from the current input stream. */
  /* - auxillary function */
  static pANTLR3_COMMON_TOKEN
  nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
  /* - main function */
  static pANTLR3_COMMON_TOKEN
  nextToken (pANTLR3_TOKEN_SOURCE toksource);

  /* Since we own d_tokenStream and it needs to be freed, we need to prevent
   * copy construction and assignment.
   */
  AntlrInput(const AntlrInput& input);
  AntlrInput& operator=(const AntlrInput& input);

public:

  /** Destructor. Frees the token stream and closes the input. */
  virtual ~AntlrInput();

  /** Create an input for the given AntlrInputStream.
   * NOTE: the new Input will take ownership of the input stream and delete it
   * at destruction time.
   *
   * @param lang the input language
   * @param inputStream the input stream
   *
   * */
  static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);

  /** Retrieve the text associated with a token. */
  static std::string tokenText(pANTLR3_COMMON_TOKEN token);

  /** Retrieve a substring of the text associated with a token.
   *
   * @param token the token
   * @param index the index of the starting character of the substring
   * @param n the size of the substring. If <code>n</code> is 0, then all of the
   * characters up to the end of the token text will be included. If <code>n</code>
   * would make the substring span past the end of the token text, only those
   * characters up to the end of the token text will be included.
   */
  static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);

  /** Retrieve an unsigned from the text of a token */
  static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );

  /** Get the ANTLR3 lexer for this input. */
  pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; }

  pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; }
protected:
  /** Create an input. This input takes ownership of the given input stream,
   * and will delete it at destruction time.
   *
   * @param inputStream the input stream to use
   * @param lookahead the lookahead needed to parse the input (i.e., k for
   * an LL(k) grammar)
   */
  AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);

  /** Retrieve the token stream for this parser. Must not be called before
   * <code>setLexer()</code>. */
  pANTLR3_COMMON_TOKEN_STREAM getTokenStream();

  /**
   * Issue a non-fatal warning to the user with file, line, and column info.
   */
  void warning(const std::string& msg) override;

  /**
   * Throws a <code>ParserException</code> with the given message.
   */
  void parseError(const std::string& msg, bool eofException = false) override;

  /** Set the ANTLR3 lexer for this input. */
  void setAntlr3Lexer(pANTLR3_LEXER pLexer);

  /** Set the ANTLR3 parser implementation for this input. */
  void setAntlr3Parser(pANTLR3_PARSER pParser);

  /** Set the Parser object for this input. */
  void setParser(Parser& parser) override;
};/* class AntlrInput */

inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
  if( token->type == ANTLR3_TOKEN_EOF ) {
    return "<<EOF>>";
  }

  ANTLR3_MARKER start = token->getStartIndex(token);
  ANTLR3_MARKER end = token->getStopIndex(token);
  /* start and end are boundary pointers. The text is a string
   * of (end-start+1) bytes beginning at start. */
  std::string txt( (const char *)start, end-start+1 );
  Debug("parser-extra") << "tokenText: start=" << start << std::endl
                        <<  "end=" << end << std::endl
                        <<  "txt='" << txt << "'" << std::endl;
  return txt;
}

inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
                                               size_t index,
                                               size_t n) {

  ANTLR3_MARKER start = token->getStartIndex(token);
  // Its the last character of the token (not the one just after)
  ANTLR3_MARKER end = token->getStopIndex(token);
  Assert(start < end);
  if( index > (size_t) end - start ) {
    std::stringstream ss;
    ss << "Out-of-bounds substring index: " << index;
    throw std::invalid_argument(ss.str());
  }
  start += index;
  if( n==0 || n > (size_t) end - start ) {
    return std::string( (const char *)start, end-start+1 );
  } else {
    return std::string( (const char *)start, n );
  }
}

inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
  unsigned result;
  std::stringstream ss;
  ss << tokenText(token);
  ss >> result;
  return result;
}

}  // namespace parser
}  // namespace cvc5

#endif /* CVC5__PARSER__ANTLR_INPUT_H */


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Christopher L. Conway, Tim King, Morgan Deters
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Base for ANTLR parser classes.
14		*/
15
16		#ifndef CVC5__PARSER__ANTLR_INPUT_H
17		#define CVC5__PARSER__ANTLR_INPUT_H
18
19		#include <antlr3.h>
20
21		#include <iostream>
22		#include <sstream>
23		#include <stdexcept>
24		#include <string>
25		#include <vector>
26
27		#include "base/check.h"
28		#include "base/output.h"
29		#include "cvc5parser_private.h"
30		#include "parser/bounded_token_buffer.h"
31		#include "parser/input.h"
32		#include "parser/line_buffer.h"
33		#include "parser/parser_exception.h"
34
35		namespace cvc5 {
36		namespace parser {
37
38		/** Wrapper around an ANTLR3 input stream. */
39		class AntlrInputStream : public InputStream {
40		private:
41		pANTLR3_INPUT_STREAM d_input;
42
43		/**
44		* If the AntlrInputStream corresponds to reading from a string,
45		* this is the string literal. The memory is owned by the Antlr3Input. It is
46		* assumed to be copied from malloc, and can be free'd at destruction time.
47		* It is otherwise NULL.
48		*/
49		pANTLR3_UINT8 d_inputString;
50
51		LineBuffer* d_line_buffer;
52
53		AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
54		bool fileIsTemporary, pANTLR3_UINT8 inputString,
55		LineBuffer* line_buffer);
56
57		/* This is private and unimplemented, because you should never use it. */
58		AntlrInputStream(const AntlrInputStream& inputStream) = delete;
59
60		/* This is private and unimplemented, because you should never use it. */
61		AntlrInputStream& operator=(const AntlrInputStream& inputStream) = delete;
62
63		public:
64
65		virtual ~AntlrInputStream();
66
67		pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
68
69		/** Create a file input.
70		*
71		* @param name the path of the file to read
72		* @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
73		* input will use the standard ANTLR3 I/O implementation.
74		*/
75		static AntlrInputStream* newFileInputStream(const std::string& name,
76		bool useMmap = false);
77
78		/** Create an input from an istream. */
79		static AntlrInputStream* newStreamInputStream(std::istream& input,
80		const std::string& name);
81
82		/** Create a string input.
83		* NOTE: the new AntlrInputStream will take ownership of input over
84		* and free it at destruction time.
85		*
86		* @param input the string to read
87		* @param name the "filename" to use when reporting errors
88		*/
89		static AntlrInputStream* newStringInputStream(const std::string& input,
90		const std::string& name);
91		};/* class AntlrInputStream */
92
93		class Parser;
94
95		/**
96		* An input to be parsed. The static factory methods in this class (e.g.,
97		* <code>newFileInput</code>, <code>newStringInput</code>) create a parser
98		* for the given input language and attach it to an input source of the
99		* appropriate type.
100		*/
101		class AntlrInput : public Input {
102		/** The token lookahead used to lex and parse the input. This should usually be equal to
103		* <code>K</code> for an LL(k) grammar. */
104		unsigned int d_lookahead;
105
106		/** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
107		* must be set by a call to <code>setLexer</code>, preferably in the subclass constructor. */
108		pANTLR3_LEXER d_lexer;
109
110		/** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
111		* must be set by a call to <code>setParser</code>, preferably in the subclass constructor.
112		* The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
113		* <code>reportError</code> will be set to <code>Input::reportError</code>. */
114		pANTLR3_PARSER d_parser;
115
116		/** The ANTLR3 input stream associated with this input. */
117		pANTLR3_INPUT_STREAM d_antlr3InputStream;
118
119		/** The ANTLR3 bounded token buffer associated with this input.
120		* We only need this so we can free it on exit.
121		* This is set by <code>setLexer</code>.
122		* NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
123		pBOUNDED_TOKEN_BUFFER d_tokenBuffer;
124
125		/** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
126		static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);
127
128		/** Builds a message for a lexer error and calls <code>parseError</code>. */
129		static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);
130
131		/** Returns the next available lexer token from the current input stream. */
132		/* - auxillary function */
133		static pANTLR3_COMMON_TOKEN
134		nextTokenStr (pANTLR3_TOKEN_SOURCE toksource);
135		/* - main function */
136		static pANTLR3_COMMON_TOKEN
137		nextToken (pANTLR3_TOKEN_SOURCE toksource);
138
139		/* Since we own d_tokenStream and it needs to be freed, we need to prevent
140		* copy construction and assignment.
141		*/
142		AntlrInput(const AntlrInput& input);
143		AntlrInput& operator=(const AntlrInput& input);
144
145		public:
146
147		/** Destructor. Frees the token stream and closes the input. */
148		virtual ~AntlrInput();
149
150		/** Create an input for the given AntlrInputStream.
151		* NOTE: the new Input will take ownership of the input stream and delete it
152		* at destruction time.
153		*
154		* @param lang the input language
155		* @param inputStream the input stream
156		*
157		* */
158		static AntlrInput* newInput(InputLanguage lang, AntlrInputStream& inputStream);
159
160		/** Retrieve the text associated with a token. */
161		static std::string tokenText(pANTLR3_COMMON_TOKEN token);
162
163		/** Retrieve a substring of the text associated with a token.
164		*
165		* @param token the token
166		* @param index the index of the starting character of the substring
167		* @param n the size of the substring. If <code>n</code> is 0, then all of the
168		* characters up to the end of the token text will be included. If <code>n</code>
169		* would make the substring span past the end of the token text, only those
170		* characters up to the end of the token text will be included.
171		*/
172		static std::string tokenTextSubstr(pANTLR3_COMMON_TOKEN token, size_t index, size_t n = 0);
173
174		/** Retrieve an unsigned from the text of a token */
175		static unsigned tokenToUnsigned( pANTLR3_COMMON_TOKEN token );
176
177		/** Get the ANTLR3 lexer for this input. */
178	7	pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; }
179
180		pANTLR3_INPUT_STREAM getAntlr3InputStream() { return d_antlr3InputStream; }
181		protected:
182		/** Create an input. This input takes ownership of the given input stream,
183		* and will delete it at destruction time.
184		*
185		* @param inputStream the input stream to use
186		* @param lookahead the lookahead needed to parse the input (i.e., k for
187		* an LL(k) grammar)
188		*/
189		AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead);
190
191		/** Retrieve the token stream for this parser. Must not be called before
192		* <code>setLexer()</code>. */
193		pANTLR3_COMMON_TOKEN_STREAM getTokenStream();
194
195		/**
196		* Issue a non-fatal warning to the user with file, line, and column info.
197		*/
198		void warning(const std::string& msg) override;
199
200		/**
201		* Throws a <code>ParserException</code> with the given message.
202		*/
203		void parseError(const std::string& msg, bool eofException = false) override;
204
205		/** Set the ANTLR3 lexer for this input. */
206		void setAntlr3Lexer(pANTLR3_LEXER pLexer);
207
208		/** Set the ANTLR3 parser implementation for this input. */
209		void setAntlr3Parser(pANTLR3_PARSER pParser);
210
211		/** Set the Parser object for this input. */
212		void setParser(Parser& parser) override;
213		};/* class AntlrInput */
214
215	12286375	inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) {
216	12286375	if( token->type == ANTLR3_TOKEN_EOF ) {
217	12	return "<<EOF>>";
218		}
219
220	12286363	ANTLR3_MARKER start = token->getStartIndex(token);
221	12286363	ANTLR3_MARKER end = token->getStopIndex(token);
222		/* start and end are boundary pointers. The text is a string
223		* of (end-start+1) bytes beginning at start. */
224	24572726	std::string txt( (const char *)start, end-start+1 );
225	24572726	Debug("parser-extra") << "tokenText: start=" << start << std::endl
226	12286363	<< "end=" << end << std::endl
227	12286363	<< "txt='" << txt << "'" << std::endl;
228	12286363	return txt;
229		}
230
231	5633	inline std::string AntlrInput::tokenTextSubstr(pANTLR3_COMMON_TOKEN token,
232		size_t index,
233		size_t n) {
234
235	5633	ANTLR3_MARKER start = token->getStartIndex(token);
236		// Its the last character of the token (not the one just after)
237	5633	ANTLR3_MARKER end = token->getStopIndex(token);
238	5633	Assert(start < end);
239	5633	if( index > (size_t) end - start ) {
240		std::stringstream ss;
241		ss << "Out-of-bounds substring index: " << index;
242		throw std::invalid_argument(ss.str());
243		}
244	5633	start += index;
245	5633	if( n==0 \|\| n > (size_t) end - start ) {
246	5628	return std::string( (const char *)start, end-start+1 );
247		} else {
248	5	return std::string( (const char *)start, n );
249		}
250		}
251
252	204944	inline unsigned AntlrInput::tokenToUnsigned(pANTLR3_COMMON_TOKEN token) {
253		unsigned result;
254	409888	std::stringstream ss;
255	204944	ss << tokenText(token);
256	204944	ss >> result;
257	409888	return result;
258		}
259
260		} // namespace parser
261		} // namespace cvc5
262
263		#endif /* CVC5__PARSER__ANTLR_INPUT_H */