Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/parser/antlr_input.cpp	Lines:	205	215	95.3 %
Date:	2021-09-15	Branches:	236	466	50.6 %


/******************************************************************************
 * Top contributors (to current version):
 *   Christopher L. Conway, Kshitij Bansal, Tim King
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * A super-class for ANTLR-generated input language parsers.
 */

#include "parser/antlr_input.h"

#include <antlr3.h>
#include <limits>

#include "base/check.h"
#include "base/output.h"
#include "parser/antlr_line_buffered_input.h"
#include "parser/bounded_token_buffer.h"
#include "parser/bounded_token_factory.h"
#include "parser/cvc/cvc_input.h"
#include "parser/input.h"
#include "parser/memory_mapped_input_buffer.h"
#include "parser/parser.h"
#include "parser/parser_exception.h"
#include "parser/smt2/smt2_input.h"
#include "parser/smt2/sygus_input.h"
#include "parser/tptp/tptp_input.h"

using namespace std;
using namespace cvc5;
using namespace cvc5::parser;
using namespace cvc5::kind;

namespace cvc5 {
namespace parser {

// These functions exactly wrap the antlr3 source inconsistencies.
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
// No other sanity checking happens;
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer);
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name);

pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer) {
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
                                            name_duplicate, line_buffer);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return inputStream;
}

pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
  pANTLR3_INPUT_STREAM input = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

  // libantlr3c v3.2 isn't source-compatible with v3.4
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  input = antlr3AsciiFileStreamNew(name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return input;
}


pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name){
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
  pANTLR3_INPUT_STREAM inputStream = NULL;
  /* Create an ANTLR input backed by the buffer. */
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream =
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
                          name_duplicate);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  free(name_duplicate);
  return inputStream;
}

AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
                                   bool fileIsTemporary,
                                   pANTLR3_UINT8 inputString,
                                   LineBuffer* line_buffer)
    : InputStream(name, fileIsTemporary),
      d_input(input),
      d_inputString(inputString),
      d_line_buffer(line_buffer) {
  Assert(input != NULL);
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
}

AntlrInputStream::~AntlrInputStream() {
  d_input->free(d_input);
  if(d_inputString != NULL){
    free(d_inputString);
  }
  if (d_line_buffer != NULL) {
    delete d_line_buffer;
  }
}

pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
  return d_input;
}



AntlrInputStream*
AntlrInputStream::newFileInputStream(const std::string& name,
                                     bool useMmap)
{
#ifdef _WIN32
  if(useMmap) {
    useMmap = false;
  }
#endif
  pANTLR3_INPUT_STREAM input = NULL;
  if(useMmap) {
    input = MemoryMappedInputBufferNew(name);
  } else {
    input = newAntlr3FileStream(name);
  }
  if(input == NULL) {
    throw InputStreamException("Couldn't open file: " + name);
  }
  return new AntlrInputStream(name, input, false, NULL, NULL);
}

AntlrInputStream* AntlrInputStream::newStreamInputStream(
    std::istream& input, const std::string& name)
{
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 inputStringCopy = NULL;
  LineBuffer* line_buffer = new LineBuffer(&input);
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
                              line_buffer);
}


AntlrInputStream*
AntlrInputStream::newStringInputStream(const std::string& input,
                                       const std::string& name)
{
  size_t input_size = input.size();
  Assert(input_size <= std::numeric_limits<uint32_t>::max());

  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());

  if( input_duplicate == NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }

  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
  if( inputStream==NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
}

AntlrInput* AntlrInput::newInput(const std::string& lang,
                                 AntlrInputStream& inputStream)
{
  if (lang == "LANG_CVC")
  {
    return new CvcInput(inputStream);
  }
  else if (lang == "LANG_SYGUS_V2")
  {
    return new SygusInput(inputStream);
  }
  else if (lang == "LANG_TPTP")
  {
    return new TptpInput(inputStream);
  }
  else if (lang == "LANG_SMTLIB_V2_6")
  {
    return new Smt2Input(inputStream);
  }
  else
  {
    throw InputStreamException(
        "unable to detect input file format, try --lang ");
  }
}

AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
    Input(inputStream),
    d_lookahead(lookahead),
    d_lexer(NULL),
    d_parser(NULL),
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
    d_tokenBuffer(NULL) {
}

/*
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
  Parser(exprManager,name),
  d_lookahead(lookahead) {

}
*/

/*
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
  Input(exprManager,name),
  d_lookahead(lookahead),
  d_lexer(NULL),
  d_parser(NULL),
  d_tokenStream(NULL) {

  char* inputStr = strdup(input.c_str());
  char* nameStr = strdup(name.c_str());
  if( inputStr==NULL || nameStr==NULL ) {
    throw ParserException("Couldn't initialize string input: '" + input + "'");
  }
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
  if( d_inputStream == NULL ) {
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
  }

}
*/

AntlrInput::~AntlrInput() {
  BoundedTokenBufferFree(d_tokenBuffer);
}

pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
  return d_tokenBuffer->commonTstream;
}

void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
  Assert(lexer != NULL);
  Parser *parser = (Parser*)(lexer->super);
  Assert(parser != NULL);
  AntlrInput *input = (AntlrInput*) parser->getInput();
  Assert(input != NULL);

  /* Call the error display routine *if* there's not already a
   * parse error pending.  If a parser error is pending, this
   * error is probably less important, so we just drop it. */
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
    input->parseError("Error finding next token.");
  }
}

void AntlrInput::warning(const std::string& message) {
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
}



/**
 * characters considered part of a simple symbol in SMTLIB.
 *
 * TODO: Ideally this code shouldn't be smtlib specific (should work
 * with CVC language too), but this per-language specialization has
 * been left for a later point.
 */
inline bool isSimpleChar(char ch) {
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
}

size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
  size_t st = 0;
  size_t N = input.size();
  while(st < N) {
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
    size_t en = st;
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
    if(en - st + 1 == pattern.size()) {
      bool match = true;
      for(size_t i = 0; match && i < pattern.size(); ++i) {
        match &= (pattern[i] == input[st+i]);
      }
      if(match == true) {
        return st;
      }
    }
    st = en + 1;
  }
  return string::npos;
}

/**
 * Gets part of original input and tries to visually hint where the
 * error might be.
 *
 * Something like:
 *
 *   ...nd (= alpha beta) (= beta delta))
 *                                ^
 *
 * Implementation (as on 2014/04/24):
 *
 * > if suggested pointer by lexer is under a "simple char", move to
 *   start of the word and print pointer there.
 *
 * > in the other case, it tries to find the nearest word in the error
 *   message passed along. if it can't find it, we don't add this
 *   visual hint, as experimentally position suggested by lexer was
 *   found to be totally unhelpful. (TODO: fix this upstream to
 *   improve)
 */
std::string parseErrorHelper(const char* lineStart,
                             std::size_t lineLength,
                             std::size_t charPositionInLine,
                             const std::string& message)
{
  // Is it a multi-line message
  bool multilineMessage = (message.find('\n') != string::npos);
  // Useful only if it is a multi-line message
  int firstLineEnd = message.find('\n');

  std::ostringstream ss, slicess;

  // Keep first line of message
  if(multilineMessage) {
    ss << message.substr(0, firstLineEnd) << endl << endl;
  } else {
    ss << message << endl << endl;
  }

  std::size_t posSliceStart =
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
  std::size_t posSliceEnd = posSliceStart + 70;
  std::size_t caretPos = 0;
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.

  ss << "  "; caretPosExtra += 2;
  if(posSliceStart > 0) {
    ss << "..."; caretPosExtra += 3;
  }

  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
       ++i)
  {
    if(i == posSliceEnd) {
      ss << "...";
      break;
    }
    if(i < charPositionInLine) { caretPos++; }

    if(!isprint(lineStart[i])) {
      // non-printable character, something wrong, bail out
      return message;
    }

    ss << (lineStart[i]);
    slicess << (lineStart[i]);
  }

  // adjust position of caret, based on slice and message
  {
    int caretPosOrig = caretPos;
    string slice = slicess.str();
    if(isSimpleChar(slice[caretPos])) {
      // if alphanumeric, try to go to beginning of word/number
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
      if(caretPos == 0 && posSliceStart > 0) {
        // reached start and this is not really the start? bail out
        return message;
      } else {
        // likely it is also in original message? if so, very likely
        // we found the right place
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
        if( matchLoc != string::npos ) {
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
        }
      }
    } else {
      bool foundCaretPos = false;

      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
        // go to nearest alphanumeric string (before current position),
        // see if that word can be found in original message. If so,
        // point to that, else keep pointer where it was.
        int nearestWordEn = caretPos - 1;
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
          --nearestWordEn;
        }
        if(isSimpleChar(slice[nearestWordEn])) {
          int nearestWordSt = nearestWordEn;
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
            --nearestWordSt;
          }
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
          if( matchLoc != string::npos ) {
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
                                    << nearestWordSt << std::endl;
            foundCaretPos = true;
          }
          caretPos = nearestWordSt;
        }
      }
      if( !foundCaretPos) {
        // this doesn't look good. caret generally getting printed
        // at unhelpful positions. improve upstream?
        return message;
      }
    }
    caretPos += caretPosExtra;
  }// end of caret position computation/heuristics

  ss << std::endl;
  while( caretPos-- > 0 ) {
    ss << ' ';
  }
  ss << '^' << endl;
  if(multilineMessage) {
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
  }
  return ss.str();
}

void AntlrInput::parseError(const std::string& message, bool eofException)
{
  auto lineLength = d_antlr3InputStream->sizeBuf
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
                       - static_cast<char*>(d_antlr3InputStream->data));
  std::string updatedMessage = parseErrorHelper(
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
      lineLength,
      d_lexer->getCharPositionInLine(d_lexer),
      message);

  Debug("parser") << "Throwing exception: "
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
      << d_lexer->getLine(d_lexer) << "."
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
      << updatedMessage << endl;
  if(eofException) {
    throw ParserEndOfFileException(message,
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                                   d_lexer->getLine(d_lexer),
                                   d_lexer->getCharPositionInLine(d_lexer));
  } else {
    throw ParserException(updatedMessage,
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                          d_lexer->getLine(d_lexer),
                          d_lexer->getCharPositionInLine(d_lexer));
  }
}


void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
  d_lexer = pLexer;

  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
  if( pTokenFactory != NULL ) {
    pTokenFactory->close(pTokenFactory);
  }

  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
  if( pTokenFactory == NULL ) {
    throw InputStreamException("Couldn't create token factory.");
  }
  d_lexer->rec->state->tokFactory = pTokenFactory;

  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
  if( buffer == NULL ) {
    throw InputStreamException("Couldn't create token buffer.");
  }

  d_tokenBuffer = buffer;

  // Override default lexer error reporting
  d_lexer->rec->reportError = &lexerError;
  // Override default nextToken function, just to prevent exceptions escaping.
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
}

void AntlrInput::setParser(Parser& parser) {
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
  // We could also use @lexer/parser::context to add a field to the generated
  // objects, but then it would have to be declared separately in every
  // language's grammar and we'd have to in the address of the field anyway.
  d_lexer->super = &parser;
  d_parser->super = &parser;
}

void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
  d_parser = pParser;
//  d_parser->rec->match = &match;
  d_parser->rec->reportError = &reportError;
  /* Don't try to recover from a parse error. */
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
  d_parser->rec->recoverFromMismatchedToken =
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
      d_parser->rec->mismatch;
}

}  // namespace parser
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Christopher L. Conway, Kshitij Bansal, Tim King
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* A super-class for ANTLR-generated input language parsers.
14		*/
15
16		#include "parser/antlr_input.h"
17
18		#include <antlr3.h>
19		#include <limits>
20
21		#include "base/check.h"
22		#include "base/output.h"
23		#include "parser/antlr_line_buffered_input.h"
24		#include "parser/bounded_token_buffer.h"
25		#include "parser/bounded_token_factory.h"
26		#include "parser/cvc/cvc_input.h"
27		#include "parser/input.h"
28		#include "parser/memory_mapped_input_buffer.h"
29		#include "parser/parser.h"
30		#include "parser/parser_exception.h"
31		#include "parser/smt2/smt2_input.h"
32		#include "parser/smt2/sygus_input.h"
33		#include "parser/tptp/tptp_input.h"
34
35		using namespace std;
36		using namespace cvc5;
37		using namespace cvc5::parser;
38		using namespace cvc5::kind;
39
40		namespace cvc5 {
41		namespace parser {
42
43		// These functions exactly wrap the antlr3 source inconsistencies.
44		// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
45		// No other sanity checking happens;
46		pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
47		const std::string& name,
48		LineBuffer* line_buffer);
49		pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
50		pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
51		uint32_t size,
52		const std::string& name);
53
54	4	pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
55		const std::string& name,
56		LineBuffer* line_buffer) {
57	4	pANTLR3_INPUT_STREAM inputStream = NULL;
58	4	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
59
60		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
61		inputStream =
62		antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
63		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
64	4	inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
65		name_duplicate, line_buffer);
66		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
67
68	4	free(name_duplicate);
69	4	return inputStream;
70		}
71
72	6177	pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
73	6177	pANTLR3_INPUT_STREAM input = NULL;
74	6177	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
75
76		// libantlr3c v3.2 isn't source-compatible with v3.4
77		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
78		input = antlr3AsciiFileStreamNew(name_duplicate);
79		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
80	6177	input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
81		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
82
83	6177	free(name_duplicate);
84	6177	return input;
85		}
86
87
88	287	pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
89		uint32_t size,
90		const std::string& name){
91	287	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
92	287	pANTLR3_INPUT_STREAM inputStream = NULL;
93		/* Create an ANTLR input backed by the buffer. */
94		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
95		inputStream =
96		antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
97		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
98	287	inputStream =
99		antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
100		name_duplicate);
101		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
102	287	free(name_duplicate);
103	287	return inputStream;
104		}
105
106	6468	AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
107		bool fileIsTemporary,
108		pANTLR3_UINT8 inputString,
109	6468	LineBuffer* line_buffer)
110		: InputStream(name, fileIsTemporary),
111		d_input(input),
112		d_inputString(inputString),
113	6468	d_line_buffer(line_buffer) {
114	6468	Assert(input != NULL);
115	6468	input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
116	6468	}
117
118	19404	AntlrInputStream::~AntlrInputStream() {
119	6468	d_input->free(d_input);
120	6468	if(d_inputString != NULL){
121	287	free(d_inputString);
122		}
123	6468	if (d_line_buffer != NULL) {
124	4	delete d_line_buffer;
125		}
126	12936	}
127
128	12936	pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
129	12936	return d_input;
130		}
131
132
133
134		AntlrInputStream*
135	6177	AntlrInputStream::newFileInputStream(const std::string& name,
136		bool useMmap)
137		{
138		#ifdef _WIN32
139		if(useMmap) {
140		useMmap = false;
141		}
142		#endif
143	6177	pANTLR3_INPUT_STREAM input = NULL;
144	6177	if(useMmap) {
145		input = MemoryMappedInputBufferNew(name);
146		} else {
147	6177	input = newAntlr3FileStream(name);
148		}
149	6177	if(input == NULL) {
150		throw InputStreamException("Couldn't open file: " + name);
151		}
152	6177	return new AntlrInputStream(name, input, false, NULL, NULL);
153		}
154
155	4	AntlrInputStream* AntlrInputStream::newStreamInputStream(
156		std::istream& input, const std::string& name)
157		{
158	4	pANTLR3_INPUT_STREAM inputStream = NULL;
159	4	pANTLR3_UINT8 inputStringCopy = NULL;
160	4	LineBuffer* line_buffer = new LineBuffer(&input);
161	4	inputStream = newAntlr3BufferedStream(input, name, line_buffer);
162		return new AntlrInputStream(name, inputStream, false, inputStringCopy,
163	4	line_buffer);
164		}
165
166
167		AntlrInputStream*
168	287	AntlrInputStream::newStringInputStream(const std::string& input,
169		const std::string& name)
170		{
171	287	size_t input_size = input.size();
172	287	Assert(input_size <= std::numeric_limits<uint32_t>::max());
173
174		// Ownership of input_duplicate is transferred to the AntlrInputStream.
175	287	pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
176
177	287	if( input_duplicate == NULL ) {
178		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
179		}
180
181	287	pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
182	287	if( inputStream==NULL ) {
183		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
184		}
185	287	return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
186		}
187
188	6468	AntlrInput* AntlrInput::newInput(const std::string& lang,
189		AntlrInputStream& inputStream)
190		{
191	6468	if (lang == "LANG_CVC")
192		{
193	937	return new CvcInput(inputStream);
194		}
195	5531	else if (lang == "LANG_SYGUS_V2")
196		{
197	198	return new SygusInput(inputStream);
198		}
199	5333	else if (lang == "LANG_TPTP")
200		{
201	41	return new TptpInput(inputStream);
202		}
203	5292	else if (lang == "LANG_SMTLIB_V2_6")
204		{
205	5292	return new Smt2Input(inputStream);
206		}
207		else
208		{
209		throw InputStreamException(
210		"unable to detect input file format, try --lang ");
211		}
212		}
213
214	6468	AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
215		Input(inputStream),
216		d_lookahead(lookahead),
217		d_lexer(NULL),
218		d_parser(NULL),
219	6468	d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
220	12936	d_tokenBuffer(NULL) {
221	6468	}
222
223		/*
224		AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
225		Parser(exprManager,name),
226		d_lookahead(lookahead) {
227
228		}
229		*/
230
231		/*
232		AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
233		Input(exprManager,name),
234		d_lookahead(lookahead),
235		d_lexer(NULL),
236		d_parser(NULL),
237		d_tokenStream(NULL) {
238
239		char* inputStr = strdup(input.c_str());
240		char* nameStr = strdup(name.c_str());
241		if( inputStr==NULL \|\| nameStr==NULL ) {
242		throw ParserException("Couldn't initialize string input: '" + input + "'");
243		}
244		d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
245		if( d_inputStream == NULL ) {
246		throw ParserException("Couldn't create input stream for string: '" + input + "'");
247		}
248
249		}
250		*/
251
252	12936	AntlrInput::~AntlrInput() {
253	6468	BoundedTokenBufferFree(d_tokenBuffer);
254	6468	}
255
256	6468	pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
257	6468	return d_tokenBuffer->commonTstream;
258		}
259
260	12	void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
261	12	pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
262	12	Assert(lexer != NULL);
263	12	Parser parser = (Parser)(lexer->super);
264	12	Assert(parser != NULL);
265	12	AntlrInput input = (AntlrInput) parser->getInput();
266	12	Assert(input != NULL);
267
268		/* Call the error display routine if there's not already a
269		* parse error pending. If a parser error is pending, this
270		* error is probably less important, so we just drop it. */
271	12	if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
272	24	input->parseError("Error finding next token.");
273		}
274		}
275
276	287	void AntlrInput::warning(const std::string& message) {
277	287	Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
278	287	}
279
280
281
282		/**
283		* characters considered part of a simple symbol in SMTLIB.
284		*
285		* TODO: Ideally this code shouldn't be smtlib specific (should work
286		* with CVC language too), but this per-language specialization has
287		* been left for a later point.
288		*/
289	6524	inline bool isSimpleChar(char ch) {
290	6524	return isalnum(ch) \|\| (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
291		}
292
293	159	size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
294	159	size_t st = 0;
295	159	size_t N = input.size();
296	1677	while(st < N) {
297	1759	while( st < N && (*isWordChar)(input[st]) == false ) st++;
298	819	size_t en = st;
299	4063	while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
300	819	if(en - st + 1 == pattern.size()) {
301	146	bool match = true;
302	393	for(size_t i = 0; match && i < pattern.size(); ++i) {
303	247	match &= (pattern[i] == input[st+i]);
304		}
305	146	if(match == true) {
306	60	return st;
307		}
308		}
309	759	st = en + 1;
310		}
311	99	return string::npos;
312		}
313
314		/**
315		* Gets part of original input and tries to visually hint where the
316		* error might be.
317		*
318		* Something like:
319		*
320		* ...nd (= alpha beta) (= beta delta))
321		* ^
322		*
323		* Implementation (as on 2014/04/24):
324		*
325		* > if suggested pointer by lexer is under a "simple char", move to
326		* start of the word and print pointer there.
327		*
328		* > in the other case, it tries to find the nearest word in the error
329		* message passed along. if it can't find it, we don't add this
330		* visual hint, as experimentally position suggested by lexer was
331		* found to be totally unhelpful. (TODO: fix this upstream to
332		* improve)
333		*/
334	121	std::string parseErrorHelper(const char* lineStart,
335		std::size_t lineLength,
336		std::size_t charPositionInLine,
337		const std::string& message)
338		{
339		// Is it a multi-line message
340	121	bool multilineMessage = (message.find('\n') != string::npos);
341		// Useful only if it is a multi-line message
342	121	int firstLineEnd = message.find('\n');
343
344	242	std::ostringstream ss, slicess;
345
346		// Keep first line of message
347	121	if(multilineMessage) {
348	6	ss << message.substr(0, firstLineEnd) << endl << endl;
349		} else {
350	115	ss << message << endl << endl;
351		}
352
353	121	std::size_t posSliceStart =
354	121	(charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
355	121	std::size_t posSliceEnd = posSliceStart + 70;
356	121	std::size_t caretPos = 0;
357	121	std::size_t caretPosExtra = 0; // for inital intendation, epilipses etc.
358
359	121	ss << " "; caretPosExtra += 2;
360	121	if(posSliceStart > 0) {
361	11	ss << "..."; caretPosExtra += 3;
362		}
363
364	2483	for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
365		++i)
366		{
367	2366	if(i == posSliceEnd) {
368	4	ss << "...";
369	4	break;
370		}
371	2362	if(i < charPositionInLine) { caretPos++; }
372
373	2362	if(!isprint(lineStart[i])) {
374		// non-printable character, something wrong, bail out
375		return message;
376		}
377
378	2362	ss << (lineStart[i]);
379	2362	slicess << (lineStart[i]);
380		}
381
382		// adjust position of caret, based on slice and message
383		{
384	121	int caretPosOrig = caretPos;
385	222	string slice = slicess.str();
386	121	if(isSimpleChar(slice[caretPos])) {
387		// if alphanumeric, try to go to beginning of word/number
388	187	while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
389	57	if(caretPos == 0 && posSliceStart > 0) {
390		// reached start and this is not really the start? bail out
391		return message;
392		} else {
393		// likely it is also in original message? if so, very likely
394		// we found the right place
395	114	string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
396	57	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
397	57	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
398	57	if( matchLoc != string::npos ) {
399	16	Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
400		}
401		}
402		} else {
403	64	bool foundCaretPos = false;
404
405	170	for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
406		// go to nearest alphanumeric string (before current position),
407		// see if that word can be found in original message. If so,
408		// point to that, else keep pointer where it was.
409	106	int nearestWordEn = caretPos - 1;
410	234	while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
411	64	--nearestWordEn;
412		}
413	106	if(isSimpleChar(slice[nearestWordEn])) {
414	102	int nearestWordSt = nearestWordEn;
415	396	while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
416	147	--nearestWordSt;
417		}
418	204	string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
419	102	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
420	102	Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
421	102	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
422	102	if( matchLoc != string::npos ) {
423	88	Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
424	44	<< nearestWordSt << std::endl;
425	44	foundCaretPos = true;
426		}
427	102	caretPos = nearestWordSt;
428		}
429		}
430	64	if( !foundCaretPos) {
431		// this doesn't look good. caret generally getting printed
432		// at unhelpful positions. improve upstream?
433	20	return message;
434		}
435		}
436	101	caretPos += caretPosExtra;
437		}// end of caret position computation/heuristics
438
439	101	ss << std::endl;
440	2637	while( caretPos-- > 0 ) {
441	1268	ss << ' ';
442		}
443	101	ss << '^' << endl;
444	101	if(multilineMessage) {
445	6	ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
446		}
447	101	return ss.str();
448		}
449
450	121	void AntlrInput::parseError(const std::string& message, bool eofException)
451		{
452	242	auto lineLength = d_antlr3InputStream->sizeBuf
453	121	- (static_cast<char*>(d_antlr3InputStream->currentLine)
454	121	- static_cast<char*>(d_antlr3InputStream->data));
455		std::string updatedMessage = parseErrorHelper(
456	121	(const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
457		lineLength,
458	121	d_lexer->getCharPositionInLine(d_lexer),
459	363	message);
460
461	242	Debug("parser") << "Throwing exception: "
462	242	<< (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
463	242	<< d_lexer->getLine(d_lexer) << "."
464	242	<< d_lexer->getCharPositionInLine(d_lexer) << ": "
465	121	<< updatedMessage << endl;
466	121	if(eofException) {
467		throw ParserEndOfFileException(message,
468	6	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
469	6	d_lexer->getLine(d_lexer),
470	18	d_lexer->getCharPositionInLine(d_lexer));
471		} else {
472		throw ParserException(updatedMessage,
473	115	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
474	115	d_lexer->getLine(d_lexer),
475	345	d_lexer->getCharPositionInLine(d_lexer));
476		}
477		}
478
479
480	6468	void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
481	6468	d_lexer = pLexer;
482
483	6468	pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
484	6468	if( pTokenFactory != NULL ) {
485	6468	pTokenFactory->close(pTokenFactory);
486		}
487
488		/* 2lookahead should be sufficient, but we give ourselves some breathing room. /
489	6468	pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
490	6468	if( pTokenFactory == NULL ) {
491		throw InputStreamException("Couldn't create token factory.");
492		}
493	6468	d_lexer->rec->state->tokFactory = pTokenFactory;
494
495	6468	pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
496	6468	if( buffer == NULL ) {
497		throw InputStreamException("Couldn't create token buffer.");
498		}
499
500	6468	d_tokenBuffer = buffer;
501
502		// Override default lexer error reporting
503	6468	d_lexer->rec->reportError = &lexerError;
504		// Override default nextToken function, just to prevent exceptions escaping.
505	6468	d_lexer->rec->state->tokSource->nextToken = &nextToken;
506	6468	}
507
508	6468	void AntlrInput::setParser(Parser& parser) {
509		// ANTLR isn't using super in the lexer or the parser, AFAICT.
510		// We could also use @lexer/parser::context to add a field to the generated
511		// objects, but then it would have to be declared separately in every
512		// language's grammar and we'd have to in the address of the field anyway.
513	6468	d_lexer->super = &parser;
514	6468	d_parser->super = &parser;
515	6468	}
516
517	6468	void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
518	6468	d_parser = pParser;
519		// d_parser->rec->match = &match;
520	6468	d_parser->rec->reportError = &reportError;
521		/* Don't try to recover from a parse error. */
522		// [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
523	12936	d_parser->rec->recoverFromMismatchedToken =
524		(void* ()(ANTLR3_BASE_RECOGNIZER_struct, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
525	12936	d_parser->rec->mismatch;
526	6468	}
527
528		} // namespace parser
529	29562	} // namespace cvc5