Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/parser/antlr_input.cpp	Lines:	209	221	94.6 %
Date:	2021-05-21	Branches:	234	468	50.0 %


/******************************************************************************
 * Top contributors (to current version):
 *   Christopher L. Conway, Kshitij Bansal, Tim King
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * A super-class for ANTLR-generated input language parsers.
 */

#include "parser/antlr_input.h"

#include <antlr3.h>
#include <limits>

#include "base/check.h"
#include "base/output.h"
#include "parser/antlr_line_buffered_input.h"
#include "parser/bounded_token_buffer.h"
#include "parser/bounded_token_factory.h"
#include "parser/cvc/cvc_input.h"
#include "parser/input.h"
#include "parser/memory_mapped_input_buffer.h"
#include "parser/parser.h"
#include "parser/parser_exception.h"
#include "parser/smt2/smt2_input.h"
#include "parser/smt2/sygus_input.h"
#include "parser/tptp/tptp_input.h"

using namespace std;
using namespace cvc5;
using namespace cvc5::parser;
using namespace cvc5::kind;

namespace cvc5 {
namespace parser {

// These functions exactly wrap the antlr3 source inconsistencies.
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
// No other sanity checking happens;
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer);
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name);

pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer) {
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
                                            name_duplicate, line_buffer);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return inputStream;
}

pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
  pANTLR3_INPUT_STREAM input = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

  // libantlr3c v3.2 isn't source-compatible with v3.4
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  input = antlr3AsciiFileStreamNew(name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return input;
}


pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name){
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
  pANTLR3_INPUT_STREAM inputStream = NULL;
  /* Create an ANTLR input backed by the buffer. */
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream =
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
                          name_duplicate);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  free(name_duplicate);
  return inputStream;
}

AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
                                   bool fileIsTemporary,
                                   pANTLR3_UINT8 inputString,
                                   LineBuffer* line_buffer)
    : InputStream(name, fileIsTemporary),
      d_input(input),
      d_inputString(inputString),
      d_line_buffer(line_buffer) {
  Assert(input != NULL);
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
}

AntlrInputStream::~AntlrInputStream() {
  d_input->free(d_input);
  if(d_inputString != NULL){
    free(d_inputString);
  }
  if (d_line_buffer != NULL) {
    delete d_line_buffer;
  }
}

pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
  return d_input;
}



AntlrInputStream*
AntlrInputStream::newFileInputStream(const std::string& name,
                                     bool useMmap)
{
#ifdef _WIN32
  if(useMmap) {
    useMmap = false;
  }
#endif
  pANTLR3_INPUT_STREAM input = NULL;
  if(useMmap) {
    input = MemoryMappedInputBufferNew(name);
  } else {
    input = newAntlr3FileStream(name);
  }
  if(input == NULL) {
    throw InputStreamException("Couldn't open file: " + name);
  }
  return new AntlrInputStream(name, input, false, NULL, NULL);
}

AntlrInputStream* AntlrInputStream::newStreamInputStream(
    std::istream& input, const std::string& name)
{
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 inputStringCopy = NULL;
  LineBuffer* line_buffer = new LineBuffer(&input);
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
                              line_buffer);
}


AntlrInputStream*
AntlrInputStream::newStringInputStream(const std::string& input,
                                       const std::string& name)
{
  size_t input_size = input.size();
  Assert(input_size <= std::numeric_limits<uint32_t>::max());

  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());

  if( input_duplicate == NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }

  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
  if( inputStream==NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
}

AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
  using namespace language::input;

  AntlrInput* input;

  switch(lang) {
    case LANG_CVC:
    {
      input = new CvcInput(inputStream);
      break;
    }

  case LANG_SYGUS_V2: input = new SygusInput(inputStream); break;

  case LANG_TPTP:
    input = new TptpInput(inputStream);
    break;

  default:
    if (language::isInputLang_smt2(lang))
    {
      input = new Smt2Input(inputStream);
    }
    else
    {
      std::stringstream ss;
      ss << "unable to detect input file format, try --lang ";
      throw InputStreamException(ss.str());
    }
  }

  return input;
}

AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
    Input(inputStream),
    d_lookahead(lookahead),
    d_lexer(NULL),
    d_parser(NULL),
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
    d_tokenBuffer(NULL) {
}

/*
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
  Parser(exprManager,name),
  d_lookahead(lookahead) {

}
*/

/*
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
  Input(exprManager,name),
  d_lookahead(lookahead),
  d_lexer(NULL),
  d_parser(NULL),
  d_tokenStream(NULL) {

  char* inputStr = strdup(input.c_str());
  char* nameStr = strdup(name.c_str());
  if( inputStr==NULL || nameStr==NULL ) {
    throw ParserException("Couldn't initialize string input: '" + input + "'");
  }
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
  if( d_inputStream == NULL ) {
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
  }

}
*/

AntlrInput::~AntlrInput() {
  BoundedTokenBufferFree(d_tokenBuffer);
}

pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
  return d_tokenBuffer->commonTstream;
}

void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
  Assert(lexer != NULL);
  Parser *parser = (Parser*)(lexer->super);
  Assert(parser != NULL);
  AntlrInput *input = (AntlrInput*) parser->getInput();
  Assert(input != NULL);

  /* Call the error display routine *if* there's not already a
   * parse error pending.  If a parser error is pending, this
   * error is probably less important, so we just drop it. */
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
    input->parseError("Error finding next token.");
  }
}

void AntlrInput::warning(const std::string& message) {
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
}



/**
 * characters considered part of a simple symbol in SMTLIB.
 *
 * TODO: Ideally this code shouldn't be smtlib specific (should work
 * with CVC language too), but this per-language specialization has
 * been left for a later point.
 */
inline bool isSimpleChar(char ch) {
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
}

size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
  size_t st = 0;
  size_t N = input.size();
  while(st < N) {
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
    size_t en = st;
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
    if(en - st + 1 == pattern.size()) {
      bool match = true;
      for(size_t i = 0; match && i < pattern.size(); ++i) {
        match &= (pattern[i] == input[st+i]);
      }
      if(match == true) {
        return st;
      }
    }
    st = en + 1;
  }
  return string::npos;
}

/**
 * Gets part of original input and tries to visually hint where the
 * error might be.
 *
 * Something like:
 *
 *   ...nd (= alpha beta) (= beta delta))
 *                                ^
 *
 * Implementation (as on 2014/04/24):
 *
 * > if suggested pointer by lexer is under a "simple char", move to
 *   start of the word and print pointer there.
 *
 * > in the other case, it tries to find the nearest word in the error
 *   message passed along. if it can't find it, we don't add this
 *   visual hint, as experimentally position suggested by lexer was
 *   found to be totally unhelpful. (TODO: fix this upstream to
 *   improve)
 */
std::string parseErrorHelper(const char* lineStart,
                             std::size_t lineLength,
                             std::size_t charPositionInLine,
                             const std::string& message)
{
  // Is it a multi-line message
  bool multilineMessage = (message.find('\n') != string::npos);
  // Useful only if it is a multi-line message
  int firstLineEnd = message.find('\n');

  std::ostringstream ss, slicess;

  // Keep first line of message
  if(multilineMessage) {
    ss << message.substr(0, firstLineEnd) << endl << endl;
  } else {
    ss << message << endl << endl;
  }

  std::size_t posSliceStart =
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
  std::size_t posSliceEnd = posSliceStart + 70;
  std::size_t caretPos = 0;
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.

  ss << "  "; caretPosExtra += 2;
  if(posSliceStart > 0) {
    ss << "..."; caretPosExtra += 3;
  }

  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
       ++i)
  {
    if(i == posSliceEnd) {
      ss << "...";
      break;
    }
    if(i < charPositionInLine) { caretPos++; }

    if(!isprint(lineStart[i])) {
      // non-printable character, something wrong, bail out
      return message;
    }

    ss << (lineStart[i]);
    slicess << (lineStart[i]);
  }

  // adjust position of caret, based on slice and message
  {
    int caretPosOrig = caretPos;
    string slice = slicess.str();
    if(isSimpleChar(slice[caretPos])) {
      // if alphanumeric, try to go to beginning of word/number
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
      if(caretPos == 0 && posSliceStart > 0) {
        // reached start and this is not really the start? bail out
        return message;
      } else {
        // likely it is also in original message? if so, very likely
        // we found the right place
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
        if( matchLoc != string::npos ) {
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
        }
      }
    } else {
      bool foundCaretPos = false;

      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
        // go to nearest alphanumeric string (before current position),
        // see if that word can be found in original message. If so,
        // point to that, else keep pointer where it was.
        int nearestWordEn = caretPos - 1;
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
          --nearestWordEn;
        }
        if(isSimpleChar(slice[nearestWordEn])) {
          int nearestWordSt = nearestWordEn;
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
            --nearestWordSt;
          }
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
          if( matchLoc != string::npos ) {
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
                                    << nearestWordSt << std::endl;
            foundCaretPos = true;
          }
          caretPos = nearestWordSt;
        }
      }
      if( !foundCaretPos) {
        // this doesn't look good. caret generally getting printed
        // at unhelpful positions. improve upstream?
        return message;
      }
    }
    caretPos += caretPosExtra;
  }// end of caret position computation/heuristics

  ss << std::endl;
  while( caretPos-- > 0 ) {
    ss << ' ';
  }
  ss << '^' << endl;
  if(multilineMessage) {
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
  }
  return ss.str();
}

void AntlrInput::parseError(const std::string& message, bool eofException)
{
  auto lineLength = d_antlr3InputStream->sizeBuf
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
                       - static_cast<char*>(d_antlr3InputStream->data));
  std::string updatedMessage = parseErrorHelper(
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
      lineLength,
      d_lexer->getCharPositionInLine(d_lexer),
      message);

  Debug("parser") << "Throwing exception: "
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
      << d_lexer->getLine(d_lexer) << "."
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
      << updatedMessage << endl;
  if(eofException) {
    throw ParserEndOfFileException(message,
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                                   d_lexer->getLine(d_lexer),
                                   d_lexer->getCharPositionInLine(d_lexer));
  } else {
    throw ParserException(updatedMessage,
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                          d_lexer->getLine(d_lexer),
                          d_lexer->getCharPositionInLine(d_lexer));
  }
}


void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
  d_lexer = pLexer;

  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
  if( pTokenFactory != NULL ) {
    pTokenFactory->close(pTokenFactory);
  }

  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
  if( pTokenFactory == NULL ) {
    throw InputStreamException("Couldn't create token factory.");
  }
  d_lexer->rec->state->tokFactory = pTokenFactory;

  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
  if( buffer == NULL ) {
    throw InputStreamException("Couldn't create token buffer.");
  }

  d_tokenBuffer = buffer;

  // Override default lexer error reporting
  d_lexer->rec->reportError = &lexerError;
  // Override default nextToken function, just to prevent exceptions escaping.
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
}

void AntlrInput::setParser(Parser& parser) {
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
  // We could also use @lexer/parser::context to add a field to the generated
  // objects, but then it would have to be declared separately in every
  // language's grammar and we'd have to in the address of the field anyway.
  d_lexer->super = &parser;
  d_parser->super = &parser;
}

void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
  d_parser = pParser;
//  d_parser->rec->match = &match;
  d_parser->rec->reportError = &reportError;
  /* Don't try to recover from a parse error. */
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
  d_parser->rec->recoverFromMismatchedToken =
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
      d_parser->rec->mismatch;
}

}  // namespace parser
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Christopher L. Conway, Kshitij Bansal, Tim King
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* A super-class for ANTLR-generated input language parsers.
14		*/
15
16		#include "parser/antlr_input.h"
17
18		#include <antlr3.h>
19		#include <limits>
20
21		#include "base/check.h"
22		#include "base/output.h"
23		#include "parser/antlr_line_buffered_input.h"
24		#include "parser/bounded_token_buffer.h"
25		#include "parser/bounded_token_factory.h"
26		#include "parser/cvc/cvc_input.h"
27		#include "parser/input.h"
28		#include "parser/memory_mapped_input_buffer.h"
29		#include "parser/parser.h"
30		#include "parser/parser_exception.h"
31		#include "parser/smt2/smt2_input.h"
32		#include "parser/smt2/sygus_input.h"
33		#include "parser/tptp/tptp_input.h"
34
35		using namespace std;
36		using namespace cvc5;
37		using namespace cvc5::parser;
38		using namespace cvc5::kind;
39
40		namespace cvc5 {
41		namespace parser {
42
43		// These functions exactly wrap the antlr3 source inconsistencies.
44		// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
45		// No other sanity checking happens;
46		pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
47		const std::string& name,
48		LineBuffer* line_buffer);
49		pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
50		pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
51		uint32_t size,
52		const std::string& name);
53
54	4	pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
55		const std::string& name,
56		LineBuffer* line_buffer) {
57	4	pANTLR3_INPUT_STREAM inputStream = NULL;
58	4	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
59
60		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
61		inputStream =
62		antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
63		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
64	4	inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
65		name_duplicate, line_buffer);
66		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
67
68	4	free(name_duplicate);
69	4	return inputStream;
70		}
71
72	5730	pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
73	5730	pANTLR3_INPUT_STREAM input = NULL;
74	5730	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
75
76		// libantlr3c v3.2 isn't source-compatible with v3.4
77		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
78		input = antlr3AsciiFileStreamNew(name_duplicate);
79		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
80	5730	input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
81		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
82
83	5730	free(name_duplicate);
84	5730	return input;
85		}
86
87
88	287	pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
89		uint32_t size,
90		const std::string& name){
91	287	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
92	287	pANTLR3_INPUT_STREAM inputStream = NULL;
93		/* Create an ANTLR input backed by the buffer. */
94		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
95		inputStream =
96		antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
97		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
98	287	inputStream =
99		antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
100		name_duplicate);
101		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
102	287	free(name_duplicate);
103	287	return inputStream;
104		}
105
106	6021	AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
107		bool fileIsTemporary,
108		pANTLR3_UINT8 inputString,
109	6021	LineBuffer* line_buffer)
110		: InputStream(name, fileIsTemporary),
111		d_input(input),
112		d_inputString(inputString),
113	6021	d_line_buffer(line_buffer) {
114	6021	Assert(input != NULL);
115	6021	input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
116	6021	}
117
118	18063	AntlrInputStream::~AntlrInputStream() {
119	6021	d_input->free(d_input);
120	6021	if(d_inputString != NULL){
121	287	free(d_inputString);
122		}
123	6021	if (d_line_buffer != NULL) {
124	4	delete d_line_buffer;
125		}
126	12042	}
127
128	12042	pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
129	12042	return d_input;
130		}
131
132
133
134		AntlrInputStream*
135	5730	AntlrInputStream::newFileInputStream(const std::string& name,
136		bool useMmap)
137		{
138		#ifdef _WIN32
139		if(useMmap) {
140		useMmap = false;
141		}
142		#endif
143	5730	pANTLR3_INPUT_STREAM input = NULL;
144	5730	if(useMmap) {
145		input = MemoryMappedInputBufferNew(name);
146		} else {
147	5730	input = newAntlr3FileStream(name);
148		}
149	5730	if(input == NULL) {
150		throw InputStreamException("Couldn't open file: " + name);
151		}
152	5730	return new AntlrInputStream(name, input, false, NULL, NULL);
153		}
154
155	4	AntlrInputStream* AntlrInputStream::newStreamInputStream(
156		std::istream& input, const std::string& name)
157		{
158	4	pANTLR3_INPUT_STREAM inputStream = NULL;
159	4	pANTLR3_UINT8 inputStringCopy = NULL;
160	4	LineBuffer* line_buffer = new LineBuffer(&input);
161	4	inputStream = newAntlr3BufferedStream(input, name, line_buffer);
162		return new AntlrInputStream(name, inputStream, false, inputStringCopy,
163	4	line_buffer);
164		}
165
166
167		AntlrInputStream*
168	287	AntlrInputStream::newStringInputStream(const std::string& input,
169		const std::string& name)
170		{
171	287	size_t input_size = input.size();
172	287	Assert(input_size <= std::numeric_limits<uint32_t>::max());
173
174		// Ownership of input_duplicate is transferred to the AntlrInputStream.
175	287	pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
176
177	287	if( input_duplicate == NULL ) {
178		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
179		}
180
181	287	pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
182	287	if( inputStream==NULL ) {
183		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
184		}
185	287	return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
186		}
187
188	6021	AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
189		using namespace language::input;
190
191		AntlrInput* input;
192
193	6021	switch(lang) {
194	944	case LANG_CVC:
195		{
196	944	input = new CvcInput(inputStream);
197	944	break;
198		}
199
200	191	case LANG_SYGUS_V2: input = new SygusInput(inputStream); break;
201
202	43	case LANG_TPTP:
203	43	input = new TptpInput(inputStream);
204	43	break;
205
206	4843	default:
207	4843	if (language::isInputLang_smt2(lang))
208		{
209	4843	input = new Smt2Input(inputStream);
210		}
211		else
212		{
213		std::stringstream ss;
214		ss << "unable to detect input file format, try --lang ";
215		throw InputStreamException(ss.str());
216		}
217		}
218
219	6021	return input;
220		}
221
222	6021	AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
223		Input(inputStream),
224		d_lookahead(lookahead),
225		d_lexer(NULL),
226		d_parser(NULL),
227	6021	d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
228	12042	d_tokenBuffer(NULL) {
229	6021	}
230
231		/*
232		AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
233		Parser(exprManager,name),
234		d_lookahead(lookahead) {
235
236		}
237		*/
238
239		/*
240		AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
241		Input(exprManager,name),
242		d_lookahead(lookahead),
243		d_lexer(NULL),
244		d_parser(NULL),
245		d_tokenStream(NULL) {
246
247		char* inputStr = strdup(input.c_str());
248		char* nameStr = strdup(name.c_str());
249		if( inputStr==NULL \|\| nameStr==NULL ) {
250		throw ParserException("Couldn't initialize string input: '" + input + "'");
251		}
252		d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
253		if( d_inputStream == NULL ) {
254		throw ParserException("Couldn't create input stream for string: '" + input + "'");
255		}
256
257		}
258		*/
259
260	12042	AntlrInput::~AntlrInput() {
261	6021	BoundedTokenBufferFree(d_tokenBuffer);
262	6021	}
263
264	6021	pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
265	6021	return d_tokenBuffer->commonTstream;
266		}
267
268	12	void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
269	12	pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
270	12	Assert(lexer != NULL);
271	12	Parser parser = (Parser)(lexer->super);
272	12	Assert(parser != NULL);
273	12	AntlrInput input = (AntlrInput) parser->getInput();
274	12	Assert(input != NULL);
275
276		/* Call the error display routine if there's not already a
277		* parse error pending. If a parser error is pending, this
278		* error is probably less important, so we just drop it. */
279	12	if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
280	24	input->parseError("Error finding next token.");
281		}
282		}
283
284	214	void AntlrInput::warning(const std::string& message) {
285	214	Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
286	214	}
287
288
289
290		/**
291		* characters considered part of a simple symbol in SMTLIB.
292		*
293		* TODO: Ideally this code shouldn't be smtlib specific (should work
294		* with CVC language too), but this per-language specialization has
295		* been left for a later point.
296		*/
297	6524	inline bool isSimpleChar(char ch) {
298	6524	return isalnum(ch) \|\| (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
299		}
300
301	159	size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
302	159	size_t st = 0;
303	159	size_t N = input.size();
304	1677	while(st < N) {
305	1759	while( st < N && (*isWordChar)(input[st]) == false ) st++;
306	819	size_t en = st;
307	4063	while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
308	819	if(en - st + 1 == pattern.size()) {
309	146	bool match = true;
310	393	for(size_t i = 0; match && i < pattern.size(); ++i) {
311	247	match &= (pattern[i] == input[st+i]);
312		}
313	146	if(match == true) {
314	60	return st;
315		}
316		}
317	759	st = en + 1;
318		}
319	99	return string::npos;
320		}
321
322		/**
323		* Gets part of original input and tries to visually hint where the
324		* error might be.
325		*
326		* Something like:
327		*
328		* ...nd (= alpha beta) (= beta delta))
329		* ^
330		*
331		* Implementation (as on 2014/04/24):
332		*
333		* > if suggested pointer by lexer is under a "simple char", move to
334		* start of the word and print pointer there.
335		*
336		* > in the other case, it tries to find the nearest word in the error
337		* message passed along. if it can't find it, we don't add this
338		* visual hint, as experimentally position suggested by lexer was
339		* found to be totally unhelpful. (TODO: fix this upstream to
340		* improve)
341		*/
342	121	std::string parseErrorHelper(const char* lineStart,
343		std::size_t lineLength,
344		std::size_t charPositionInLine,
345		const std::string& message)
346		{
347		// Is it a multi-line message
348	121	bool multilineMessage = (message.find('\n') != string::npos);
349		// Useful only if it is a multi-line message
350	121	int firstLineEnd = message.find('\n');
351
352	242	std::ostringstream ss, slicess;
353
354		// Keep first line of message
355	121	if(multilineMessage) {
356	6	ss << message.substr(0, firstLineEnd) << endl << endl;
357		} else {
358	115	ss << message << endl << endl;
359		}
360
361	121	std::size_t posSliceStart =
362	121	(charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
363	121	std::size_t posSliceEnd = posSliceStart + 70;
364	121	std::size_t caretPos = 0;
365	121	std::size_t caretPosExtra = 0; // for inital intendation, epilipses etc.
366
367	121	ss << " "; caretPosExtra += 2;
368	121	if(posSliceStart > 0) {
369	11	ss << "..."; caretPosExtra += 3;
370		}
371
372	2483	for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
373		++i)
374		{
375	2366	if(i == posSliceEnd) {
376	4	ss << "...";
377	4	break;
378		}
379	2362	if(i < charPositionInLine) { caretPos++; }
380
381	2362	if(!isprint(lineStart[i])) {
382		// non-printable character, something wrong, bail out
383		return message;
384		}
385
386	2362	ss << (lineStart[i]);
387	2362	slicess << (lineStart[i]);
388		}
389
390		// adjust position of caret, based on slice and message
391		{
392	121	int caretPosOrig = caretPos;
393	222	string slice = slicess.str();
394	121	if(isSimpleChar(slice[caretPos])) {
395		// if alphanumeric, try to go to beginning of word/number
396	187	while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
397	57	if(caretPos == 0 && posSliceStart > 0) {
398		// reached start and this is not really the start? bail out
399		return message;
400		} else {
401		// likely it is also in original message? if so, very likely
402		// we found the right place
403	114	string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
404	57	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
405	57	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
406	57	if( matchLoc != string::npos ) {
407	16	Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
408		}
409		}
410		} else {
411	64	bool foundCaretPos = false;
412
413	170	for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
414		// go to nearest alphanumeric string (before current position),
415		// see if that word can be found in original message. If so,
416		// point to that, else keep pointer where it was.
417	106	int nearestWordEn = caretPos - 1;
418	234	while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
419	64	--nearestWordEn;
420		}
421	106	if(isSimpleChar(slice[nearestWordEn])) {
422	102	int nearestWordSt = nearestWordEn;
423	396	while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
424	147	--nearestWordSt;
425		}
426	204	string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
427	102	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
428	102	Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
429	102	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
430	102	if( matchLoc != string::npos ) {
431	88	Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
432	44	<< nearestWordSt << std::endl;
433	44	foundCaretPos = true;
434		}
435	102	caretPos = nearestWordSt;
436		}
437		}
438	64	if( !foundCaretPos) {
439		// this doesn't look good. caret generally getting printed
440		// at unhelpful positions. improve upstream?
441	20	return message;
442		}
443		}
444	101	caretPos += caretPosExtra;
445		}// end of caret position computation/heuristics
446
447	101	ss << std::endl;
448	2637	while( caretPos-- > 0 ) {
449	1268	ss << ' ';
450		}
451	101	ss << '^' << endl;
452	101	if(multilineMessage) {
453	6	ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
454		}
455	101	return ss.str();
456		}
457
458	121	void AntlrInput::parseError(const std::string& message, bool eofException)
459		{
460	242	auto lineLength = d_antlr3InputStream->sizeBuf
461	121	- (static_cast<char*>(d_antlr3InputStream->currentLine)
462	121	- static_cast<char*>(d_antlr3InputStream->data));
463		std::string updatedMessage = parseErrorHelper(
464	121	(const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
465		lineLength,
466	121	d_lexer->getCharPositionInLine(d_lexer),
467	363	message);
468
469	242	Debug("parser") << "Throwing exception: "
470	242	<< (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
471	242	<< d_lexer->getLine(d_lexer) << "."
472	242	<< d_lexer->getCharPositionInLine(d_lexer) << ": "
473	121	<< updatedMessage << endl;
474	121	if(eofException) {
475		throw ParserEndOfFileException(message,
476	6	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
477	6	d_lexer->getLine(d_lexer),
478	18	d_lexer->getCharPositionInLine(d_lexer));
479		} else {
480		throw ParserException(updatedMessage,
481	115	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
482	115	d_lexer->getLine(d_lexer),
483	345	d_lexer->getCharPositionInLine(d_lexer));
484		}
485		}
486
487
488	6021	void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
489	6021	d_lexer = pLexer;
490
491	6021	pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
492	6021	if( pTokenFactory != NULL ) {
493	6021	pTokenFactory->close(pTokenFactory);
494		}
495
496		/* 2lookahead should be sufficient, but we give ourselves some breathing room. /
497	6021	pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
498	6021	if( pTokenFactory == NULL ) {
499		throw InputStreamException("Couldn't create token factory.");
500		}
501	6021	d_lexer->rec->state->tokFactory = pTokenFactory;
502
503	6021	pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
504	6021	if( buffer == NULL ) {
505		throw InputStreamException("Couldn't create token buffer.");
506		}
507
508	6021	d_tokenBuffer = buffer;
509
510		// Override default lexer error reporting
511	6021	d_lexer->rec->reportError = &lexerError;
512		// Override default nextToken function, just to prevent exceptions escaping.
513	6021	d_lexer->rec->state->tokSource->nextToken = &nextToken;
514	6021	}
515
516	6021	void AntlrInput::setParser(Parser& parser) {
517		// ANTLR isn't using super in the lexer or the parser, AFAICT.
518		// We could also use @lexer/parser::context to add a field to the generated
519		// objects, but then it would have to be declared separately in every
520		// language's grammar and we'd have to in the address of the field anyway.
521	6021	d_lexer->super = &parser;
522	6021	d_parser->super = &parser;
523	6021	}
524
525	6021	void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
526	6021	d_parser = pParser;
527		// d_parser->rec->match = &match;
528	6021	d_parser->rec->reportError = &reportError;
529		/* Don't try to recover from a parse error. */
530		// [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
531	12042	d_parser->rec->recoverFromMismatchedToken =
532		(void* ()(ANTLR3_BASE_RECOGNIZER_struct, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
533	12042	d_parser->rec->mismatch;
534	6021	}
535
536		} // namespace parser
537	27723	} // namespace cvc5