Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/parser/antlr_input.cpp	Lines:	199	213	93.4 %
Date:	2021-09-29	Branches:	221	462	47.8 %


/******************************************************************************
 * Top contributors (to current version):
 *   Christopher L. Conway, Kshitij Bansal, Tim King
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * A super-class for ANTLR-generated input language parsers.
 */

#include "parser/antlr_input.h"

#include <antlr3.h>
#include <limits>

#include "base/check.h"
#include "base/output.h"
#include "parser/antlr_line_buffered_input.h"
#include "parser/bounded_token_buffer.h"
#include "parser/bounded_token_factory.h"
#include "parser/input.h"
#include "parser/memory_mapped_input_buffer.h"
#include "parser/parser.h"
#include "parser/parser_exception.h"
#include "parser/smt2/smt2_input.h"
#include "parser/smt2/sygus_input.h"
#include "parser/tptp/tptp_input.h"

using namespace std;
using namespace cvc5;
using namespace cvc5::parser;
using namespace cvc5::kind;

namespace cvc5 {
namespace parser {

// These functions exactly wrap the antlr3 source inconsistencies.
// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
// No other sanity checking happens;
pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer);
pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name);

pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
                                             const std::string& name,
                                             LineBuffer* line_buffer) {
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
      antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
                                            name_duplicate, line_buffer);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return inputStream;
}

pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
  pANTLR3_INPUT_STREAM input = NULL;
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());

  // libantlr3c v3.2 isn't source-compatible with v3.4
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  input = antlr3AsciiFileStreamNew(name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  free(name_duplicate);
  return input;
}


pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
                                            uint32_t size,
                                            const std::string& name){
  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
  pANTLR3_INPUT_STREAM inputStream = NULL;
  /* Create an ANTLR input backed by the buffer. */
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  inputStream =
    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  inputStream =
    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
                          name_duplicate);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  free(name_duplicate);
  return inputStream;
}

AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
                                   bool fileIsTemporary,
                                   pANTLR3_UINT8 inputString,
                                   LineBuffer* line_buffer)
    : InputStream(name, fileIsTemporary),
      d_input(input),
      d_inputString(inputString),
      d_line_buffer(line_buffer) {
  Assert(input != NULL);
  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
}

AntlrInputStream::~AntlrInputStream() {
  d_input->free(d_input);
  if(d_inputString != NULL){
    free(d_inputString);
  }
  if (d_line_buffer != NULL) {
    delete d_line_buffer;
  }
}

pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
  return d_input;
}



AntlrInputStream*
AntlrInputStream::newFileInputStream(const std::string& name,
                                     bool useMmap)
{
#ifdef _WIN32
  if(useMmap) {
    useMmap = false;
  }
#endif
  pANTLR3_INPUT_STREAM input = NULL;
  if(useMmap) {
    input = MemoryMappedInputBufferNew(name);
  } else {
    input = newAntlr3FileStream(name);
  }
  if(input == NULL) {
    throw InputStreamException("Couldn't open file: " + name);
  }
  return new AntlrInputStream(name, input, false, NULL, NULL);
}

AntlrInputStream* AntlrInputStream::newStreamInputStream(
    std::istream& input, const std::string& name)
{
  pANTLR3_INPUT_STREAM inputStream = NULL;
  pANTLR3_UINT8 inputStringCopy = NULL;
  LineBuffer* line_buffer = new LineBuffer(&input);
  inputStream = newAntlr3BufferedStream(input, name, line_buffer);
  return new AntlrInputStream(name, inputStream, false, inputStringCopy,
                              line_buffer);
}


AntlrInputStream*
AntlrInputStream::newStringInputStream(const std::string& input,
                                       const std::string& name)
{
  size_t input_size = input.size();
  Assert(input_size <= std::numeric_limits<uint32_t>::max());

  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());

  if( input_duplicate == NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }

  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
  if( inputStream==NULL ) {
    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
  }
  return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
}

AntlrInput* AntlrInput::newInput(const std::string& lang,
                                 AntlrInputStream& inputStream)
{
  if (lang == "LANG_SYGUS_V2")
  {
    return new SygusInput(inputStream);
  }
  else if (lang == "LANG_TPTP")
  {
    return new TptpInput(inputStream);
  }
  else if (lang == "LANG_SMTLIB_V2_6")
  {
    return new Smt2Input(inputStream);
  }
  else
  {
    throw InputStreamException(
        "unable to detect input file format, try --lang ");
  }
}

AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
    Input(inputStream),
    d_lookahead(lookahead),
    d_lexer(NULL),
    d_parser(NULL),
    d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
    d_tokenBuffer(NULL) {
}

/*
AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
  Parser(exprManager,name),
  d_lookahead(lookahead) {

}
*/

/*
AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
  Input(exprManager,name),
  d_lookahead(lookahead),
  d_lexer(NULL),
  d_parser(NULL),
  d_tokenStream(NULL) {

  char* inputStr = strdup(input.c_str());
  char* nameStr = strdup(name.c_str());
  if( inputStr==NULL || nameStr==NULL ) {
    throw ParserException("Couldn't initialize string input: '" + input + "'");
  }
  d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
  if( d_inputStream == NULL ) {
    throw ParserException("Couldn't create input stream for string: '" + input + "'");
  }

}
*/

AntlrInput::~AntlrInput() {
  BoundedTokenBufferFree(d_tokenBuffer);
}

pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
  return d_tokenBuffer->commonTstream;
}

void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
  Assert(lexer != NULL);
  Parser *parser = (Parser*)(lexer->super);
  Assert(parser != NULL);
  AntlrInput *input = (AntlrInput*) parser->getInput();
  Assert(input != NULL);

  /* Call the error display routine *if* there's not already a
   * parse error pending.  If a parser error is pending, this
   * error is probably less important, so we just drop it. */
  if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
    input->parseError("Error finding next token.");
  }
}

void AntlrInput::warning(const std::string& message) {
  Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
}



/**
 * characters considered part of a simple symbol in SMTLIB.
 *
 * TODO: Ideally this code shouldn't be smtlib specific (should work
 * with CVC language too), but this per-language specialization has
 * been left for a later point.
 */
inline bool isSimpleChar(char ch) {
  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
}

size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
  size_t st = 0;
  size_t N = input.size();
  while(st < N) {
    while( st < N && (*isWordChar)(input[st])  == false ) st++;
    size_t en = st;
    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
    if(en - st + 1 == pattern.size()) {
      bool match = true;
      for(size_t i = 0; match && i < pattern.size(); ++i) {
        match &= (pattern[i] == input[st+i]);
      }
      if(match == true) {
        return st;
      }
    }
    st = en + 1;
  }
  return string::npos;
}

/**
 * Gets part of original input and tries to visually hint where the
 * error might be.
 *
 * Something like:
 *
 *   ...nd (= alpha beta) (= beta delta))
 *                                ^
 *
 * Implementation (as on 2014/04/24):
 *
 * > if suggested pointer by lexer is under a "simple char", move to
 *   start of the word and print pointer there.
 *
 * > in the other case, it tries to find the nearest word in the error
 *   message passed along. if it can't find it, we don't add this
 *   visual hint, as experimentally position suggested by lexer was
 *   found to be totally unhelpful. (TODO: fix this upstream to
 *   improve)
 */
std::string parseErrorHelper(const char* lineStart,
                             std::size_t lineLength,
                             std::size_t charPositionInLine,
                             const std::string& message)
{
  // Is it a multi-line message
  bool multilineMessage = (message.find('\n') != string::npos);
  // Useful only if it is a multi-line message
  int firstLineEnd = message.find('\n');

  std::ostringstream ss, slicess;

  // Keep first line of message
  if(multilineMessage) {
    ss << message.substr(0, firstLineEnd) << endl << endl;
  } else {
    ss << message << endl << endl;
  }

  std::size_t posSliceStart =
      (charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
  std::size_t posSliceEnd = posSliceStart + 70;
  std::size_t caretPos = 0;
  std::size_t caretPosExtra = 0;  // for inital intendation, epilipses etc.

  ss << "  "; caretPosExtra += 2;
  if(posSliceStart > 0) {
    ss << "..."; caretPosExtra += 3;
  }

  for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
       ++i)
  {
    if(i == posSliceEnd) {
      ss << "...";
      break;
    }
    if(i < charPositionInLine) { caretPos++; }

    if(!isprint(lineStart[i])) {
      // non-printable character, something wrong, bail out
      return message;
    }

    ss << (lineStart[i]);
    slicess << (lineStart[i]);
  }

  // adjust position of caret, based on slice and message
  {
    int caretPosOrig = caretPos;
    string slice = slicess.str();
    if(isSimpleChar(slice[caretPos])) {
      // if alphanumeric, try to go to beginning of word/number
      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
      if(caretPos == 0 && posSliceStart > 0) {
        // reached start and this is not really the start? bail out
        return message;
      } else {
        // likely it is also in original message? if so, very likely
        // we found the right place
        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
        if( matchLoc != string::npos ) {
          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
        }
      }
    } else {
      bool foundCaretPos = false;

      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
        // go to nearest alphanumeric string (before current position),
        // see if that word can be found in original message. If so,
        // point to that, else keep pointer where it was.
        int nearestWordEn = caretPos - 1;
        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
          --nearestWordEn;
        }
        if(isSimpleChar(slice[nearestWordEn])) {
          int nearestWordSt = nearestWordEn;
          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
            --nearestWordSt;
          }
          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
          if( matchLoc != string::npos ) {
            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
                                    << nearestWordSt << std::endl;
            foundCaretPos = true;
          }
          caretPos = nearestWordSt;
        }
      }
      if( !foundCaretPos) {
        // this doesn't look good. caret generally getting printed
        // at unhelpful positions. improve upstream?
        return message;
      }
    }
    caretPos += caretPosExtra;
  }// end of caret position computation/heuristics

  ss << std::endl;
  while( caretPos-- > 0 ) {
    ss << ' ';
  }
  ss << '^' << endl;
  if(multilineMessage) {
     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
  }
  return ss.str();
}

void AntlrInput::parseError(const std::string& message, bool eofException)
{
  auto lineLength = d_antlr3InputStream->sizeBuf
                    - (static_cast<char*>(d_antlr3InputStream->currentLine)
                       - static_cast<char*>(d_antlr3InputStream->data));
  std::string updatedMessage = parseErrorHelper(
      (const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
      lineLength,
      d_lexer->getCharPositionInLine(d_lexer),
      message);

  Debug("parser") << "Throwing exception: "
      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
      << d_lexer->getLine(d_lexer) << "."
      << d_lexer->getCharPositionInLine(d_lexer) << ": "
      << updatedMessage << endl;
  if(eofException) {
    throw ParserEndOfFileException(message,
                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                                   d_lexer->getLine(d_lexer),
                                   d_lexer->getCharPositionInLine(d_lexer));
  } else {
    throw ParserException(updatedMessage,
                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
                          d_lexer->getLine(d_lexer),
                          d_lexer->getCharPositionInLine(d_lexer));
  }
}


void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
  d_lexer = pLexer;

  pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
  if( pTokenFactory != NULL ) {
    pTokenFactory->close(pTokenFactory);
  }

  /* 2*lookahead should be sufficient, but we give ourselves some breathing room. */
  pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
  if( pTokenFactory == NULL ) {
    throw InputStreamException("Couldn't create token factory.");
  }
  d_lexer->rec->state->tokFactory = pTokenFactory;

  pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
  if( buffer == NULL ) {
    throw InputStreamException("Couldn't create token buffer.");
  }

  d_tokenBuffer = buffer;

  // Override default lexer error reporting
  d_lexer->rec->reportError = &lexerError;
  // Override default nextToken function, just to prevent exceptions escaping.
  d_lexer->rec->state->tokSource->nextToken = &nextToken;
}

void AntlrInput::setParser(Parser& parser) {
  // ANTLR isn't using super in the lexer or the parser, AFAICT.
  // We could also use @lexer/parser::context to add a field to the generated
  // objects, but then it would have to be declared separately in every
  // language's grammar and we'd have to in the address of the field anyway.
  d_lexer->super = &parser;
  d_parser->super = &parser;
}

void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
  d_parser = pParser;
//  d_parser->rec->match = &match;
  d_parser->rec->reportError = &reportError;
  /* Don't try to recover from a parse error. */
  // [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
  d_parser->rec->recoverFromMismatchedToken =
      (void* (*)(ANTLR3_BASE_RECOGNIZER_struct*, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
      d_parser->rec->mismatch;
}

}  // namespace parser
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Christopher L. Conway, Kshitij Bansal, Tim King
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* A super-class for ANTLR-generated input language parsers.
14		*/
15
16		#include "parser/antlr_input.h"
17
18		#include <antlr3.h>
19		#include <limits>
20
21		#include "base/check.h"
22		#include "base/output.h"
23		#include "parser/antlr_line_buffered_input.h"
24		#include "parser/bounded_token_buffer.h"
25		#include "parser/bounded_token_factory.h"
26		#include "parser/input.h"
27		#include "parser/memory_mapped_input_buffer.h"
28		#include "parser/parser.h"
29		#include "parser/parser_exception.h"
30		#include "parser/smt2/smt2_input.h"
31		#include "parser/smt2/sygus_input.h"
32		#include "parser/tptp/tptp_input.h"
33
34		using namespace std;
35		using namespace cvc5;
36		using namespace cvc5::parser;
37		using namespace cvc5::kind;
38
39		namespace cvc5 {
40		namespace parser {
41
42		// These functions exactly wrap the antlr3 source inconsistencies.
43		// These are the only location CVC5_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
44		// No other sanity checking happens;
45		pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
46		const std::string& name,
47		LineBuffer* line_buffer);
48		pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
49		pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
50		uint32_t size,
51		const std::string& name);
52
53	4	pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input,
54		const std::string& name,
55		LineBuffer* line_buffer) {
56	4	pANTLR3_INPUT_STREAM inputStream = NULL;
57	4	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
58
59		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
60		inputStream =
61		antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer);
62		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
63	4	inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT,
64		name_duplicate, line_buffer);
65		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
66
67	4	free(name_duplicate);
68	4	return inputStream;
69		}
70
71	3903	pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
72	3903	pANTLR3_INPUT_STREAM input = NULL;
73	3903	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
74
75		// libantlr3c v3.2 isn't source-compatible with v3.4
76		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
77		input = antlr3AsciiFileStreamNew(name_duplicate);
78		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
79	3903	input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
80		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
81
82	3903	free(name_duplicate);
83	3903	return input;
84		}
85
86
87	163	pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
88		uint32_t size,
89		const std::string& name){
90	163	pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
91	163	pANTLR3_INPUT_STREAM inputStream = NULL;
92		/* Create an ANTLR input backed by the buffer. */
93		#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
94		inputStream =
95		antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
96		#else /* CVC5_ANTLR3_OLD_INPUT_STREAM */
97	163	inputStream =
98		antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
99		name_duplicate);
100		#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */
101	163	free(name_duplicate);
102	163	return inputStream;
103		}
104
105	4070	AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input,
106		bool fileIsTemporary,
107		pANTLR3_UINT8 inputString,
108	4070	LineBuffer* line_buffer)
109		: InputStream(name, fileIsTemporary),
110		d_input(input),
111		d_inputString(inputString),
112	4070	d_line_buffer(line_buffer) {
113	4070	Assert(input != NULL);
114	4070	input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
115	4070	}
116
117	12210	AntlrInputStream::~AntlrInputStream() {
118	4070	d_input->free(d_input);
119	4070	if(d_inputString != NULL){
120	163	free(d_inputString);
121		}
122	4070	if (d_line_buffer != NULL) {
123	4	delete d_line_buffer;
124		}
125	8140	}
126
127	8140	pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
128	8140	return d_input;
129		}
130
131
132
133		AntlrInputStream*
134	3903	AntlrInputStream::newFileInputStream(const std::string& name,
135		bool useMmap)
136		{
137		#ifdef _WIN32
138		if(useMmap) {
139		useMmap = false;
140		}
141		#endif
142	3903	pANTLR3_INPUT_STREAM input = NULL;
143	3903	if(useMmap) {
144		input = MemoryMappedInputBufferNew(name);
145		} else {
146	3903	input = newAntlr3FileStream(name);
147		}
148	3903	if(input == NULL) {
149		throw InputStreamException("Couldn't open file: " + name);
150		}
151	3903	return new AntlrInputStream(name, input, false, NULL, NULL);
152		}
153
154	4	AntlrInputStream* AntlrInputStream::newStreamInputStream(
155		std::istream& input, const std::string& name)
156		{
157	4	pANTLR3_INPUT_STREAM inputStream = NULL;
158	4	pANTLR3_UINT8 inputStringCopy = NULL;
159	4	LineBuffer* line_buffer = new LineBuffer(&input);
160	4	inputStream = newAntlr3BufferedStream(input, name, line_buffer);
161		return new AntlrInputStream(name, inputStream, false, inputStringCopy,
162	4	line_buffer);
163		}
164
165
166		AntlrInputStream*
167	163	AntlrInputStream::newStringInputStream(const std::string& input,
168		const std::string& name)
169		{
170	163	size_t input_size = input.size();
171	163	Assert(input_size <= std::numeric_limits<uint32_t>::max());
172
173		// Ownership of input_duplicate is transferred to the AntlrInputStream.
174	163	pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
175
176	163	if( input_duplicate == NULL ) {
177		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
178		}
179
180	163	pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
181	163	if( inputStream==NULL ) {
182		throw InputStreamException("Couldn't initialize string input: '" + input + "'");
183		}
184	163	return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL);
185		}
186
187	4070	AntlrInput* AntlrInput::newInput(const std::string& lang,
188		AntlrInputStream& inputStream)
189		{
190	4070	if (lang == "LANG_SYGUS_V2")
191		{
192	198	return new SygusInput(inputStream);
193		}
194	3872	else if (lang == "LANG_TPTP")
195		{
196	41	return new TptpInput(inputStream);
197		}
198	3831	else if (lang == "LANG_SMTLIB_V2_6")
199		{
200	3831	return new Smt2Input(inputStream);
201		}
202		else
203		{
204		throw InputStreamException(
205		"unable to detect input file format, try --lang ");
206		}
207		}
208
209	4070	AntlrInput::AntlrInput(AntlrInputStream& inputStream, unsigned int lookahead) :
210		Input(inputStream),
211		d_lookahead(lookahead),
212		d_lexer(NULL),
213		d_parser(NULL),
214	4070	d_antlr3InputStream( inputStream.getAntlr3InputStream() ),
215	8140	d_tokenBuffer(NULL) {
216	4070	}
217
218		/*
219		AntlrParser::AntlrParser(ExprManager* exprManager, std::istream& input, const std::string& name, unsigned int lookahead)
220		Parser(exprManager,name),
221		d_lookahead(lookahead) {
222
223		}
224		*/
225
226		/*
227		AntlrInput::Input(ExprManager* exprManager, const std::string& input, const std::string& name, unsigned int lookahead) :
228		Input(exprManager,name),
229		d_lookahead(lookahead),
230		d_lexer(NULL),
231		d_parser(NULL),
232		d_tokenStream(NULL) {
233
234		char* inputStr = strdup(input.c_str());
235		char* nameStr = strdup(name.c_str());
236		if( inputStr==NULL \|\| nameStr==NULL ) {
237		throw ParserException("Couldn't initialize string input: '" + input + "'");
238		}
239		d_inputStream = antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8)inputStr,input.size(),(pANTLR3_UINT8)nameStr);
240		if( d_inputStream == NULL ) {
241		throw ParserException("Couldn't create input stream for string: '" + input + "'");
242		}
243
244		}
245		*/
246
247	8140	AntlrInput::~AntlrInput() {
248	4070	BoundedTokenBufferFree(d_tokenBuffer);
249	4070	}
250
251	4070	pANTLR3_COMMON_TOKEN_STREAM AntlrInput::getTokenStream() {
252	4070	return d_tokenBuffer->commonTstream;
253		}
254
255	12	void AntlrInput::lexerError(pANTLR3_BASE_RECOGNIZER recognizer) {
256	12	pANTLR3_LEXER lexer = (pANTLR3_LEXER)(recognizer->super);
257	12	Assert(lexer != NULL);
258	12	Parser parser = (Parser)(lexer->super);
259	12	Assert(parser != NULL);
260	12	AntlrInput input = (AntlrInput) parser->getInput();
261	12	Assert(input != NULL);
262
263		/* Call the error display routine if there's not already a
264		* parse error pending. If a parser error is pending, this
265		* error is probably less important, so we just drop it. */
266	12	if( input->d_parser->rec->state->error == ANTLR3_FALSE ) {
267	24	input->parseError("Error finding next token.");
268		}
269		}
270
271	235	void AntlrInput::warning(const std::string& message) {
272	235	Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
273	235	}
274
275
276
277		/**
278		* characters considered part of a simple symbol in SMTLIB.
279		*
280		* TODO: Ideally this code shouldn't be smtlib specific (should work
281		* with CVC language too), but this per-language specialization has
282		* been left for a later point.
283		*/
284	4367	inline bool isSimpleChar(char ch) {
285	4367	return isalnum(ch) \|\| (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
286		}
287
288	104	size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
289	104	size_t st = 0;
290	104	size_t N = input.size();
291	1136	while(st < N) {
292	1159	while( st < N && (*isWordChar)(input[st]) == false ) st++;
293	555	size_t en = st;
294	2745	while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
295	555	if(en - st + 1 == pattern.size()) {
296	93	bool match = true;
297	258	for(size_t i = 0; match && i < pattern.size(); ++i) {
298	165	match &= (pattern[i] == input[st+i]);
299		}
300	93	if(match == true) {
301	39	return st;
302		}
303		}
304	516	st = en + 1;
305		}
306	65	return string::npos;
307		}
308
309		/**
310		* Gets part of original input and tries to visually hint where the
311		* error might be.
312		*
313		* Something like:
314		*
315		* ...nd (= alpha beta) (= beta delta))
316		* ^
317		*
318		* Implementation (as on 2014/04/24):
319		*
320		* > if suggested pointer by lexer is under a "simple char", move to
321		* start of the word and print pointer there.
322		*
323		* > in the other case, it tries to find the nearest word in the error
324		* message passed along. if it can't find it, we don't add this
325		* visual hint, as experimentally position suggested by lexer was
326		* found to be totally unhelpful. (TODO: fix this upstream to
327		* improve)
328		*/
329	76	std::string parseErrorHelper(const char* lineStart,
330		std::size_t lineLength,
331		std::size_t charPositionInLine,
332		const std::string& message)
333		{
334		// Is it a multi-line message
335	76	bool multilineMessage = (message.find('\n') != string::npos);
336		// Useful only if it is a multi-line message
337	76	int firstLineEnd = message.find('\n');
338
339	152	std::ostringstream ss, slicess;
340
341		// Keep first line of message
342	76	if(multilineMessage) {
343		ss << message.substr(0, firstLineEnd) << endl << endl;
344		} else {
345	76	ss << message << endl << endl;
346		}
347
348	76	std::size_t posSliceStart =
349	76	(charPositionInLine <= 50) ? 0 : charPositionInLine - 50 + 5;
350	76	std::size_t posSliceEnd = posSliceStart + 70;
351	76	std::size_t caretPos = 0;
352	76	std::size_t caretPosExtra = 0; // for inital intendation, epilipses etc.
353
354	76	ss << " "; caretPosExtra += 2;
355	76	if(posSliceStart > 0) {
356	5	ss << "..."; caretPosExtra += 3;
357		}
358
359	1493	for (std::size_t i = posSliceStart; i < lineLength && lineStart[i] != '\n';
360		++i)
361		{
362	1417	if(i == posSliceEnd) {
363		ss << "...";
364		break;
365		}
366	1417	if(i < charPositionInLine) { caretPos++; }
367
368	1417	if(!isprint(lineStart[i])) {
369		// non-printable character, something wrong, bail out
370		return message;
371		}
372
373	1417	ss << (lineStart[i]);
374	1417	slicess << (lineStart[i]);
375		}
376
377		// adjust position of caret, based on slice and message
378		{
379	76	int caretPosOrig = caretPos;
380	136	string slice = slicess.str();
381	76	if(isSimpleChar(slice[caretPos])) {
382		// if alphanumeric, try to go to beginning of word/number
383	93	while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
384	31	if(caretPos == 0 && posSliceStart > 0) {
385		// reached start and this is not really the start? bail out
386		return message;
387		} else {
388		// likely it is also in original message? if so, very likely
389		// we found the right place
390	62	string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
391	31	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
392	31	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
393	31	if( matchLoc != string::npos ) {
394	10	Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
395		}
396		}
397		} else {
398	45	bool foundCaretPos = false;
399
400	122	for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
401		// go to nearest alphanumeric string (before current position),
402		// see if that word can be found in original message. If so,
403		// point to that, else keep pointer where it was.
404	77	int nearestWordEn = caretPos - 1;
405	165	while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
406	44	--nearestWordEn;
407		}
408	77	if(isSimpleChar(slice[nearestWordEn])) {
409	73	int nearestWordSt = nearestWordEn;
410	281	while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
411	104	--nearestWordSt;
412		}
413	146	string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
414	73	size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
415	73	Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
416	73	Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
417	73	if( matchLoc != string::npos ) {
418	58	Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
419	29	<< nearestWordSt << std::endl;
420	29	foundCaretPos = true;
421		}
422	73	caretPos = nearestWordSt;
423		}
424		}
425	45	if( !foundCaretPos) {
426		// this doesn't look good. caret generally getting printed
427		// at unhelpful positions. improve upstream?
428	16	return message;
429		}
430		}
431	60	caretPos += caretPosExtra;
432		}// end of caret position computation/heuristics
433
434	60	ss << std::endl;
435	1404	while( caretPos-- > 0 ) {
436	672	ss << ' ';
437		}
438	60	ss << '^' << endl;
439	60	if(multilineMessage) {
440		ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
441		}
442	60	return ss.str();
443		}
444
445	76	void AntlrInput::parseError(const std::string& message, bool eofException)
446		{
447	152	auto lineLength = d_antlr3InputStream->sizeBuf
448	76	- (static_cast<char*>(d_antlr3InputStream->currentLine)
449	76	- static_cast<char*>(d_antlr3InputStream->data));
450		std::string updatedMessage = parseErrorHelper(
451	76	(const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
452		lineLength,
453	76	d_lexer->getCharPositionInLine(d_lexer),
454	228	message);
455
456	152	Debug("parser") << "Throwing exception: "
457	152	<< (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
458	152	<< d_lexer->getLine(d_lexer) << "."
459	152	<< d_lexer->getCharPositionInLine(d_lexer) << ": "
460	76	<< updatedMessage << endl;
461	76	if(eofException) {
462		throw ParserEndOfFileException(message,
463	2	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
464	2	d_lexer->getLine(d_lexer),
465	6	d_lexer->getCharPositionInLine(d_lexer));
466		} else {
467		throw ParserException(updatedMessage,
468	74	(const char*)d_lexer->rec->state->tokSource->fileName->chars,
469	74	d_lexer->getLine(d_lexer),
470	222	d_lexer->getCharPositionInLine(d_lexer));
471		}
472		}
473
474
475	4070	void AntlrInput::setAntlr3Lexer(pANTLR3_LEXER pLexer) {
476	4070	d_lexer = pLexer;
477
478	4070	pANTLR3_TOKEN_FACTORY pTokenFactory = d_lexer->rec->state->tokFactory;
479	4070	if( pTokenFactory != NULL ) {
480	4070	pTokenFactory->close(pTokenFactory);
481		}
482
483		/* 2lookahead should be sufficient, but we give ourselves some breathing room. /
484	4070	pTokenFactory = BoundedTokenFactoryNew(d_antlr3InputStream, 2*d_lookahead);
485	4070	if( pTokenFactory == NULL ) {
486		throw InputStreamException("Couldn't create token factory.");
487		}
488	4070	d_lexer->rec->state->tokFactory = pTokenFactory;
489
490	4070	pBOUNDED_TOKEN_BUFFER buffer = BoundedTokenBufferSourceNew(d_lookahead, d_lexer->rec->state->tokSource);
491	4070	if( buffer == NULL ) {
492		throw InputStreamException("Couldn't create token buffer.");
493		}
494
495	4070	d_tokenBuffer = buffer;
496
497		// Override default lexer error reporting
498	4070	d_lexer->rec->reportError = &lexerError;
499		// Override default nextToken function, just to prevent exceptions escaping.
500	4070	d_lexer->rec->state->tokSource->nextToken = &nextToken;
501	4070	}
502
503	4070	void AntlrInput::setParser(Parser& parser) {
504		// ANTLR isn't using super in the lexer or the parser, AFAICT.
505		// We could also use @lexer/parser::context to add a field to the generated
506		// objects, but then it would have to be declared separately in every
507		// language's grammar and we'd have to in the address of the field anyway.
508	4070	d_lexer->super = &parser;
509	4070	d_parser->super = &parser;
510	4070	}
511
512	4070	void AntlrInput::setAntlr3Parser(pANTLR3_PARSER pParser) {
513	4070	d_parser = pParser;
514		// d_parser->rec->match = &match;
515	4070	d_parser->rec->reportError = &reportError;
516		/* Don't try to recover from a parse error. */
517		// [chris 4/5/2010] Not clear on why this cast is necessary, but I get an error if I remove it.
518	8140	d_parser->rec->recoverFromMismatchedToken =
519		(void* ()(ANTLR3_BASE_RECOGNIZER_struct, ANTLR3_UINT32, ANTLR3_BITSET_LIST_struct*))
520	8140	d_parser->rec->mismatch;
521	4070	}
522
523		} // namespace parser
524	22731	} // namespace cvc5