GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_line_buffered_input.cpp Lines: 1 88 1.1 %
Date: 2021-03-23 Branches: 2 88 2.3 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file antlr_line_buffered_input.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Morgan Deters, Andres Noetzli, Tim King
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief A custom ANTLR input stream that reads from the input stream lazily
13
 **
14
 ** WARNING: edits to this and related files should be done carefully due to the
15
 ** interaction with ANTLR internals.
16
 **
17
 ** This overwrites the _LA and the consume functions of the ANTLR input stream
18
 ** to use a LineBuffer instead of accessing a buffer. The lines are kept in
19
 ** memory to make sure that existing tokens remain valid (tokens store pointers
20
 ** to the corresponding input). We do not overwrite mark(), etc.
21
 ** because
22
 ** we can use the line number and the position within that line to index into
23
 ** the
24
 ** line buffer and the default markers already store and restore that
25
 ** information. The line buffer guarantees that lines are consecutive in
26
 ** memory, so ANTLR3_INPUT_STREAM::getLineBuf() should work as intended and
27
 ** tokens themselves are consecutive in memory (we are assuming that tokens
28
 ** are not split across multiple lines).
29
 **/
30
31
#include "parser/antlr_line_buffered_input.h"
32
33
#include <antlr3.h>
34
35
#include <iostream>
36
#include <string>
37
38
#include "base/check.h"
39
#include "base/output.h"
40
41
namespace CVC4 {
42
namespace parser {
43
44
// Forward declaration: allocates and initializes the line-buffered input
// stream structure. Defined near the end of this file; needed here because
// antlr3LineBufferedStreamNew() calls it before the definition appears.
static pANTLR3_INPUT_STREAM
antlr3CreateLineBufferedStream(std::istream& in, LineBuffer* line_buffer);
46
47
static void
48
setupInputStream(pANTLR3_INPUT_STREAM input)
49
{
50
#if 0
51
    ANTLR3_BOOLEAN  isBigEndian;
52
53
    // Used to determine the endianness of the machine we are currently
54
    // running on.
55
    //
56
    ANTLR3_UINT16 bomTest = 0xFEFF;
57
58
    // What endianess is the machine we are running on? If the incoming
59
    // encoding endianess is the same as this machine's natural byte order
60
    // then we can use more efficient API calls.
61
    //
62
    if  (*((pANTLR3_UINT8)(&bomTest)) == 0xFE)
63
    {
64
        isBigEndian = ANTLR3_TRUE;
65
    }
66
    else
67
    {
68
        isBigEndian = ANTLR3_FALSE;
69
    }
70
71
    // What encoding did the user tell us {s}he thought it was? I am going
72
    // to get sick of the questions on antlr-interest, I know I am.
73
    //
74
    switch  (input->encoding)
75
    {
76
        case    ANTLR3_ENC_UTF8:
77
78
            // See if there is a BOM at the start of this UTF-8 sequence
79
            // and just eat it if there is. Windows .TXT files have this for instance
80
            // as it identifies UTF-8 even though it is of no consequence for byte order
81
            // as UTF-8 does not have a byte order.
82
            //
83
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xEF
84
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xBB
85
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xBF
86
                )
87
            {
88
                // The UTF8 BOM is present so skip it
89
                //
90
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3);
91
            }
92
93
            // Install the UTF8 input routines
94
            //
95
            antlr3UTF8SetupStream(input);
96
            break;
97
98
        case    ANTLR3_ENC_UTF16:
99
100
            // See if there is a BOM at the start of the input. If not then
101
            // we assume that the byte order is the natural order of this
102
            // machine (or it is really UCS2). If there is a BOM we determine if the encoding
103
            // is the same as the natural order of this machine.
104
            //
105
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFE
106
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFF
107
                )
108
            {
109
                // BOM Present, indicates Big Endian
110
                //
111
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
112
113
                antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
114
            }
115
            else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
116
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
117
                )
118
            {
119
                // BOM present, indicates Little Endian
120
                //
121
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
122
123
                antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
124
            }
125
            else
126
            {
127
                // No BOM present, assume local computer byte order
128
                //
129
                antlr3UTF16SetupStream(input, isBigEndian, isBigEndian);
130
            }
131
            break;
132
133
        case    ANTLR3_ENC_UTF32:
134
135
            // See if there is a BOM at the start of the input. If not then
136
            // we assume that the byte order is the natural order of this
137
            // machine. If there is we determine if the encoding
138
            // is the same as the natural order of this machine.
139
            //
140
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0x00
141
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
142
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xFE
143
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3))    == 0xFF
144
                )
145
            {
146
                // BOM Present, indicates Big Endian
147
                //
148
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
149
150
                antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
151
            }
152
            else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
153
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
154
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
155
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
156
                )
157
            {
158
                // BOM present, indicates Little Endian
159
                //
160
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
161
162
                antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
163
            }
164
            else
165
            {
166
                // No BOM present, assume local computer byte order
167
                //
168
                antlr3UTF32SetupStream(input, isBigEndian, isBigEndian);
169
            }
170
            break;
171
172
        case    ANTLR3_ENC_UTF16BE:
173
174
            // Encoding is definately Big Endian with no BOM
175
            //
176
            antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
177
            break;
178
179
        case    ANTLR3_ENC_UTF16LE:
180
181
            // Encoding is definately Little Endian with no BOM
182
            //
183
            antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
184
            break;
185
186
        case    ANTLR3_ENC_UTF32BE:
187
188
            // Encoding is definately Big Endian with no BOM
189
            //
190
            antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
191
            break;
192
193
        case    ANTLR3_ENC_UTF32LE:
194
195
            // Encoding is definately Little Endian with no BOM
196
            //
197
            antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
198
            break;
199
200
        case    ANTLR3_ENC_EBCDIC:
201
202
            // EBCDIC is basically the same as ASCII but with an on the
203
            // fly translation to ASCII
204
            //
205
            antlr3EBCDICSetupStream(input);
206
            break;
207
208
        case    ANTLR3_ENC_8BIT:
209
        default:
210
211
            // Standard 8bit/ASCII
212
            //
213
            antlr38BitSetupStream(input);
214
            break;
215
    }
216
#endif /* 0 */
217
}
218
219
/**
 * Look-ahead callback installed as the stream's _LA function.
 *
 * Asks the line buffer for the character located `la - 1` positions away
 * from the stream's current (line, charPositionInLine) position.
 *
 * @param is the ANTLR int stream whose `super` is the input stream
 * @param la look-ahead distance as passed in by ANTLR
 * @return the character found there, or ANTLR3_CHARSTREAM_EOF when the line
 *         buffer reports no character (NULL pointer) for that position
 */
static ANTLR3_UCHAR bufferedInputLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)(is->super);
  CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM lbStream =
      (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)stream;

  uint8_t* ch = lbStream->line_buffer->getPtrWithOffset(
      stream->line, stream->charPositionInLine, la - 1);

  if (ch == NULL) {
    return ANTLR3_CHARSTREAM_EOF;
  }
  return *ch;
}
227
228
/**
 * Rewind callback: restores the stream position recorded under `mark`.
 *
 * Mirrors the original antlr38BitRewind() except that it performs no seek.
 * The seek in the original has no effect and goes through antlr38BitSeek()
 * rather than the overloaded seek() installed on this stream, which leads to
 * subtle bugs.
 *
 * @param is   the ANTLR int stream whose `super` is the input stream
 * @param mark marker to rewind to; the saved state is looked up at index
 *             `mark - 1`. Nothing is restored if no state is stored there.
 */
static void bufferedInputRewind(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)is->super;

  // Let the stream clean up its marks first.
  stream->istream->release(stream->istream, mark);

  // Look up the lexer state that was saved for this mark.
  pANTLR3_LEX_STATE saved = (pANTLR3_LEX_STATE)stream->markers->get(
      stream->markers, (ANTLR3_UINT32)(mark - 1));

  if (saved != NULL) {
    // Copy the saved position back into the stream.
    stream->line = saved->line;
    stream->charPositionInLine = saved->charPositionInLine;
    stream->currentLine = saved->currentLine;
    stream->nextChar = saved->nextChar;
  }
}
255
256
/**
 * Consume callback: advances the stream by one character.
 *
 * Does nothing when the line buffer has no character at the current
 * position. Otherwise the column is advanced, or, if the consumed character
 * is LineBuffer::NewLineChar, the stream moves to column 0 of the next line
 * and `currentLine` is updated. `nextChar` is refreshed in both cases.
 *
 * @param is the ANTLR int stream whose `super` is the input stream
 */
static void bufferedInputConsume(pANTLR3_INT_STREAM is) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)(is->super);
  CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM lbStream =
      (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)stream;

  uint8_t* here = lbStream->line_buffer->getPtr(stream->line,
                                                stream->charPositionInLine);
  if (here == NULL) {
    // Nothing available at the current position: leave the stream untouched.
    return;
  }

  if (*here == LineBuffer::NewLineChar) {
    // Start of a new line of input.
    stream->line += 1;
    stream->charPositionInLine = 0;
    stream->currentLine = lbStream->line_buffer->getPtr(
        stream->line, stream->charPositionInLine);
    Debug("pipe") << "-- newline!" << std::endl;
  } else {
    stream->charPositionInLine += 1;
  }

  stream->nextChar = lbStream->line_buffer->getPtr(
      stream->line, stream->charPositionInLine);
}
279
280
/**
 * Seek callback: advances the stream until `nextChar` equals `seekPoint`.
 *
 * In contrast to the original antlr38BitSeek() function, only forward seeks
 * are supported (seeking backwards is only used for rewinding in the original
 * code, and this stream's rewind does not seek, so this should be fine).
 *
 * @param is        the ANTLR int stream whose `super` is the input stream
 * @param seekPoint target position, compared against `nextChar` as a marker
 */
static void bufferedInputSeek(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) {
  pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super));

  // Check that we are not seeking backwards.
  Assert(!((CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)
              ->line_buffer->isPtrBefore(
                  (uint8_t*)seekPoint, input->line, input->charPositionInLine));

  while ((ANTLR3_MARKER)(input->nextChar) != seekPoint) {
    void* const before = input->nextChar;
    is->consume(is);
    if (input->nextChar == before) {
      // consume() made no progress, i.e. the input is exhausted and the seek
      // point can never be reached. Flag the invalid seek and stop instead of
      // spinning forever.
      Assert(false);
      break;
    }
  }
}
296
297
/**
 * size() callback. The total size is not supported for this type of stream,
 * so any call is flagged with Assert(false).
 *
 * @param input the input stream (unused)
 * @return 0 if execution continues past the assertion
 */
static ANTLR3_UINT32 bufferedInputSize(pANTLR3_INPUT_STREAM input) {
  const ANTLR3_UINT32 unsupportedSize = 0;
  Assert(false);
  return unsupportedSize;
}
302
303
static void bufferedInputSetNewLineChar(pANTLR3_INPUT_STREAM input,
304
                                        ANTLR3_UINT32 newlineChar) {
305
  // Not supported for this type of stream
306
  Assert(false);
307
}
308
309
static void bufferedInputSetUcaseLA(pANTLR3_INPUT_STREAM input,
310
                                    ANTLR3_BOOLEAN flag) {
311
  // Not supported for this type of stream
312
  Assert(false);
313
}
314
315
/**
 * Creates an ANTLR input stream that reads through a LineBuffer.
 *
 * The stream structure is created by antlr3CreateLineBufferedStream(); the
 * look-ahead, consume, seek and rewind callbacks are then replaced by the
 * line-buffered versions defined in this file, and size(), SetNewLineChar()
 * and setUcaseLA() by stubs that reject the call.
 *
 * @param in          stream the input comes from
 * @param encoding    encoding recorded on the stream (only stored when
 *                    CVC4_ANTLR3_OLD_INPUT_STREAM is not defined)
 * @param name        name used as both the stream name and the file name
 * @param line_buffer line buffer the stream callbacks read from
 * @return the new input stream, or NULL if `in` is in a failed state or the
 *         stream structure could not be created
 */
pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in,
                                                 ANTLR3_UINT32 encoding,
                                                 pANTLR3_UINT8 name,
                                                 LineBuffer* line_buffer) {
  if (!in) {
    return NULL;
  }

  // Set up the stream and install the data pointer first; the API functions
  // installed for the default 8Bit stream are adjusted afterwards.
  pANTLR3_INPUT_STREAM stream = antlr3CreateLineBufferedStream(in, line_buffer);
  if (stream == NULL) {
    return NULL;
  }

  // Replace the int-stream callbacks with the line-buffered versions.
  pANTLR3_INT_STREAM intStream = stream->istream;
  intStream->_LA = bufferedInputLA;
  intStream->consume = bufferedInputConsume;
  intStream->seek = bufferedInputSeek;
  intStream->rewind = bufferedInputRewind;

  // Operations that this type of stream does not support.
  stream->size = bufferedInputSize;
  stream->SetNewLineChar = bufferedInputSetNewLineChar;
  stream->setUcaseLA = bufferedInputSetUcaseLA;

#ifndef CVC4_ANTLR3_OLD_INPUT_STREAM
  // Record the encoding scheme we were given by the user.
  stream->encoding = encoding;
#endif /* ! CVC4_ANTLR3_OLD_INPUT_STREAM */

  // Work out the endian type and install any API functions that differ from
  // 8Bit.
  setupInputStream(stream);

  // Finally set up the file name; the stream name and the file name share
  // the same string object.
  intStream->streamName = stream->strFactory->newStr8(stream->strFactory, name);
  stream->fileName = intStream->streamName;

  return stream;
}
364
365
/**
 * Allocates and initializes the line-buffered input stream structure.
 *
 * A zero-initialized ANTLR3_LINE_BUFFERED_INPUT_STREAM is allocated, the
 * source stream and line buffer are attached to it, the common ANTLR stream
 * setup is run, and the position is set to line 0, column 0 of the line
 * buffer.
 *
 * @param in          stream the input comes from; its address is stored
 * @param line_buffer line buffer to attach; also queried for the initial
 *                    character pointers
 * @return the new stream viewed as a pANTLR3_INPUT_STREAM, or NULL if `in`
 *         is in a failed state or the allocation failed
 */
static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(
    std::istream& in, LineBuffer* line_buffer) {
  if (!in) {
    return NULL;
  }

  // Allocate memory for the input stream structure.
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)ANTLR3_CALLOC(
      1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM));
  if (stream == NULL) {
    return NULL;
  }

  // No data block is attached to the stream structure itself.
  stream->data = NULL;
  stream->isAllocated = ANTLR3_FALSE;

  // Attach the source stream and the line buffer.
  pANTLR3_LINE_BUFFERED_INPUT_STREAM lbStream =
      (pANTLR3_LINE_BUFFERED_INPUT_STREAM)stream;
  lbStream->in = &in;
  lbStream->line_buffer = line_buffer;

  // Call the common 8 bit input stream handler initialization.
#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
  antlr3AsciiSetupStream(stream, ANTLR3_CHARSTREAM);
#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
  antlr38BitSetupStream(stream);
  // In some libantlr3c 3.4-beta versions, this call is not included in the
  // above. This is probably an erroneously-deleted line in the libantlr3c
  // source since 3.2.
  antlr3GenericSetupStream(stream);
#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */

  // Start at the very beginning of the line buffer.
  stream->sizeBuf = 0;
  stream->newlineChar = LineBuffer::NewLineChar;
  stream->line = 0;
  stream->charPositionInLine = 0;
  stream->nextChar = line_buffer->getPtr(0, 0);
  stream->currentLine = line_buffer->getPtr(0, 0);

  return stream;
}
413
414
}/* CVC4::parser namespace */
415
26673
}/* CVC4 namespace */