GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/parser/antlr_line_buffered_input.cpp Lines: 59 88 67.0 %
Date: 2021-09-10 Branches: 15 88 17.0 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Morgan Deters, Andres Noetzli, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * A custom ANTLR input stream that reads from the input stream lazily
14
 *
15
 * WARNING: Edits to this and related files should be done carefully due to the
16
 *          interaction with ANTLR internals.
17
 *
18
 * This overrides the _LA and the consume functions of the ANTLR input stream
19
 * to use a LineBuffer instead of accessing a buffer. The lines are kept in
20
 * memory to make sure that existing tokens remain valid (tokens store pointers
21
 * to the corresponding input). We do not override mark(), etc. because
22
 * we can use the line number and the position within that line to index into
23
 * the line buffer and the default markers already store and restore that
24
 * information. The line buffer guarantees that lines are consecutive in
25
 * memory, so ANTLR3_INPUT_STREAM::getLineBuf() should work as intended and
26
 * tokens themselves are consecutive in memory (we are assuming that tokens
27
 * are not split across multiple lines).
28
 */
29
30
#include "parser/antlr_line_buffered_input.h"
31
32
#include <antlr3.h>
33
34
#include <iostream>
35
#include <string>
36
37
#include "base/check.h"
38
#include "base/output.h"
39
40
namespace cvc5 {
41
namespace parser {
42
43
static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(
44
    std::istream& in, LineBuffer* line_buffer);
45
46
static void
47
4
setupInputStream(pANTLR3_INPUT_STREAM input)
48
{
49
#if 0
50
    ANTLR3_BOOLEAN  isBigEndian;
51
52
    // Used to determine the endianness of the machine we are currently
53
    // running on.
54
    //
55
    ANTLR3_UINT16 bomTest = 0xFEFF;
56
57
    // What endianess is the machine we are running on? If the incoming
58
    // encoding endianess is the same as this machine's natural byte order
59
    // then we can use more efficient API calls.
60
    //
61
    if  (*((pANTLR3_UINT8)(&bomTest)) == 0xFE)
62
    {
63
        isBigEndian = ANTLR3_TRUE;
64
    }
65
    else
66
    {
67
        isBigEndian = ANTLR3_FALSE;
68
    }
69
70
    // What encoding did the user tell us {s}he thought it was? I am going
71
    // to get sick of the questions on antlr-interest, I know I am.
72
    //
73
    switch  (input->encoding)
74
    {
75
        case    ANTLR3_ENC_UTF8:
76
77
            // See if there is a BOM at the start of this UTF-8 sequence
78
            // and just eat it if there is. Windows .TXT files have this for instance
79
            // as it identifies UTF-8 even though it is of no consequence for byte order
80
            // as UTF-8 does not have a byte order.
81
            //
82
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xEF
83
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xBB
84
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xBF
85
                )
86
            {
87
                // The UTF8 BOM is present so skip it
88
                //
89
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3);
90
            }
91
92
            // Install the UTF8 input routines
93
            //
94
            antlr3UTF8SetupStream(input);
95
            break;
96
97
        case    ANTLR3_ENC_UTF16:
98
99
            // See if there is a BOM at the start of the input. If not then
100
            // we assume that the byte order is the natural order of this
101
            // machine (or it is really UCS2). If there is a BOM we determine if the encoding
102
            // is the same as the natural order of this machine.
103
            //
104
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFE
105
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFF
106
                )
107
            {
108
                // BOM Present, indicates Big Endian
109
                //
110
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
111
112
                antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
113
            }
114
            else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
115
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
116
                )
117
            {
118
                // BOM present, indicates Little Endian
119
                //
120
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
121
122
                antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
123
            }
124
            else
125
            {
126
                // No BOM present, assume local computer byte order
127
                //
128
                antlr3UTF16SetupStream(input, isBigEndian, isBigEndian);
129
            }
130
            break;
131
132
        case    ANTLR3_ENC_UTF32:
133
134
            // See if there is a BOM at the start of the input. If not then
135
            // we assume that the byte order is the natural order of this
136
            // machine. If there is we determine if the encoding
137
            // is the same as the natural order of this machine.
138
            //
139
            if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0x00
140
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
141
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xFE
142
                    &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3))    == 0xFF
143
                )
144
            {
145
                // BOM Present, indicates Big Endian
146
                //
147
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
148
149
                antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
150
            }
151
            else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
152
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
153
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
154
                        &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
155
                )
156
            {
157
                // BOM present, indicates Little Endian
158
                //
159
                input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
160
161
                antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
162
            }
163
            else
164
            {
165
                // No BOM present, assume local computer byte order
166
                //
167
                antlr3UTF32SetupStream(input, isBigEndian, isBigEndian);
168
            }
169
            break;
170
171
        case    ANTLR3_ENC_UTF16BE:
172
173
            // Encoding is definately Big Endian with no BOM
174
            //
175
            antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
176
            break;
177
178
        case    ANTLR3_ENC_UTF16LE:
179
180
            // Encoding is definately Little Endian with no BOM
181
            //
182
            antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
183
            break;
184
185
        case    ANTLR3_ENC_UTF32BE:
186
187
            // Encoding is definately Big Endian with no BOM
188
            //
189
            antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
190
            break;
191
192
        case    ANTLR3_ENC_UTF32LE:
193
194
            // Encoding is definately Little Endian with no BOM
195
            //
196
            antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
197
            break;
198
199
        case    ANTLR3_ENC_EBCDIC:
200
201
            // EBCDIC is basically the same as ASCII but with an on the
202
            // fly translation to ASCII
203
            //
204
            antlr3EBCDICSetupStream(input);
205
            break;
206
207
        case    ANTLR3_ENC_8BIT:
208
        default:
209
210
            // Standard 8bit/ASCII
211
            //
212
            antlr38BitSetupStream(input);
213
            break;
214
    }
215
#endif /* 0 */
216
4
}
217
218
54
/**
 * Lookahead callback (installed as the stream's `_LA`).
 *
 * Asks the line buffer for the character located `la - 1` positions away
 * from the stream's current (line, charPositionInLine) position.
 *
 * @param is the ANTLR int stream whose `super` is the line-buffered stream
 * @param la the lookahead distance; it is passed on as offset `la - 1`
 * @return the character found, or ANTLR3_CHARSTREAM_EOF if the line buffer
 *         returns NULL for the requested position
 */
static ANTLR3_UCHAR bufferedInputLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)is->super;
  cvc5::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM buffered =
      (cvc5::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)stream;

  uint8_t* ch = buffered->line_buffer->getPtrWithOffset(
      stream->line, stream->charPositionInLine, la - 1);

  // A NULL pointer from the line buffer is reported to ANTLR as end of input.
  if (ch == NULL) {
    return ANTLR3_CHARSTREAM_EOF;
  }
  return *ch;
}
226
227
/**
 * Rewind callback (installed as the stream's `rewind`).
 *
 * Essentially the same as the original antlr38BitRewind(), except that no
 * seek is performed. The seek in the original function does not do anything
 * and also calls antlr38BitSeek() instead of the overloaded seek() function,
 * which leads to subtle bugs.
 *
 * @param is the ANTLR int stream whose `super` is the input stream
 * @param mark the marker to rewind to; the saved state is looked up at
 *        index `mark - 1` in the stream's marker list
 */
static void bufferedInputRewind(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)is->super;

  // Let the stream clean up its marks first.
  stream->istream->release(stream->istream, mark);

  // Look up the state that was recorded for this mark.
  pANTLR3_LEX_STATE saved = (pANTLR3_LEX_STATE)stream->markers->get(
      stream->markers, (ANTLR3_UINT32)(mark - 1));

  // Only restore the position if a state was found for the mark.
  if (saved != NULL) {
    stream->line = saved->line;
    stream->currentLine = saved->currentLine;
    stream->charPositionInLine = saved->charPositionInLine;
    stream->nextChar = saved->nextChar;
  }
}
254
255
14
/**
 * Consume callback (installed as the stream's `consume`).
 *
 * Advances the stream position by one character, using the line buffer to
 * look up the character at the current (line, charPositionInLine) position.
 * If the line buffer returns NULL for that position, nothing is changed.
 *
 * @param is the ANTLR int stream whose `super` is the line-buffered stream
 */
static void bufferedInputConsume(pANTLR3_INT_STREAM is) {
  pANTLR3_INPUT_STREAM stream = (pANTLR3_INPUT_STREAM)is->super;
  cvc5::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM buffered =
      (cvc5::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)stream;

  uint8_t* here = buffered->line_buffer->getPtr(stream->line,
                                                stream->charPositionInLine);
  if (here == NULL) {
    // No character available at the current position: leave the stream as is.
    return;
  }

  if (*here == LineBuffer::NewLineChar) {
    // The consumed character ends the line: move to column 0 of the next
    // line and record where that line starts.
    stream->line++;
    stream->charPositionInLine = 0;
    stream->currentLine = buffered->line_buffer->getPtr(
        stream->line, stream->charPositionInLine);
    Debug("pipe") << "-- newline!" << std::endl;
  } else {
    // Ordinary character: step one column to the right.
    stream->charPositionInLine++;
  }

  // Keep nextChar in sync with the updated (line, column) position.
  stream->nextChar = buffered->line_buffer->getPtr(
      stream->line, stream->charPositionInLine);
}
278
279
/**
 * Seek callback (installed as the stream's `seek`).
 *
 * In contrast to the original antlr38BitSeek() function, only seeking
 * forward is supported (seeking backwards is only needed for rewinding in
 * the original code, which our rewind does not do, so this should be fine).
 * The stream is advanced by repeatedly calling `is->consume` until
 * `input->nextChar` equals `seekPoint`.
 *
 * @param is the ANTLR int stream whose `super` is the line-buffered stream
 * @param seekPoint the target position, compared against `input->nextChar`
 */
static void bufferedInputSeek(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) {
  pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super));

  // Check that we are not seeking backwards.
  Assert(!((cvc5::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)
              ->line_buffer->isPtrBefore(
                  (uint8_t*)seekPoint, input->line, input->charPositionInLine));

  while ((ANTLR3_MARKER)(input->nextChar) != seekPoint) {
    const ANTLR3_UINT32 lineBefore = input->line;
    const ANTLR3_INT32 posBefore = input->charPositionInLine;

    is->consume(is);

    // consume() leaves the position untouched when the line buffer has no
    // character at the current position. In that case further calls cannot
    // make progress either, so stop instead of spinning forever on a seek
    // point that is never reached.
    if (input->line == lineBefore && input->charPositionInLine == posBefore) {
      break;
    }
  }
}
295
296
/**
 * Size callback (installed as the stream's `size`).
 *
 * Querying the size is not supported for this type of stream, so the call
 * trips an assertion.
 *
 * @param input the input stream (unused)
 * @return always 0
 */
static ANTLR3_UINT32 bufferedInputSize(pANTLR3_INPUT_STREAM input)
{
  Assert(false);
  return 0;
}
301
302
static void bufferedInputSetNewLineChar(pANTLR3_INPUT_STREAM input,
303
                                        ANTLR3_UINT32 newlineChar) {
304
  // Not supported for this type of stream
305
  Assert(false);
306
}
307
308
static void bufferedInputSetUcaseLA(pANTLR3_INPUT_STREAM input,
309
                                    ANTLR3_BOOLEAN flag) {
310
  // Not supported for this type of stream
311
  Assert(false);
312
}
313
314
4
/**
 * Create an ANTLR input stream that is backed by a line buffer.
 *
 * The stream is created by antlr3CreateLineBufferedStream(); afterwards the
 * lookahead, consume, seek, rewind, size, SetNewLineChar and setUcaseLA
 * callbacks are replaced with the line-buffer-aware versions defined in this
 * file, and the stream name is set from `name`.
 *
 * @param in the C++ input stream to read from
 * @param encoding the encoding stored in the stream (only when
 *        CVC5_ANTLR3_OLD_INPUT_STREAM is not defined)
 * @param name the name used as both stream name and file name
 * @param line_buffer the line buffer used to access the input
 * @return the new stream, or NULL if `in` is in a failed state or the stream
 *         could not be created
 */
pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in,
                                                 ANTLR3_UINT32 encoding,
                                                 pANTLR3_UINT8 name,
                                                 LineBuffer* line_buffer) {
  if (!in) {
    return NULL;
  }

  // Set up the stream structure first; the callbacks installed for the
  // default 8Bit stream are adjusted below.
  pANTLR3_INPUT_STREAM stream =
      antlr3CreateLineBufferedStream(in, line_buffer);
  if (stream == NULL) {
    return NULL;
  }

  // Route character access through the line buffer.
  pANTLR3_INT_STREAM istream = stream->istream;
  istream->_LA = bufferedInputLA;
  istream->consume = bufferedInputConsume;
  istream->seek = bufferedInputSeek;
  istream->rewind = bufferedInputRewind;

  // Operations that this kind of stream does not support.
  stream->size = bufferedInputSize;
  stream->SetNewLineChar = bufferedInputSetNewLineChar;
  stream->setUcaseLA = bufferedInputSetUcaseLA;

#ifndef CVC5_ANTLR3_OLD_INPUT_STREAM
  // Remember the encoding scheme we were given by the user.
  stream->encoding = encoding;
#endif /* ! CVC5_ANTLR3_OLD_INPUT_STREAM */

  // Work out the endian type and install any API functions that differ from
  // 8Bit.
  setupInputStream(stream);

  // Finally, record the name as both the stream name and the file name.
  stream->istream->streamName =
      stream->strFactory->newStr8(stream->strFactory, name);
  stream->fileName = stream->istream->streamName;

  return stream;
}
363
364
4
/**
 * Allocate and initialize the line-buffered input stream structure.
 *
 * The structure is zero-allocated with the size of
 * ANTLR3_LINE_BUFFERED_INPUT_STREAM, initialized with the common 8 bit
 * stream setup, and positioned at line 0, column 0 of `line_buffer`.
 *
 * @param in the C++ input stream; its address is stored in the new stream
 * @param line_buffer the line buffer stored in the new stream and used to
 *        obtain the initial position
 * @return the new stream, or NULL if `in` is in a failed state,
 *         `line_buffer` is NULL, or the allocation fails
 */
static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(
    std::istream& in, LineBuffer* line_buffer) {
  // Pointer to the input stream we are going to create
  //
  pANTLR3_INPUT_STREAM input;

  if (!in) {
    return NULL;
  }

  // The line buffer is dereferenced below to obtain the initial position, so
  // a missing buffer is reported as a creation failure rather than crashing.
  if (line_buffer == NULL) {
    return NULL;
  }

  // Allocate memory for the input stream structure
  //
  input = (pANTLR3_INPUT_STREAM)ANTLR3_CALLOC(
      1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM));

  if (input == NULL) {
    return NULL;
  }

  // Structure was allocated correctly, now we can install the pointer
  //
  input->data = NULL;
  input->isAllocated = ANTLR3_FALSE;

  ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = &in;
  ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->line_buffer = line_buffer;
// Call the common 8 bit input stream handler
// initialization.
//
#ifdef CVC5_ANTLR3_OLD_INPUT_STREAM
  antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM);
#else  /* CVC5_ANTLR3_OLD_INPUT_STREAM */
  antlr38BitSetupStream(input);
  // In some libantlr3c 3.4-beta versions, this call is not included in the
  // above.
  // This is probably an erroneously-deleted line in the libantlr3c source since
  // 3.2.
  antlr3GenericSetupStream(input);
#endif /* CVC5_ANTLR3_OLD_INPUT_STREAM */

  // Start at the very beginning of the line buffer (line 0, column 0).
  input->sizeBuf = 0;
  input->newlineChar = LineBuffer::NewLineChar;
  input->charPositionInLine = 0;
  input->line = 0;
  input->nextChar = line_buffer->getPtr(0, 0);
  input->currentLine = line_buffer->getPtr(0, 0);
  return input;
}
412
413
}  // namespace parser
414
29487
}  // namespace cvc5