1 | ;
|
2 | /*!
|
3 | * Copyright 2016 The ANTLR Project. All rights reserved.
|
4 | * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
|
5 | */
|
6 | Object.defineProperty(exports, "__esModule", { value: true });
|
7 | exports.CharStreams = void 0;
|
8 | const CodePointBuffer_1 = require("./CodePointBuffer");
|
9 | const CodePointCharStream_1 = require("./CodePointCharStream");
|
10 | const IntStream_1 = require("./IntStream");
|
11 | // const DEFAULT_BUFFER_SIZE: number = 4096;
|
12 | /** This class represents the primary interface for creating {@link CharStream}s
|
13 | * from a variety of sources as of 4.7. The motivation was to support
|
14 | * Unicode code points > U+FFFF. {@link ANTLRInputStream} and
|
15 | * {@link ANTLRFileStream} are now deprecated in favor of the streams created
|
16 | * by this interface.
|
17 | *
|
18 | * DEPRECATED: {@code new ANTLRFileStream("myinputfile")}
|
19 | * NEW: {@code CharStreams.fromFileName("myinputfile")}
|
20 | *
|
21 | * WARNING: If you use both the deprecated and the new streams, you will see
|
22 | * a nontrivial performance degradation. This speed hit is because the
|
23 | * {@link Lexer}'s internal code goes from a monomorphic to megamorphic
|
24 | * dynamic dispatch to get characters from the input stream. Java's
|
25 | * on-the-fly compiler (JIT) is unable to perform the same optimizations
|
26 | * so stick with either the old or the new streams, if performance is
|
27 | * a primary concern. See the extreme debugging and spelunking
|
28 | * needed to identify this issue in our timing rig:
|
29 | *
|
30 | * https://github.com/antlr/antlr4/pull/1781
|
31 | *
|
32 | * The ANTLR character streams still buffer all the input when you create
|
33 | * the stream, as they have done for ~20 years. If you need unbuffered
|
34 | * access, please note that it becomes challenging to create
|
35 | * parse trees. The parse tree has to point to tokens which will either
|
36 | * point into a stale location in an unbuffered stream or you have to copy
|
37 | * the characters out of the buffer into the token. That defeats the purpose
|
38 | * of unbuffered input. Per the ANTLR book, unbuffered streams are primarily
|
39 | * useful for processing infinite streams *during the parse.*
|
40 | *
|
41 | * The new streams also use 8-bit buffers when possible so this new
|
42 | * interface supports character streams that use half as much memory
|
43 | * as the old {@link ANTLRFileStream}, which assumed 16-bit characters.
|
44 | *
|
45 | * A big shout out to Ben Hamilton (github bhamiltoncx) for his superhuman
|
46 | * efforts across all targets to get true Unicode 3.1 support for U+10FFFF.
|
47 | *
|
48 | * @since 4.7
|
49 | */
|
50 | var CharStreams;
|
51 | (function (CharStreams) {
|
52 | // /**
|
53 | // * Creates a {@link CharStream} given a path to a UTF-8
|
54 | // * encoded file on disk.
|
55 | // *
|
56 | // * Reads the entire contents of the file into the result before returning.
|
57 | // */
|
58 | // export function fromFile(file: File): CharStream;
|
59 | // export function fromFile(file: File, charset: Charset): CharStream;
|
60 | // export function fromFile(file: File, charset?: Charset): CharStream {
|
61 | // if (charset === undefined) {
|
62 | // charset = Charset.forName("UTF-8");
|
63 | // }
|
/**
 * Creates a character stream over the contents of a string.
 *
 * @param s The text to read; its UTF-16 code units are copied before being
 *     handed to the code point buffer builder.
 * @param sourceName Optional name for the stream. When it is `undefined` or
 *     empty, `IntStream.UNKNOWN_SOURCE_NAME` is used instead.
 * @returns Whatever `CodePointCharStream.fromBuffer` produces for the built
 *     buffer and the resolved source name.
 */
function fromString(s, sourceName) {
    // Fall back to the shared "unknown" name when the caller gave none.
    const resolvedName = (sourceName === undefined || sourceName.length === 0)
        ? IntStream_1.IntStream.UNKNOWN_SOURCE_NAME
        : sourceName;
    const unitCount = s.length;
    // Capacity hint: one code point per UTF-16 code unit, i.e. the guess
    // that the text contains no code points above U+FFFF.
    const builder = CodePointBuffer_1.CodePointBuffer.builder(unitCount);
    // Copy the string's code units into a typed array for the builder
    // (the Java original could not use the read-only CharBuffer directly).
    const codeUnits = Uint16Array.from({ length: unitCount }, (_unused, index) => s.charCodeAt(index));
    builder.append(codeUnits);
    const buffer = builder.build();
    return CodePointCharStream_1.CodePointCharStream.fromBuffer(buffer, resolvedName);
}
|
80 | CharStreams.fromString = fromString;
|
81 | // export function bufferFromChannel(
|
82 | // channel: ReadableByteChannel,
|
83 | // charset: Charset,
|
84 | // bufferSize: number,
|
85 | // decodingErrorAction: CodingErrorAction,
|
86 | // inputSize: number): CodePointBuffer {
|
87 | // try {
|
88 | // let utf8BytesIn: Uint8Array = new Uint8Array(bufferSize);
|
89 | // let utf16CodeUnitsOut: Uint16Array = new Uint16Array(bufferSize);
|
90 | // if (inputSize === -1) {
|
91 | // inputSize = bufferSize;
|
92 | // } else if (inputSize > Integer.MAX_VALUE) {
|
93 | // // ByteBuffer et al don't support long sizes
|
94 | // throw new RangeError(`inputSize ${inputSize} larger than max ${Integer.MAX_VALUE}`);
|
95 | // }
|
96 | // let codePointBufferBuilder: CodePointBuffer.Builder = CodePointBuffer.builder(inputSize);
|
97 | // let decoder: CharsetDecoder = charset
|
98 | // .newDecoder()
|
99 | // .onMalformedInput(decodingErrorAction)
|
100 | // .onUnmappableCharacter(decodingErrorAction);
|
101 | // let endOfInput: boolean = false;
|
102 | // while (!endOfInput) {
|
103 | // let bytesRead: number = channel.read(utf8BytesIn);
|
104 | // endOfInput = (bytesRead === -1);
|
105 | // utf8BytesIn.flip();
|
106 | // let result: CoderResult = decoder.decode(
|
107 | // utf8BytesIn,
|
108 | // utf16CodeUnitsOut,
|
109 | // endOfInput);
|
110 | // if (result.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
|
111 | // result.throwException();
|
112 | // }
|
113 | // utf16CodeUnitsOut.flip();
|
114 | // codePointBufferBuilder.append(utf16CodeUnitsOut);
|
115 | // utf8BytesIn.compact();
|
116 | // utf16CodeUnitsOut.compact();
|
117 | // }
|
118 | // // Handle any bytes at the end of the file which need to
|
119 | // // be represented as errors or substitution characters.
|
120 | // let flushResult: CoderResult = decoder.flush(utf16CodeUnitsOut);
|
121 | // if (flushResult.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
|
122 | // flushResult.throwException();
|
123 | // }
|
124 | // utf16CodeUnitsOut.flip();
|
125 | // codePointBufferBuilder.append(utf16CodeUnitsOut);
|
126 | // return codePointBufferBuilder.build();
|
127 | // }
|
128 | // finally {
|
129 | // channel.close();
|
130 | // }
|
131 | // }
|
132 | })(CharStreams = exports.CharStreams || (exports.CharStreams = {}));
|
133 | //# sourceMappingURL=CharStreams.js.map |
\ | No newline at end of file |