1"use strict";
2/*!
3 * Copyright 2016 The ANTLR Project. All rights reserved.
4 * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
5 */
6Object.defineProperty(exports, "__esModule", { value: true });
7exports.CharStreams = void 0;
8const CodePointBuffer_1 = require("./CodePointBuffer");
9const CodePointCharStream_1 = require("./CodePointCharStream");
10const IntStream_1 = require("./IntStream");
11// const DEFAULT_BUFFER_SIZE: number = 4096;
/** This class represents the primary interface for creating {@link CharStream}s
 * from a variety of sources as of 4.7. The motivation was to support
 * Unicode code points > U+FFFF. {@link ANTLRInputStream} and
 * {@link ANTLRFileStream} are now deprecated in favor of the streams created
 * by this interface.
 *
 * DEPRECATED: {@code new ANTLRFileStream("myinputfile")}
 * NEW: {@code CharStreams.fromFileName("myinputfile")}
 *
 * WARNING: If you use both the deprecated and the new streams, you will see
 * a nontrivial performance degradation. This speed hit is because the
 * {@link Lexer}'s internal code goes from monomorphic to megamorphic
 * dynamic dispatch to get characters from the input stream. Java's
 * on-the-fly compiler (JIT) is unable to perform the same optimizations,
 * so stick with either the old or the new streams if performance is
 * a primary concern. See the extreme debugging and spelunking
 * needed to identify this issue in our timing rig:
 *
 * https://github.com/antlr/antlr4/pull/1781
 *
 * The ANTLR character streams still buffer all the input when you create
 * the stream, as they have done for ~20 years. If you need unbuffered
 * access, please note that it becomes challenging to create
 * parse trees. The parse tree has to point to tokens which will either
 * point into a stale location in an unbuffered stream or you have to copy
 * the characters out of the buffer into the token. That defeats the purpose
 * of unbuffered input. Per the ANTLR book, unbuffered streams are primarily
 * useful for processing infinite streams *during the parse.*
 *
 * The new streams also use 8-bit buffers when possible so this new
 * interface supports character streams that use half as much memory
 * as the old {@link ANTLRFileStream}, which assumed 16-bit characters.
 *
 * A big shout out to Ben Hamilton (github bhamiltoncx) for his superhuman
 * efforts across all targets to get true Unicode 3.1 support for U+10FFFF.
 *
 * @since 4.7
 */
var CharStreams;
(function (CharStreams) {
    // /**
    //  * Creates a {@link CharStream} given a path to a UTF-8
    //  * encoded file on disk.
    //  *
    //  * Reads the entire contents of the file into the result before returning.
    //  */
    // export function fromFile(file: File): CharStream;
    // export function fromFile(file: File, charset: Charset): CharStream;
    // export function fromFile(file: File, charset?: Charset): CharStream {
    //     if (charset === undefined) {
    //         charset = Charset.forName("UTF-8");
    //     }
    function fromString(s, sourceName) {
        if (sourceName === undefined || sourceName.length === 0) {
            sourceName = IntStream_1.IntStream.UNKNOWN_SOURCE_NAME;
        }
        // Initial guess assumes no code points > U+FFFF: one code
        // point for each code unit in the string
        let codePointBufferBuilder = CodePointBuffer_1.CodePointBuffer.builder(s.length);
        // TODO: CharBuffer.wrap(String) rightfully returns a read-only buffer
        // which doesn't expose its array, so we make a copy.
        let cb = new Uint16Array(s.length);
        for (let i = 0; i < s.length; i++) {
            cb[i] = s.charCodeAt(i);
        }
        codePointBufferBuilder.append(cb);
        return CodePointCharStream_1.CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), sourceName);
    }
    CharStreams.fromString = fromString;
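    // Usage sketch (illustrative only, not part of the library): migrating from the
    // deprecated ANTLRInputStream to CharStreams.fromString. "MyGrammarLexer" is an
    // assumed, ANTLR-generated lexer name and "inline-input" is an arbitrary source name.
    //
    //   const { CharStreams } = require("antlr4ts/CharStreams");
    //   const { CommonTokenStream } = require("antlr4ts/CommonTokenStream");
    //   const { MyGrammarLexer } = require("./MyGrammarLexer");
    //
    //   // OLD (deprecated): new ANTLRInputStream("key = value")
    //   // NEW: build a code-point based stream, then lex and buffer the tokens.
    //   const charStream = CharStreams.fromString("key = value", "inline-input");
    //   const lexer = new MyGrammarLexer(charStream);
    //   const tokens = new CommonTokenStream(lexer);
    //   tokens.fill();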
    // export function bufferFromChannel(
    //     channel: ReadableByteChannel,
    //     charset: Charset,
    //     bufferSize: number,
    //     decodingErrorAction: CodingErrorAction,
    //     inputSize: number): CodePointBuffer {
    //     try {
    //         let utf8BytesIn: Uint8Array = new Uint8Array(bufferSize);
    //         let utf16CodeUnitsOut: Uint16Array = new Uint16Array(bufferSize);
    //         if (inputSize === -1) {
    //             inputSize = bufferSize;
    //         } else if (inputSize > Integer.MAX_VALUE) {
    //             // ByteBuffer et al don't support long sizes
    //             throw new RangeError(`inputSize ${inputSize} larger than max ${Integer.MAX_VALUE}`);
    //         }
    //         let codePointBufferBuilder: CodePointBuffer.Builder = CodePointBuffer.builder(inputSize);
    //         let decoder: CharsetDecoder = charset
    //             .newDecoder()
    //             .onMalformedInput(decodingErrorAction)
    //             .onUnmappableCharacter(decodingErrorAction);
    //         let endOfInput: boolean = false;
    //         while (!endOfInput) {
    //             let bytesRead: number = channel.read(utf8BytesIn);
    //             endOfInput = (bytesRead === -1);
    //             utf8BytesIn.flip();
    //             let result: CoderResult = decoder.decode(
    //                 utf8BytesIn,
    //                 utf16CodeUnitsOut,
    //                 endOfInput);
    //             if (result.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
    //                 result.throwException();
    //             }
    //             utf16CodeUnitsOut.flip();
    //             codePointBufferBuilder.append(utf16CodeUnitsOut);
    //             utf8BytesIn.compact();
    //             utf16CodeUnitsOut.compact();
    //         }
    //         // Handle any bytes at the end of the file which need to
    //         // be represented as errors or substitution characters.
    //         let flushResult: CoderResult = decoder.flush(utf16CodeUnitsOut);
    //         if (flushResult.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
    //             flushResult.throwException();
    //         }
    //         utf16CodeUnitsOut.flip();
    //         codePointBufferBuilder.append(utf16CodeUnitsOut);
    //         return codePointBufferBuilder.build();
    //     }
    //     finally {
    //         channel.close();
    //     }
    // }
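    // A minimal sketch (illustrative only, not library code) of the same idea in
    // Node.js/browser terms: decode UTF-8 bytes with TextDecoder and feed the resulting
    // UTF-16 code units into a CodePointBuffer. Unlike the channel loop above, it
    // decodes in one shot rather than incrementally; "bufferFromBytes" is a
    // hypothetical helper name.
    //
    //   function bufferFromBytes(bytes /* Uint8Array */) {
    //       // { fatal: true } mirrors CodingErrorAction.REPORT: throw on malformed UTF-8.
    //       const text = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    //       const builder = CodePointBuffer_1.CodePointBuffer.builder(text.length);
    //       const utf16CodeUnits = new Uint16Array(text.length);
    //       for (let i = 0; i < text.length; i++) {
    //           utf16CodeUnits[i] = text.charCodeAt(i);
    //       }
    //       builder.append(utf16CodeUnits);
    //       return builder.build();
    //   }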
})(CharStreams = exports.CharStreams || (exports.CharStreams = {}));
//# sourceMappingURL=CharStreams.js.map