1"use strict";
2/*!
3 * Copyright 2016 The ANTLR Project. All rights reserved.
4 * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
5 */
6Object.defineProperty(exports, "__esModule", { value: true });
7exports.CharStreams = void 0;
8const CodePointBuffer_1 = require("./CodePointBuffer");
9const CodePointCharStream_1 = require("./CodePointCharStream");
10const IntStream_1 = require("./IntStream");
11// const DEFAULT_BUFFER_SIZE: number = 4096;
/** This class represents the primary interface for creating {@link CharStream}s
 * from a variety of sources as of 4.7. The motivation was to support
 * Unicode code points > U+FFFF. {@link ANTLRInputStream} and
 * {@link ANTLRFileStream} are now deprecated in favor of the streams created
 * by this interface.
 *
 * DEPRECATED: {@code new ANTLRFileStream("myinputfile")}
 * NEW: {@code CharStreams.fromFileName("myinputfile")}
 *
 * WARNING: If you use both the deprecated and the new streams, you will see
 * a nontrivial performance degradation. This speed hit is because the
 * {@link Lexer}'s internal code goes from monomorphic to megamorphic
 * dynamic dispatch to get characters from the input stream. Java's
 * on-the-fly compiler (JIT) is unable to perform the same optimizations,
 * so stick with either the old or the new streams if performance is
 * a primary concern. See the extreme debugging and spelunking
 * needed to identify this issue in our timing rig:
 *
 * https://github.com/antlr/antlr4/pull/1781
 *
 * The ANTLR character streams still buffer all the input when you create
 * the stream, as they have done for ~20 years. If you need unbuffered
 * access, please note that it becomes challenging to create
 * parse trees. The parse tree has to point to tokens which will either
 * point into a stale location in an unbuffered stream or you have to copy
 * the characters out of the buffer into the token. That defeats the purpose
 * of unbuffered input. Per the ANTLR book, unbuffered streams are primarily
 * useful for processing infinite streams *during the parse.*
 *
 * The new streams also use 8-bit buffers when possible so this new
 * interface supports character streams that use half as much memory
 * as the old {@link ANTLRFileStream}, which assumed 16-bit characters.
 *
 * A big shout out to Ben Hamilton (github bhamiltoncx) for his superhuman
 * efforts across all targets to get true Unicode 3.1 support for U+10FFFF.
 *
 * @since 4.7
 */
var CharStreams;
(function (CharStreams) {
    // /**
    //  * Creates a {@link CharStream} given a path to a UTF-8
    //  * encoded file on disk.
    //  *
    //  * Reads the entire contents of the file into the result before returning.
    //  */
    // export function fromFile(file: File): CharStream;
    // export function fromFile(file: File, charset: Charset): CharStream;
    // export function fromFile(file: File, charset?: Charset): CharStream {
    //     if (charset === undefined) {
    //         charset = Charset.forName("UTF-8");
    //     }
    function fromString(s, sourceName) {
        if (sourceName === undefined || sourceName.length === 0) {
            sourceName = IntStream_1.IntStream.UNKNOWN_SOURCE_NAME;
        }
        // Initial guess assumes no code points > U+FFFF: one code
        // point for each code unit in the string
        let codePointBufferBuilder = CodePointBuffer_1.CodePointBuffer.builder(s.length);
        // TODO: CharBuffer.wrap(String) rightfully returns a read-only buffer
        // which doesn't expose its array, so we make a copy.
        let cb = new Uint16Array(s.length);
        for (let i = 0; i < s.length; i++) {
            cb[i] = s.charCodeAt(i);
        }
        codePointBufferBuilder.append(cb);
        return CodePointCharStream_1.CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), sourceName);
    }
    CharStreams.fromString = fromString;
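    // Usage sketch (illustrative only, not part of the library): migrating from the
    // deprecated ANTLRInputStream to CharStreams.fromString. "MyGrammarLexer" is an
    // assumed, ANTLR-generated lexer name and "inline-input" is an arbitrary source name.
    //
    //   const { CharStreams } = require("antlr4ts/CharStreams");
    //   const { CommonTokenStream } = require("antlr4ts/CommonTokenStream");
    //   const { MyGrammarLexer } = require("./MyGrammarLexer");
    //
    //   // OLD (deprecated): new ANTLRInputStream("key = value")
    //   // NEW: build a code-point based stream, then lex and buffer the tokens.
    //   const charStream = CharStreams.fromString("key = value", "inline-input");
    //   const lexer = new MyGrammarLexer(charStream);
    //   const tokens = new CommonTokenStream(lexer);
    //   tokens.fill();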
    // export function bufferFromChannel(
    //     channel: ReadableByteChannel,
    //     charset: Charset,
    //     bufferSize: number,
    //     decodingErrorAction: CodingErrorAction,
    //     inputSize: number): CodePointBuffer {
    //     try {
    //         let utf8BytesIn: Uint8Array = new Uint8Array(bufferSize);
    //         let utf16CodeUnitsOut: Uint16Array = new Uint16Array(bufferSize);
    //         if (inputSize === -1) {
    //             inputSize = bufferSize;
    //         } else if (inputSize > Integer.MAX_VALUE) {
    //             // ByteBuffer et al don't support long sizes
    //             throw new RangeError(`inputSize ${inputSize} larger than max ${Integer.MAX_VALUE}`);
    //         }
    //         let codePointBufferBuilder: CodePointBuffer.Builder = CodePointBuffer.builder(inputSize);
    //         let decoder: CharsetDecoder = charset
    //             .newDecoder()
    //             .onMalformedInput(decodingErrorAction)
    //             .onUnmappableCharacter(decodingErrorAction);
    //         let endOfInput: boolean = false;
    //         while (!endOfInput) {
    //             let bytesRead: number = channel.read(utf8BytesIn);
    //             endOfInput = (bytesRead === -1);
    //             utf8BytesIn.flip();
    //             let result: CoderResult = decoder.decode(
    //                 utf8BytesIn,
    //                 utf16CodeUnitsOut,
    //                 endOfInput);
    //             if (result.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
    //                 result.throwException();
    //             }
    //             utf16CodeUnitsOut.flip();
    //             codePointBufferBuilder.append(utf16CodeUnitsOut);
    //             utf8BytesIn.compact();
    //             utf16CodeUnitsOut.compact();
    //         }
    //         // Handle any bytes at the end of the file which need to
    //         // be represented as errors or substitution characters.
    //         let flushResult: CoderResult = decoder.flush(utf16CodeUnitsOut);
    //         if (flushResult.isError() && decodingErrorAction === CodingErrorAction.REPORT) {
    //             flushResult.throwException();
    //         }
    //         utf16CodeUnitsOut.flip();
    //         codePointBufferBuilder.append(utf16CodeUnitsOut);
    //         return codePointBufferBuilder.build();
    //     }
    //     finally {
    //         channel.close();
    //     }
    // }
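    // A minimal sketch (illustrative only, not library code) of the same idea in
    // Node.js/browser terms: decode UTF-8 bytes with TextDecoder and feed the resulting
    // UTF-16 code units into a CodePointBuffer. Unlike the channel loop above, it
    // decodes in one shot rather than incrementally; "bufferFromBytes" is a
    // hypothetical helper name.
    //
    //   function bufferFromBytes(bytes /* Uint8Array */) {
    //       // { fatal: true } mirrors CodingErrorAction.REPORT: throw on malformed UTF-8.
    //       const text = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    //       const builder = CodePointBuffer_1.CodePointBuffer.builder(text.length);
    //       const utf16CodeUnits = new Uint16Array(text.length);
    //       for (let i = 0; i < text.length; i++) {
    //           utf16CodeUnits[i] = text.charCodeAt(i);
    //       }
    //       builder.append(utf16CodeUnits);
    //       return builder.build();
    //   }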
})(CharStreams = exports.CharStreams || (exports.CharStreams = {}));
//# sourceMappingURL=CharStreams.js.map