UNPKG

6.25 kBTypeScriptView Raw
/*!
 * Copyright 2016 The ANTLR Project. All rights reserved.
 * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
 */
5import { CharStream } from "./CharStream";
6import { IntegerStack } from "./misc/IntegerStack";
7import { LexerATNSimulator } from "./atn/LexerATNSimulator";
8import { LexerNoViableAltException } from "./LexerNoViableAltException";
9import { RecognitionException } from "./RecognitionException";
10import { Recognizer } from "./Recognizer";
11import { Token } from "./Token";
12import { TokenFactory } from "./TokenFactory";
13import { TokenSource } from "./TokenSource";
/**
 * A lexer is a recognizer that draws input symbols from a character stream.
 * Lexer grammars result in a subclass of this object. A Lexer object
 * uses simplified match() and error recovery mechanisms in the interest
 * of speed.
 */
export declare abstract class Lexer extends Recognizer<number, LexerATNSimulator> implements TokenSource {
    /**
     * Mode constant. NOTE(review): presumably the mode a lexer starts in and
     * the bottom of the mode stack; the value is not visible in this declaration.
     */
    static readonly DEFAULT_MODE: number;
    /** NOTE(review): presumably the sentinel token type set by `more()`; confirm against the implementation. */
    static readonly MORE: number;
    /**
     * NOTE(review): presumably the sentinel token type set by `skip()` (referred
     * to as "SKIP_TOKEN" in older documentation); confirm against the implementation.
     */
    static readonly SKIP: number;
    /** NOTE(review): presumably the channel assigned to tokens by default; confirm. */
    static get DEFAULT_TOKEN_CHANNEL(): number;
    /** NOTE(review): presumably the channel used for hidden (off-channel) tokens; confirm. */
    static get HIDDEN(): number;
    /** Lower bound of the character values handled by lexers. NOTE(review): value not visible here. */
    static readonly MIN_CHAR_VALUE: number;
    /** Upper bound of the character values handled by lexers. NOTE(review): value not visible here. */
    static readonly MAX_CHAR_VALUE: number;

    /** The character stream. NOTE(review): presumably the backing field of the `inputStream` accessor. */
    _input: CharStream;
    /**
     * A token source paired with a character stream.
     * NOTE(review): presumably passed to the token factory when tokens are created; confirm.
     */
    protected _tokenFactorySourcePair: {
        source: TokenSource;
        stream: CharStream;
    };
    /** How to create token objects. */
    protected _factory: TokenFactory;
    /**
     * The goal of all lexer rules/methods is to create a token object.
     * This is an instance variable as multiple rules may collaborate to
     * create a single token. `nextToken` will return this object after
     * matching lexer rule(s). If you subclass to allow multiple token
     * emissions, then set this to the last token to be matched or
     * something non-undefined so that the auto token emit mechanism will not
     * emit another token.
     */
    _token: Token | undefined;
    /**
     * What character index in the stream did the current token start at?
     * Needed, for example, to get the text for the current token. Set at
     * the start of `nextToken`.
     */
    _tokenStartCharIndex: number;
    /** The line on which the first character of the token resides. */
    _tokenStartLine: number;
    /** The character position of the first character within the line. */
    _tokenStartCharPositionInLine: number;
    /**
     * Once we see EOF on the char stream, the next token will be EOF.
     * If you have `DONE : EOF ;` then you see DONE EOF.
     */
    _hitEOF: boolean;
    /** The channel number for the current token. */
    _channel: number;
    /** The token type for the current token. */
    _type: number;
    /** Stack of mode numbers. NOTE(review): presumably maintained by `pushMode` / `popMode`; confirm. */
    readonly _modeStack: IntegerStack;
    /** The current mode number. NOTE(review): presumably updated by `mode`, `pushMode` and `popMode`. */
    _mode: number;
    /**
     * You can set the text for the current token to override what is in
     * the input char buffer. Set the `text` property, or assign this
     * field directly.
     */
    _text: string | undefined;

    /**
     * Create a lexer.
     *
     * @param input The character stream to tokenize.
     */
    constructor(input: CharStream);

    /**
     * Reset the lexer.
     * NOTE(review): exactly which state is cleared is not visible in this declaration.
     */
    reset(): void;
    /**
     * Reset the lexer.
     *
     * @param resetInput NOTE(review): presumably controls whether the input
     * stream is also rewound; confirm against the implementation.
     */
    reset(resetInput: boolean): void;

    /**
     * Return a token from this source; i.e., match a token on the char
     * stream.
     */
    nextToken(): Token;

    /**
     * Instruct the lexer to skip creating a token for the current lexer rule
     * and look for another token. `nextToken()` knows to keep looking when
     * a lexer rule finishes with the token set to the skip sentinel
     * ("SKIP_TOKEN" in the original wording; NOTE(review): presumably the
     * `SKIP` constant). Recall that if the token is undefined at the end of
     * any token rule, `nextToken()` creates one for you and emits it.
     */
    skip(): void;
    /** NOTE(review): presumably marks the current rule's match to be continued by the next rule (see `MORE`); confirm. */
    more(): void;
    /**
     * Select a lexer mode.
     *
     * @param m The mode number.
     */
    mode(m: number): void;
    /**
     * Enter a mode. NOTE(review): presumably saves the current mode on `_modeStack` first; confirm.
     *
     * @param m The mode number to enter.
     */
    pushMode(m: number): void;
    /**
     * Leave the current mode. NOTE(review): presumably restores the mode saved
     * by the matching `pushMode`; the meaning of the returned number should be
     * confirmed against the implementation.
     */
    popMode(): number;

    /** The factory used to create token objects. */
    get tokenFactory(): TokenFactory;
    set tokenFactory(factory: TokenFactory);

    /** The char stream this lexer reads from. */
    get inputStream(): CharStream;
    /** Set the char stream and reset the lexer. */
    set inputStream(input: CharStream);

    /** Name of the input source. NOTE(review): how it is derived is not visible here. */
    get sourceName(): string;

    /**
     * By default does not support multiple emits per `nextToken` invocation
     * for efficiency reasons. Subclass and override this method, `nextToken`,
     * and the `token` accessor (to push tokens into a list and pull from that
     * list rather than a single variable as this implementation does).
     *
     * @param token The token to emit.
     * @returns A token. NOTE(review): presumably `token` itself; confirm.
     */
    emit(token: Token): Token;
    /**
     * The standard method called to automatically emit a token at the
     * outermost lexical rule. The token object should point into the
     * char buffer start..stop. If there is a text override in `text`,
     * use that to set the token's text. Override this method to emit
     * custom Token objects or provide a new factory.
     *
     * @returns The emitted token.
     */
    emit(): Token;
    /** Emit an end-of-file token and return it. NOTE(review): token details not visible here. */
    emitEOF(): Token;

    /** Line number. NOTE(review): presumably the lexer's current line in the input; confirm. */
    get line(): number;
    set line(line: number);
    /** Character position within the line. NOTE(review): presumably the lexer's current position; confirm. */
    get charPositionInLine(): number;
    set charPositionInLine(charPositionInLine: number);
    /** What is the index of the current character of lookahead? */
    get charIndex(): number;

    /**
     * Return the text matched so far for the current token or any
     * text override.
     */
    get text(): string;
    /**
     * Set the complete text of this token; it wipes any previous
     * changes to the text.
     */
    set text(text: string);

    /** Override if emitting multiple tokens. */
    get token(): Token | undefined;
    set token(_token: Token | undefined);

    /** The token type for the current token. */
    get type(): number;
    set type(ttype: number);

    /** The channel number for the current token. */
    get channel(): number;
    set channel(channel: number);

    /** Channel names, supplied by the concrete lexer. NOTE(review): presumably indexed by channel number. */
    abstract readonly channelNames: string[];
    /** Mode names, supplied by the concrete lexer. NOTE(review): presumably indexed by mode number. */
    abstract readonly modeNames: string[];

    /**
     * Return a list of all Token objects in the input char stream.
     * Forces load of all tokens. Does not include the EOF token.
     */
    getAllTokens(): Token[];

    /**
     * Report a lexer error.
     * NOTE(review): presumably forwards `e` to the registered error listeners; confirm.
     *
     * @param e The exception describing the input no lexer rule could match.
     */
    notifyListeners(e: LexerNoViableAltException): void;
    /**
     * Return a displayable form of `s` for error messages.
     * NOTE(review): the exact escaping rules are not visible in this declaration.
     *
     * @param s A string, or a single character given as a number.
     */
    getErrorDisplay(s: string | number): string;
    /**
     * Return a displayable form of the single character `c` for error messages.
     * NOTE(review): the exact formatting (quoting, escaping) is not visible here.
     *
     * @param c The character value.
     */
    getCharErrorDisplay(c: number): string;

    /**
     * Lexers can normally match any char in its vocabulary after matching
     * a token, so do the easy thing and just kill a character and hope
     * it all works out. You can instead use the rule invocation stack
     * to do sophisticated error recovery if you are in a fragment rule.
     *
     * Overloads are listed most-specific first, so the general
     * `RecognitionException` signature is the last one.
     *
     * @param re The exception to recover from.
     */
    recover(re: LexerNoViableAltException): void;
    recover(re: RecognitionException): void;
}