UNPKG

9.07 kBTypeScriptView Raw
1import { Preprocessor } from './preprocessor.js';
2import { type CharacterToken, type DoctypeToken, type TagToken, type EOFToken, type CommentToken } from '../common/token.js';
3import { type ParserErrorHandler } from '../common/error-codes.js';
/**
 * Numeric identifiers for the tokenizer's states.
 *
 * This is the type of {@link Tokenizer.state}. Members are numbered
 * consecutively from 0 (`DATA`) to 78 (`NUMERIC_CHARACTER_REFERENCE_END`).
 *
 * The member names appear to follow the tokenization states of the WHATWG
 * HTML Standard — NOTE(review): confirm against the implementation.
 *
 * The spelling `HEXADEMICAL_*` is kept as-is: the names are part of the
 * declared interface and must match the compiled implementation.
 */
declare const enum State {
    // Content states (also exposed through `TokenizerMode`).
    DATA = 0,
    RCDATA = 1,
    RAWTEXT = 2,
    SCRIPT_DATA = 3,
    PLAINTEXT = 4,

    // Tags.
    TAG_OPEN = 5,
    END_TAG_OPEN = 6,
    TAG_NAME = 7,

    // RCDATA / RAWTEXT end-tag detection.
    RCDATA_LESS_THAN_SIGN = 8,
    RCDATA_END_TAG_OPEN = 9,
    RCDATA_END_TAG_NAME = 10,
    RAWTEXT_LESS_THAN_SIGN = 11,
    RAWTEXT_END_TAG_OPEN = 12,
    RAWTEXT_END_TAG_NAME = 13,

    // Script data, including escaped and double-escaped variants.
    SCRIPT_DATA_LESS_THAN_SIGN = 14,
    SCRIPT_DATA_END_TAG_OPEN = 15,
    SCRIPT_DATA_END_TAG_NAME = 16,
    SCRIPT_DATA_ESCAPE_START = 17,
    SCRIPT_DATA_ESCAPE_START_DASH = 18,
    SCRIPT_DATA_ESCAPED = 19,
    SCRIPT_DATA_ESCAPED_DASH = 20,
    SCRIPT_DATA_ESCAPED_DASH_DASH = 21,
    SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 22,
    SCRIPT_DATA_ESCAPED_END_TAG_OPEN = 23,
    SCRIPT_DATA_ESCAPED_END_TAG_NAME = 24,
    SCRIPT_DATA_DOUBLE_ESCAPE_START = 25,
    SCRIPT_DATA_DOUBLE_ESCAPED = 26,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 27,
    SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 28,
    SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 29,
    SCRIPT_DATA_DOUBLE_ESCAPE_END = 30,

    // Attributes.
    BEFORE_ATTRIBUTE_NAME = 31,
    ATTRIBUTE_NAME = 32,
    AFTER_ATTRIBUTE_NAME = 33,
    BEFORE_ATTRIBUTE_VALUE = 34,
    ATTRIBUTE_VALUE_DOUBLE_QUOTED = 35,
    ATTRIBUTE_VALUE_SINGLE_QUOTED = 36,
    ATTRIBUTE_VALUE_UNQUOTED = 37,
    AFTER_ATTRIBUTE_VALUE_QUOTED = 38,
    SELF_CLOSING_START_TAG = 39,

    // Comments and markup declarations.
    BOGUS_COMMENT = 40,
    MARKUP_DECLARATION_OPEN = 41,
    COMMENT_START = 42,
    COMMENT_START_DASH = 43,
    COMMENT = 44,
    COMMENT_LESS_THAN_SIGN = 45,
    COMMENT_LESS_THAN_SIGN_BANG = 46,
    COMMENT_LESS_THAN_SIGN_BANG_DASH = 47,
    COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH = 48,
    COMMENT_END_DASH = 49,
    COMMENT_END = 50,
    COMMENT_END_BANG = 51,

    // DOCTYPE.
    DOCTYPE = 52,
    BEFORE_DOCTYPE_NAME = 53,
    DOCTYPE_NAME = 54,
    AFTER_DOCTYPE_NAME = 55,
    AFTER_DOCTYPE_PUBLIC_KEYWORD = 56,
    BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 57,
    DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 58,
    DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 59,
    AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 60,
    BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 61,
    AFTER_DOCTYPE_SYSTEM_KEYWORD = 62,
    BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 63,
    DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 64,
    DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 65,
    AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 66,
    BOGUS_DOCTYPE = 67,

    // CDATA sections.
    CDATA_SECTION = 68,
    CDATA_SECTION_BRACKET = 69,
    CDATA_SECTION_END = 70,

    // Character references.
    CHARACTER_REFERENCE = 71,
    NAMED_CHARACTER_REFERENCE = 72,
    AMBIGUOUS_AMPERSAND = 73,
    NUMERIC_CHARACTER_REFERENCE = 74,
    HEXADEMICAL_CHARACTER_REFERENCE_START = 75,
    HEXADEMICAL_CHARACTER_REFERENCE = 76,
    DECIMAL_CHARACTER_REFERENCE = 77,
    NUMERIC_CHARACTER_REFERENCE_END = 78
}
/**
 * Publicly exported subset of the tokenizer states.
 *
 * Each property is typed as the matching `State` member, so a value read
 * from this object is assignable to {@link Tokenizer.state}.
 *
 * NOTE(review): presumably these are the states external code (e.g. a tree
 * builder) is expected to switch the tokenizer into — confirm against callers.
 */
export declare const TokenizerMode: {
    readonly DATA: State.DATA;
    readonly RCDATA: State.RCDATA;
    readonly RAWTEXT: State.RAWTEXT;
    readonly SCRIPT_DATA: State.SCRIPT_DATA;
    readonly PLAINTEXT: State.PLAINTEXT;
    readonly CDATA_SECTION: State.CDATA_SECTION;
};
/** Options accepted by the {@link Tokenizer} constructor. */
export interface TokenizerOptions {
    /**
     * Optional flag.
     *
     * NOTE(review): presumably enables attaching source code location
     * information to emitted tokens — confirm against the implementation.
     */
    sourceCodeLocationInfo?: boolean;
}
/**
 * Callback sink passed to the {@link Tokenizer} constructor.
 *
 * There is one required callback per token kind. Three callbacks take a
 * `CharacterToken`: `onCharacter`, `onNullCharacter` and
 * `onWhitespaceCharacter`.
 */
export interface TokenHandler {
    /** Receives a comment token. */
    onComment(token: CommentToken): void;
    /** Receives a doctype token. */
    onDoctype(token: DoctypeToken): void;
    /** Receives a start-tag token. */
    onStartTag(token: TagToken): void;
    /** Receives an end-tag token. */
    onEndTag(token: TagToken): void;
    /** Receives the end-of-file token. */
    onEof(token: EOFToken): void;
    /** Receives a character token. */
    onCharacter(token: CharacterToken): void;
    /** Receives a character token; per the name, one holding NULL characters (TODO confirm). */
    onNullCharacter(token: CharacterToken): void;
    /** Receives a character token; per the name, one holding whitespace (TODO confirm). */
    onWhitespaceCharacter(token: CharacterToken): void;
    /** Optional parse-error callback; may be omitted or set to `null`. */
    onParseError?: ParserErrorHandler | null;
}
/**
 * Declaration of the tokenizer class.
 *
 * It is constructed with {@link TokenizerOptions} and a {@link TokenHandler}.
 * Input is supplied through {@link Tokenizer.write} and
 * {@link Tokenizer.insertHtmlAtCurrentPos}; processing can be suspended and
 * continued with {@link Tokenizer.pause} and {@link Tokenizer.resume}.
 *
 * There is one private `_state*` member per `State` value (plus the
 * `handleSpecialEndTag` helper); these are implementation details and are
 * listed only because declaration emit keeps private member names.
 */
export declare class Tokenizer {
    private options;
    private handler;
    /** Preprocessor instance (see `./preprocessor.js`). */
    preprocessor: Preprocessor;
    private paused;
    /** Ensures that the parsing loop isn't run multiple times at once. */
    private inLoop;
    /**
     * Indicates that the current adjusted node exists, is not an element in the HTML namespace,
     * and that it is not an integration point for either MathML or HTML.
     *
     * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
     */
    inForeignNode: boolean;
    /** Per the name, the tag name of the last start tag seen — TODO confirm against the implementation. */
    lastStartTagName: string;
    /** Public boolean flag; exact semantics are not visible in this declaration. */
    active: boolean;
    /** Current tokenizer state. */
    state: State;
    private returnState;
    private charRefCode;
    private consumedAfterSnapshot;
    private currentLocation;
    private currentCharacterToken;
    private currentToken;
    private currentAttr;
    /**
     * @param options Tokenizer options.
     * @param handler Receiver of token and (optionally) parse-error callbacks.
     */
    constructor(options: TokenizerOptions, handler: TokenHandler);
    private _err;
    private getCurrentLocation;
    private _runParsingLoop;
    /** Pauses the tokenizer (presumably sets the private `paused` flag — confirm). */
    pause(): void;
    /**
     * Resumes a paused tokenizer.
     *
     * @param writeCallback Optional callback taking no arguments; when it is
     * invoked is not visible in this declaration.
     */
    resume(writeCallback?: () => void): void;
    /**
     * Supplies a chunk of input.
     *
     * @param chunk Input text.
     * @param isLastChunk Whether this is the final chunk of input.
     * @param writeCallback Optional callback taking no arguments; when it is
     * invoked is not visible in this declaration.
     */
    write(chunk: string, isLastChunk: boolean, writeCallback?: () => void): void;
    /**
     * Inserts HTML text at the current position, per the method name.
     *
     * @param chunk Text to insert.
     */
    insertHtmlAtCurrentPos(chunk: string): void;
    private _ensureHibernation;
    private _consume;
    private _unconsume;
    private _reconsumeInState;
    private _advanceBy;
    private _consumeSequenceIfMatch;
    private _createStartTagToken;
    private _createEndTagToken;
    private _createCommentToken;
    private _createDoctypeToken;
    private _createCharacterToken;
    private _createAttr;
    private _leaveAttrName;
    private _leaveAttrValue;
    private prepareToken;
    private emitCurrentTagToken;
    private emitCurrentComment;
    private emitCurrentDoctype;
    private _emitCurrentCharacterToken;
    private _emitEOFToken;
    private _appendCharToCurrentCharacterToken;
    private _emitCodePoint;
    private _emitChars;
    private _matchNamedCharacterReference;
    private _isCharacterReferenceInAttribute;
    private _flushCodePointConsumedAsCharacterReference;
    private _callState;
    private _stateData;
    private _stateRcdata;
    private _stateRawtext;
    private _stateScriptData;
    private _statePlaintext;
    private _stateTagOpen;
    private _stateEndTagOpen;
    private _stateTagName;
    private _stateRcdataLessThanSign;
    private _stateRcdataEndTagOpen;
    private handleSpecialEndTag;
    private _stateRcdataEndTagName;
    private _stateRawtextLessThanSign;
    private _stateRawtextEndTagOpen;
    private _stateRawtextEndTagName;
    private _stateScriptDataLessThanSign;
    private _stateScriptDataEndTagOpen;
    private _stateScriptDataEndTagName;
    private _stateScriptDataEscapeStart;
    private _stateScriptDataEscapeStartDash;
    private _stateScriptDataEscaped;
    private _stateScriptDataEscapedDash;
    private _stateScriptDataEscapedDashDash;
    private _stateScriptDataEscapedLessThanSign;
    private _stateScriptDataEscapedEndTagOpen;
    private _stateScriptDataEscapedEndTagName;
    private _stateScriptDataDoubleEscapeStart;
    private _stateScriptDataDoubleEscaped;
    private _stateScriptDataDoubleEscapedDash;
    private _stateScriptDataDoubleEscapedDashDash;
    private _stateScriptDataDoubleEscapedLessThanSign;
    private _stateScriptDataDoubleEscapeEnd;
    private _stateBeforeAttributeName;
    private _stateAttributeName;
    private _stateAfterAttributeName;
    private _stateBeforeAttributeValue;
    private _stateAttributeValueDoubleQuoted;
    private _stateAttributeValueSingleQuoted;
    private _stateAttributeValueUnquoted;
    private _stateAfterAttributeValueQuoted;
    private _stateSelfClosingStartTag;
    private _stateBogusComment;
    private _stateMarkupDeclarationOpen;
    private _stateCommentStart;
    private _stateCommentStartDash;
    private _stateComment;
    private _stateCommentLessThanSign;
    private _stateCommentLessThanSignBang;
    private _stateCommentLessThanSignBangDash;
    private _stateCommentLessThanSignBangDashDash;
    private _stateCommentEndDash;
    private _stateCommentEnd;
    private _stateCommentEndBang;
    private _stateDoctype;
    private _stateBeforeDoctypeName;
    private _stateDoctypeName;
    private _stateAfterDoctypeName;
    private _stateAfterDoctypePublicKeyword;
    private _stateBeforeDoctypePublicIdentifier;
    private _stateDoctypePublicIdentifierDoubleQuoted;
    private _stateDoctypePublicIdentifierSingleQuoted;
    private _stateAfterDoctypePublicIdentifier;
    private _stateBetweenDoctypePublicAndSystemIdentifiers;
    private _stateAfterDoctypeSystemKeyword;
    private _stateBeforeDoctypeSystemIdentifier;
    private _stateDoctypeSystemIdentifierDoubleQuoted;
    private _stateDoctypeSystemIdentifierSingleQuoted;
    private _stateAfterDoctypeSystemIdentifier;
    private _stateBogusDoctype;
    private _stateCdataSection;
    private _stateCdataSectionBracket;
    private _stateCdataSectionEnd;
    private _stateCharacterReference;
    private _stateNamedCharacterReference;
    private _stateAmbiguousAmpersand;
    private _stateNumericCharacterReference;
    private _stateHexademicalCharacterReferenceStart;
    private _stateHexademicalCharacterReference;
    private _stateDecimalCharacterReference;
    private _stateNumericCharacterReferenceEnd;
}
247export {};
248//# sourceMappingURL=index.d.ts.map
\No newline at end of file