// Tokenizer.d.ts — TypeScript declaration file (6.06 kB), as served by UNPKG.

/**
 * All the states the tokenizer can be in.
 *
 * Declared as an ambient `const enum`, so members are referenced by name at
 * the type level and replaced by their numeric literal at compile time.
 * Values are assigned explicitly and run contiguously from 1 to 66.
 */
declare const enum State {
    Text = 1,

    // Tag names
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // Special tags starting with "s" (script / style)
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // Special tags starting with "t" (title)
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * The set of handlers a `Tokenizer` is constructed with (its `cbs` argument).
 *
 * Every member is required and returns nothing. The only optional parameter
 * is `state` on `onerror`.
 */
export interface Callbacks {
    onattribdata(value: string): void;
    /** `quote` may be a string, `undefined`, or `null`. */
    onattribend(quote: string | undefined | null): void;
    onattribname(name: string): void;
    oncdata(data: string): void;
    onclosetag(name: string): void;
    oncomment(data: string): void;
    ondeclaration(content: string): void;
    onend(): void;
    /** `state` is optional; when given, it is a tokenizer `State` value. */
    onerror(error: Error, state?: State): void;
    onopentagend(): void;
    onopentagname(name: string): void;
    onprocessinginstruction(instruction: string): void;
    onselfclosingtag(): void;
    ontext(value: string): void;
}
/**
 * Type declaration for the tokenizer class.
 *
 * It is constructed from an options object (`xmlMode`, `decodeEntities`) and
 * a `Callbacks` implementation. Only signatures are declared here; the
 * implementation lives in the accompanying JavaScript module.
 */
export default class Tokenizer {
    private readonly cbs;
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    private readonly xmlMode;
    private readonly decodeEntities;
    private readonly entityTrie;
    /**
     * @param options Both `xmlMode` and `decodeEntities` are optional booleans.
     * @param cbs The callback handlers to use.
     */
    constructor({ xmlMode, decodeEntities, }: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    }, cbs: Callbacks);
    reset(): void;
    /** Takes a chunk of input as a string. */
    write(chunk: string): void;
    /** Ends the input; a final chunk may optionally be passed. */
    end(chunk?: string): void;
    /** Presumably pauses tokenizing until `resume` is called — confirm against the implementation. */
    pause(): void;
    /** Presumably continues after a `pause` — confirm against the implementation. */
    resume(): void;
    /**
     * The start of the current section.
     */
    getAbsoluteSectionStart(): number;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private trieIndex;
    private trieCurrent;
    private trieResult;
    private trieExcess;
    private stateBeforeEntity;
    private stateInNamedEntity;
    private emitNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private allowLegacyEntity;
    /**
     * Remove data that has already been consumed from the buffer.
     */
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    /** Handle any trailing data. */
    private handleTrailingData;
    private getSection;
    private emitPartial;
}
// An empty export keeps this file a module, so its declarations stay module-scoped.
export {};
//# sourceMappingURL=Tokenizer.d.ts.map