// Retrieved from UNPKG: Tokenizer.d.ts (TypeScript declaration file, 4.64 kB).
/**
 * Quoting style reported for an attribute.
 *
 * Passed as the `quote` argument of `Callbacks.onattribend`.
 * NOTE(review): the per-member descriptions below are derived from the member
 * names only — confirm against the tokenizer implementation.
 */
export declare enum QuoteType {
    /** Presumably: the attribute had no value at all. */
    NoValue = 0,
    /** Presumably: the attribute value was written without quotes. */
    Unquoted = 1,
    /** Presumably: the attribute value was wrapped in single quotes. */
    Single = 2,
    /** Presumably: the attribute value was wrapped in double quotes. */
    Double = 3
}
/**
 * Event sink that a `Tokenizer` is constructed with.
 *
 * Every member is required and returns nothing.
 *
 * NOTE(review): `start` / `endIndex` look like numeric positions in the
 * tokenizer's input, and `endOffset` looks like an adjustment applied at the
 * end of the reported section. Neither the inclusive/exclusive convention nor
 * the exact meaning of `endOffset` is visible in this declaration — confirm
 * against the implementation before relying on them.
 */
export interface Callbacks {
    /** Reports a span of attribute value data. */
    onattribdata(start: number, endIndex: number): void;
    /** Reports an entity inside an attribute value, as a numeric code point. */
    onattribentity(codepoint: number): void;
    /** Reports the end of an attribute, together with its quoting style. */
    onattribend(quote: QuoteType, endIndex: number): void;
    /** Reports the span of an attribute name. */
    onattribname(start: number, endIndex: number): void;
    /** Reports a CDATA section. */
    oncdata(start: number, endIndex: number, endOffset: number): void;
    /** Reports the span of a closing tag. */
    onclosetag(start: number, endIndex: number): void;
    /** Reports a comment. */
    oncomment(start: number, endIndex: number, endOffset: number): void;
    /** Reports the span of a declaration. */
    ondeclaration(start: number, endIndex: number): void;
    /** Reports that tokenizing has finished. */
    onend(): void;
    /** Reports the end of an opening tag. */
    onopentagend(endIndex: number): void;
    /** Reports the span of an opening tag's name. */
    onopentagname(start: number, endIndex: number): void;
    /** Reports the span of a processing instruction. */
    onprocessinginstruction(start: number, endIndex: number): void;
    /** Reports the end of a self-closing tag. */
    onselfclosingtag(endIndex: number): void;
    /** Reports a span of text. */
    ontext(start: number, endIndex: number): void;
    /** Reports an entity inside text, as a numeric code point. */
    ontextentity(codepoint: number, endIndex: number): void;
}
/**
 * Tokenizer declaration.
 *
 * Input is supplied as string chunks through `write()` and results are
 * delivered through the `Callbacks` object given to the constructor.
 * Only `running` and the methods `reset`, `write`, `end`, `pause` and `resume`
 * are public; everything else is private state or a private state handler.
 */
export default class Tokenizer {
    private readonly cbs;
    /** The current state the tokenizer is in. */
    private state;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    private sectionStart;
    /** The index within the buffer that we are currently looking at. */
    private index;
    /** The start of the last entity. */
    private entityStart;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private isSpecial;
    /**
     * Indicates whether the tokenizer has been paused.
     *
     * NOTE(review): given the name, presumably `false` while paused and `true`
     * otherwise — confirm against the implementation.
     */
    running: boolean;
    /** The offset of the current buffer. */
    private offset;
    private readonly xmlMode;
    private readonly decodeEntities;
    private readonly entityDecoder;
    /**
     * @param options Tokenizer options; both `xmlMode` and `decodeEntities`
     *   are optional booleans. Their defaults are not visible in this declaration.
     * @param cbs Callbacks that receive the tokenizer's events.
     */
    constructor({ xmlMode, decodeEntities, }: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    }, cbs: Callbacks);
    /** Resets the tokenizer (presumably back to its initial state — confirm). */
    reset(): void;
    /**
     * Supplies a chunk of input to the tokenizer.
     *
     * @param chunk The next piece of input text.
     */
    write(chunk: string): void;
    /** Signals that no more input will be written. */
    end(): void;
    /** Pauses the tokenizer; see `running`. */
    pause(): void;
    /** Resumes the tokenizer after `pause()`; see `running`. */
    resume(): void;
    private stateText;
    private currentSequence;
    private sequenceIndex;
    private stateSpecialStartSequence;
    /** Look for an end tag. For <title> tags, also decode entities. */
    private stateInSpecialTag;
    private stateCDATASequence;
    /**
     * When we wait for one specific character, we can speed things up
     * by skipping through the buffer until we find it.
     *
     * @returns Whether the character was found.
     */
    private fastForwardTo;
    /**
     * Comments and CDATA end with `-->` and `]]>`.
     *
     * Their common qualities are:
     * - Their end sequences have a distinct character they start with.
     * - That character is then repeated, so we have to check multiple repeats.
     * - All characters but the start character of the sequence can be skipped.
     */
    private stateInCommentLike;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private startSpecial;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInSpecialComment;
    private stateBeforeSpecialS;
    private stateBeforeSpecialT;
    private startEntity;
    private stateInEntity;
    /**
     * Remove data that has already been consumed from the buffer.
     */
    private cleanup;
    private shouldContinue;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    /** Handle any trailing data. */
    private handleTrailingData;
    private emitCodePoint;
}
//# sourceMappingURL=Tokenizer.d.ts.map