UNPKG

htmlparser2/lib/Tokenizer.d.ts

Version:

6.06 kB · TypeScript · View Raw

/**
 * All the states the tokenizer can be in.
 *
 * Every member carries an explicit numeric value; the values run contiguously
 * from `Text = 1` to `InHexEntity = 66`. The section comments below group the
 * members by name only.
 */
declare const enum State {
  Text = 1,

  // Tag names
  BeforeTagName = 2,
  InTagName = 3,
  InSelfClosingTag = 4,
  BeforeClosingTagName = 5,
  InClosingTagName = 6,
  AfterClosingTagName = 7,

  // Attributes
  BeforeAttributeName = 8,
  InAttributeName = 9,
  AfterAttributeName = 10,
  BeforeAttributeValue = 11,
  InAttributeValueDq = 12,
  InAttributeValueSq = 13,
  InAttributeValueNq = 14,

  // Declarations and processing instructions
  BeforeDeclaration = 15,
  InDeclaration = 16,
  InProcessingInstruction = 17,

  // Comments
  BeforeComment = 18,
  InComment = 19,
  InSpecialComment = 20,
  AfterComment1 = 21,
  AfterComment2 = 22,

  // CDATA
  BeforeCdata1 = 23,
  BeforeCdata2 = 24,
  BeforeCdata3 = 25,
  BeforeCdata4 = 26,
  BeforeCdata5 = 27,
  BeforeCdata6 = 28,
  InCdata = 29,
  AfterCdata1 = 30,
  AfterCdata2 = 31,

  // "SpecialS", script and style
  BeforeSpecialS = 32,
  BeforeSpecialSEnd = 33,
  BeforeScript1 = 34,
  BeforeScript2 = 35,
  BeforeScript3 = 36,
  BeforeScript4 = 37,
  BeforeScript5 = 38,
  AfterScript1 = 39,
  AfterScript2 = 40,
  AfterScript3 = 41,
  AfterScript4 = 42,
  AfterScript5 = 43,
  BeforeStyle1 = 44,
  BeforeStyle2 = 45,
  BeforeStyle3 = 46,
  BeforeStyle4 = 47,
  AfterStyle1 = 48,
  AfterStyle2 = 49,
  AfterStyle3 = 50,
  AfterStyle4 = 51,

  // "SpecialT" and title
  BeforeSpecialT = 52,
  BeforeSpecialTEnd = 53,
  BeforeTitle1 = 54,
  BeforeTitle2 = 55,
  BeforeTitle3 = 56,
  BeforeTitle4 = 57,
  AfterTitle1 = 58,
  AfterTitle2 = 59,
  AfterTitle3 = 60,
  AfterTitle4 = 61,

  // Entities
  BeforeEntity = 62,
  BeforeNumericEntity = 63,
  InNamedEntity = 64,
  InNumericEntity = 65,
  InHexEntity = 66
}
/**
 * The set of handler methods handed to the `Tokenizer` constructor.
 *
 * Every member is required and returns `void`.
 */
export interface Callbacks {
  /** @param value The attribute data, as a string. */
  onattribdata(value: string): void;
  /** @param quote A string, `undefined` or `null`. */
  onattribend(quote: string | undefined | null): void;
  /** @param name The attribute name. */
  onattribname(name: string): void;
  /** @param data The CDATA content, as a string. */
  oncdata(data: string): void;
  /** @param name The closing tag name. */
  onclosetag(name: string): void;
  /** @param data The comment content, as a string. */
  oncomment(data: string): void;
  /** @param content The declaration content, as a string. */
  ondeclaration(content: string): void;
  onend(): void;
  /**
   * @param error The error being reported.
   * @param state Optional tokenizer `State` accompanying the error.
   */
  onerror(error: Error, state?: State): void;
  onopentagend(): void;
  /** @param name The opening tag name. */
  onopentagname(name: string): void;
  /** @param instruction The processing instruction, as a string. */
  onprocessinginstruction(instruction: string): void;
  onselfclosingtag(): void;
  /** @param value The text, as a string. */
  ontext(value: string): void;
}
/**
 * Type declaration of the tokenizer class.
 *
 * An instance is constructed from an options object (`xmlMode`,
 * `decodeEntities`, both optional) and a `Callbacks` implementation.
 * Members declared `private` are listed without types, as is usual in
 * generated declaration files.
 */
export default class Tokenizer {
  private readonly cbs;
  /** The current state the tokenizer is in. */
  _state: State;
  /** The read buffer. */
  private buffer;
  /** The beginning of the section that is currently being read. */
  sectionStart: number;
  /** The index within the buffer that we are currently looking at. */
  _index: number;
  /**
   * Data that has already been processed will be removed from the buffer occasionally.
   * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
   */
  private bufferOffset;
  /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
  private baseState;
  /** For special parsing behavior inside of script and style tags. */
  private special;
  /** Indicates whether the tokenizer has been paused. */
  private running;
  /** Indicates whether the tokenizer has finished running / `.end` has been called. */
  private ended;
  private readonly xmlMode;
  private readonly decodeEntities;
  private readonly entityTrie;
  /**
   * @param options Optional `xmlMode` and `decodeEntities` flags.
   * @param cbs The `Callbacks` implementation to use.
   */
  constructor({ xmlMode, decodeEntities, }: {
    xmlMode?: boolean;
    decodeEntities?: boolean;
  }, cbs: Callbacks);
  reset(): void;
  /** @param chunk A string of input. */
  write(chunk: string): void;
  /** @param chunk An optional final string of input. */
  end(chunk?: string): void;
  pause(): void;
  resume(): void;
  /**
   * The start of the current section.
   */
  getAbsoluteSectionStart(): number;
  /**
   * The current index within all of the written data.
   */
  getAbsoluteIndex(): number;
  private stateText;
  /**
   * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
   *
   * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
   * We allow anything that wouldn't end the tag.
   */
  private isTagStartChar;
  private stateBeforeTagName;
  private stateInTagName;
  private stateBeforeClosingTagName;
  private stateInClosingTagName;
  private stateAfterClosingTagName;
  private stateBeforeAttributeName;
  private stateInSelfClosingTag;
  private stateInAttributeName;
  private stateAfterAttributeName;
  private stateBeforeAttributeValue;
  private handleInAttributeValue;
  private stateInAttributeValueDoubleQuotes;
  private stateInAttributeValueSingleQuotes;
  private stateInAttributeValueNoQuotes;
  private stateBeforeDeclaration;
  private stateInDeclaration;
  private stateInProcessingInstruction;
  private stateBeforeComment;
  private stateInComment;
  private stateInSpecialComment;
  private stateAfterComment1;
  private stateAfterComment2;
  private stateBeforeCdata6;
  private stateInCdata;
  private stateAfterCdata1;
  private stateAfterCdata2;
  private stateBeforeSpecialS;
  private stateBeforeSpecialSEnd;
  private stateBeforeSpecialLast;
  private stateAfterSpecialLast;
  private trieIndex;
  private trieCurrent;
  private trieResult;
  private trieExcess;
  private stateBeforeEntity;
  private stateInNamedEntity;
  private emitNamedEntity;
  private decodeNumericEntity;
  private stateInNumericEntity;
  private stateInHexEntity;
  private allowLegacyEntity;
  /**
   * Remove data that has already been consumed from the buffer.
   */
  private cleanup;
  /**
   * Iterates through the buffer, calling the function corresponding to the current state.
   *
   * States that are more likely to be hit are higher up, as a performance improvement.
   */
  private parse;
  private finish;
  /** Handle any trailing data. */
  private handleTrailingData;
  private getSection;
  private emitPartial;
}
export {};
//# sourceMappingURL=Tokenizer.d.ts.map
\No newline at end of file

1	`/** All the states the tokenizer can be in. */`
2	`declare const enum State {`
3	`Text = 1,`
4	`BeforeTagName = 2,`
5	`InTagName = 3,`
6	`InSelfClosingTag = 4,`
7	`BeforeClosingTagName = 5,`
8	`InClosingTagName = 6,`
9	`AfterClosingTagName = 7,`
10	`BeforeAttributeName = 8,`
11	`InAttributeName = 9,`
12	`AfterAttributeName = 10,`
13	`BeforeAttributeValue = 11,`
14	`InAttributeValueDq = 12,`
15	`InAttributeValueSq = 13,`
16	`InAttributeValueNq = 14,`
17	`BeforeDeclaration = 15,`
18	`InDeclaration = 16,`
19	`InProcessingInstruction = 17,`
20	`BeforeComment = 18,`
21	`InComment = 19,`
22	`InSpecialComment = 20,`
23	`AfterComment1 = 21,`
24	`AfterComment2 = 22,`
25	`BeforeCdata1 = 23,`
26	`BeforeCdata2 = 24,`
27	`BeforeCdata3 = 25,`
28	`BeforeCdata4 = 26,`
29	`BeforeCdata5 = 27,`
30	`BeforeCdata6 = 28,`
31	`InCdata = 29,`
32	`AfterCdata1 = 30,`
33	`AfterCdata2 = 31,`
34	`BeforeSpecialS = 32,`
35	`BeforeSpecialSEnd = 33,`
36	`BeforeScript1 = 34,`
37	`BeforeScript2 = 35,`
38	`BeforeScript3 = 36,`
39	`BeforeScript4 = 37,`
40	`BeforeScript5 = 38,`
41	`AfterScript1 = 39,`
42	`AfterScript2 = 40,`
43	`AfterScript3 = 41,`
44	`AfterScript4 = 42,`
45	`AfterScript5 = 43,`
46	`BeforeStyle1 = 44,`
47	`BeforeStyle2 = 45,`
48	`BeforeStyle3 = 46,`
49	`BeforeStyle4 = 47,`
50	`AfterStyle1 = 48,`
51	`AfterStyle2 = 49,`
52	`AfterStyle3 = 50,`
53	`AfterStyle4 = 51,`
54	`BeforeSpecialT = 52,`
55	`BeforeSpecialTEnd = 53,`
56	`BeforeTitle1 = 54,`
57	`BeforeTitle2 = 55,`
58	`BeforeTitle3 = 56,`
59	`BeforeTitle4 = 57,`
60	`AfterTitle1 = 58,`
61	`AfterTitle2 = 59,`
62	`AfterTitle3 = 60,`
63	`AfterTitle4 = 61,`
64	`BeforeEntity = 62,`
65	`BeforeNumericEntity = 63,`
66	`InNamedEntity = 64,`
67	`InNumericEntity = 65,`
68	`InHexEntity = 66`
69	`}`
70	`export interface Callbacks {`
71	`onattribdata(value: string): void;`
72	`onattribend(quote: string \| undefined \| null): void;`
73	`onattribname(name: string): void;`
74	`oncdata(data: string): void;`
75	`onclosetag(name: string): void;`
76	`oncomment(data: string): void;`
77	`ondeclaration(content: string): void;`
78	`onend(): void;`
79	`onerror(error: Error, state?: State): void;`
80	`onopentagend(): void;`
81	`onopentagname(name: string): void;`
82	`onprocessinginstruction(instruction: string): void;`
83	`onselfclosingtag(): void;`
84	`ontext(value: string): void;`
85	`}`
86	`export default class Tokenizer {`
87	`private readonly cbs;`
88	`/** The current state the tokenizer is in. */`
89	`_state: State;`
90	`/** The read buffer. */`
91	`private buffer;`
92	`/** The beginning of the section that is currently being read. */`
93	`sectionStart: number;`
94	`/** The index within the buffer that we are currently looking at. */`
95	`_index: number;`
96	`/**`
97	`* Data that has already been processed will be removed from the buffer occasionally.`
98	``* `_bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.``
99	`*/`
100	`private bufferOffset;`
101	`/** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */`
102	`private baseState;`
103	`/** For special parsing behavior inside of script and style tags. */`
104	`private special;`
105	`/** Indicates whether the tokenizer has been paused. */`
106	`private running;`
107	``/** Indicates whether the tokenizer has finished running / `.end` has been called. */``
108	`private ended;`
109	`private readonly xmlMode;`
110	`private readonly decodeEntities;`
111	`private readonly entityTrie;`
112	`constructor({ xmlMode, decodeEntities, }: {`
113	`xmlMode?: boolean;`
114	`decodeEntities?: boolean;`
115	`}, cbs: Callbacks);`
116	`reset(): void;`
117	`write(chunk: string): void;`
118	`end(chunk?: string): void;`
119	`pause(): void;`
120	`resume(): void;`
121	`/**`
122	`* The start of the current section.`
123	`*/`
124	`getAbsoluteSectionStart(): number;`
125	`/**`
126	`* The current index within all of the written data.`
127	`*/`
128	`getAbsoluteIndex(): number;`
129	`private stateText;`
130	`/**`
131	`* HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.`
132	`*`
133	`* XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).`
134	`* We allow anything that wouldn't end the tag.`
135	`*/`
136	`private isTagStartChar;`
137	`private stateBeforeTagName;`
138	`private stateInTagName;`
139	`private stateBeforeClosingTagName;`
140	`private stateInClosingTagName;`
141	`private stateAfterClosingTagName;`
142	`private stateBeforeAttributeName;`
143	`private stateInSelfClosingTag;`
144	`private stateInAttributeName;`
145	`private stateAfterAttributeName;`
146	`private stateBeforeAttributeValue;`
147	`private handleInAttributeValue;`
148	`private stateInAttributeValueDoubleQuotes;`
149	`private stateInAttributeValueSingleQuotes;`
150	`private stateInAttributeValueNoQuotes;`
151	`private stateBeforeDeclaration;`
152	`private stateInDeclaration;`
153	`private stateInProcessingInstruction;`
154	`private stateBeforeComment;`
155	`private stateInComment;`
156	`private stateInSpecialComment;`
157	`private stateAfterComment1;`
158	`private stateAfterComment2;`
159	`private stateBeforeCdata6;`
160	`private stateInCdata;`
161	`private stateAfterCdata1;`
162	`private stateAfterCdata2;`
163	`private stateBeforeSpecialS;`
164	`private stateBeforeSpecialSEnd;`
165	`private stateBeforeSpecialLast;`
166	`private stateAfterSpecialLast;`
167	`private trieIndex;`
168	`private trieCurrent;`
169	`private trieResult;`
170	`private trieExcess;`
171	`private stateBeforeEntity;`
172	`private stateInNamedEntity;`
173	`private emitNamedEntity;`
174	`private decodeNumericEntity;`
175	`private stateInNumericEntity;`
176	`private stateInHexEntity;`
177	`private allowLegacyEntity;`
178	`/**`
179	`* Remove data that has already been consumed from the buffer.`
180	`*/`
181	`private cleanup;`
182	`/**`
183	`* Iterates through the buffer, calling the function corresponding to the current state.`
184	`*`
185	`* States that are more likely to be hit are higher up, as a performance improvement.`
186	`*/`
187	`private parse;`
188	`private finish;`
189	`/** Handle any trailing data. */`
190	`private handleTrailingData;`
191	`private getSection;`
192	`private emitPartial;`
193	`}`
194	`export {};`
195	`//# sourceMappingURL=Tokenizer.d.ts.map`
\	No newline at end of file