UNPKG

8.09 kBTypeScriptView Raw
/**
 * Bit masks over a 16-bit value.
 *
 * The three masks are disjoint and together cover all 16 bits
 * (`0xC000 | 0x3F80 | 0x007F === 0xFFFF`).
 *
 * NOTE(review): judging by the names, each mask presumably selects one field
 * of a decode-trie node — confirm against the implementation.
 */
export declare enum BinTrieFlags {
    /** `0xC000`: the two highest bits. */
    VALUE_LENGTH = 49152,
    /** `0x3F80`: the seven bits directly below `VALUE_LENGTH`. */
    BRANCH_LENGTH = 16256,
    /** `0x007F`: the seven lowest bits. */
    JUMP_TABLE = 127
}
/**
 * Decoding modes; they differ in how an entity is allowed to end.
 */
export declare enum DecodingMode {
    /** Entities in text nodes that can end with any character. */
    Legacy = 0,
    /** Only allow entities terminated with a semicolon. */
    Strict = 1,
    /** Entities in attributes have limitations on ending characters. */
    Attribute = 2
}
/**
 * Producers for character reference errors as defined in the HTML spec.
 *
 * The first two member names mirror the HTML parse error codes
 * `missing-semicolon-after-character-reference` and
 * `absence-of-digits-in-numeric-character-reference`.
 */
export interface EntityErrorProducer {
    /** Reports a character reference that was not terminated with a semicolon. */
    missingSemicolonAfterCharacterReference(): void;
    /**
     * Reports a numeric character reference that contains no digits.
     *
     * @param consumedCharacters The number of characters consumed so far.
     */
    absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
    /**
     * Validates the value of a numeric character reference.
     *
     * @param code The numeric value of the reference.
     * NOTE(review): presumably the decoded code point — confirm with the caller.
     */
    validateNumericCharacterReference(code: number): void;
}
/**
 * Token decoder with support of writing partial entities.
 *
 * Per the member documentation below: `startEntity` resets the instance,
 * `write` may then be called one or more times (it returns -1 while the
 * entity is still incomplete), and `end` signals that no more input follows.
 */
export declare class EntityDecoder {
    /** The tree used to decode entities. */
    private readonly decodeTree;
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For multi-byte named entities, this will be called multiple times,
     * with the second codepoint, and the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of bytes consumed by the decoder.
     */
    private readonly emitCodePoint;
    /** An object that is used to produce errors. */
    private readonly errors?;
    constructor(
        /** The tree used to decode entities. */
        decodeTree: Uint16Array,
        /**
         * The function that is called when a codepoint is decoded.
         *
         * For multi-byte named entities, this will be called multiple times,
         * with the second codepoint, and the same `consumed` value.
         *
         * @param codepoint The decoded codepoint.
         * @param consumed The number of bytes consumed by the decoder.
         */
        emitCodePoint: (cp: number, consumed: number) => void,
        /** An object that is used to produce errors. */
        errors?: EntityErrorProducer | undefined);
    /** The current state of the decoder. */
    private state;
    /** Characters that were consumed while parsing an entity. */
    private consumed;
    /**
     * The result of the entity.
     *
     * Either the result index of a named entity, or the codepoint of a
     * numeric entity.
     */
    private result;
    /** The current index in the decode tree. */
    private treeIndex;
    /** The number of characters that were consumed in excess. */
    private excess;
    /** The mode in which the decoder is operating. */
    private decodeMode;
    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The mode in which the decoder should operate.
     */
    startEntity(decodeMode: DecodingMode): void;
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(input: string, offset: number): number;
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericStart;
    private addToNumericResult;
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericHex;
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericDecimal;
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    private emitNumericEntity;
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param input The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNamedEntity;
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    private emitNotTerminatedNamedEntity;
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    private emitNamedEntityData;
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end(): number;
}
/**
 * Determines the branch of the current node that is taken given the current
 * character. This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIndex The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
/**
 * Decodes an HTML string.
 *
 * @param htmlString The string to decode.
 * @param mode The decoding mode. Optional; the default is not visible in this
 *             declaration file.
 * @returns The decoded string.
 */
export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
/**
 * Decodes an HTML string in an attribute.
 *
 * @param htmlAttribute The attribute string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLAttribute(htmlAttribute: string): string;
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
 *
 * @param htmlString The string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLStrict(htmlString: string): string;
/**
 * Decodes an XML string, requiring all entities to be terminated by a semicolon.
 *
 * @param xmlString The string to decode.
 * @returns The decoded string.
 */
export declare function decodeXML(xmlString: string): string;
// Re-exports from the generated decode data modules and the code point helpers.
export { htmlDecodeTree } from "./generated/decode-data-html.js";
export { xmlDecodeTree } from "./generated/decode-data-xml.js";
export { decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
//# sourceMappingURL=decode.d.ts.map