1 | export declare enum BinTrieFlags { // Three non-overlapping bit masks; OR-ed together they cover all 16 bits (0xFFFF).
|
2 | VALUE_LENGTH = 49152, // 0xC000: bits 14-15.
|
3 | BRANCH_LENGTH = 16256, // 0x3F80: bits 7-13.
|
4 | JUMP_TABLE = 127 // 0x007F: bits 0-6.
|
5 | } // NOTE(review): presumably applied to the 16-bit entries of the `Uint16Array` decode tree - confirm against the implementation.
|
6 | export declare enum DecodingMode { // Decoding modes accepted by `EntityDecoder.startEntity` and by `decodeHTML`.
|
7 | /** NOTE(review): presumably the lenient mode in which entities may end without a semicolon - confirm. */
|
8 | Legacy = 0,
|
9 | /** NOTE(review): presumably requires every entity to be terminated by a semicolon (compare `decodeHTMLStrict`) - confirm. */
|
10 | Strict = 1,
|
11 | /** NOTE(review): presumably the mode for entities inside attribute values (compare `decodeHTMLAttribute`) - confirm. */
|
12 | Attribute = 2
|
13 | }
|
14 |
|
15 |
|
16 |
|
17 | export interface EntityErrorProducer { // Error-reporting callbacks; accepted as the optional `errors` argument of the `EntityDecoder` constructor.
|
18 | missingSemicolonAfterCharacterReference(): void; // By name: a character reference was not terminated by a semicolon.
|
19 | absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void; // By name: a numeric reference had no digits; receives a count of consumed characters.
|
20 | validateNumericCharacterReference(code: number): void; // Receives a numeric code; NOTE(review): which codes are treated as errors is not visible in this declaration.
|
21 | }
|
22 |
|
23 |
|
24 |
|
25 | export declare class EntityDecoder {
|
26 | /** The decode trie (`Uint16Array`); presumably the constructor's `decodeTree` argument - confirm. */
|
27 | private readonly decodeTree;
|
28 | |
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 | /** Callback taking a code point (`cp`) and a `consumed` count; presumably the constructor's `emitCodePoint` argument - confirm. */
|
37 | private readonly emitCodePoint;
|
38 | /** Optional error reporter; presumably the constructor's `errors` argument (an `EntityErrorProducer`) - confirm. */
|
39 | private readonly errors?;
|
40 | constructor(
|
41 | /** The trie to decode against. */
|
42 | decodeTree: Uint16Array,
|
43 | |
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 | /** Callback receiving a code point (`cp`) and a `consumed` count. */
|
52 | emitCodePoint: (cp: number, consumed: number) => void,
|
53 | /** Optional object whose callbacks report character-reference errors. */
|
54 | errors?: EntityErrorProducer | undefined);
|
55 | /** The current state of the decoder. */
|
56 | private state;
|
57 | /** Characters that were consumed while parsing an entity. */
|
58 | private consumed;
|
59 | /**
|
60 | * The result of the entity.
|
61 | *
|
62 | * Either the result index of a named entity, or the codepoint of a
|
63 | * numeric entity.
|
64 | */
|
65 | private result;
|
66 | /** The current index in the decode tree. */
|
67 | private treeIndex;
|
68 | /** The number of characters that were consumed in excess. */
|
69 | private excess;
|
70 | /** The mode in which the decoder is operating. */
|
71 | private decodeMode;
|
72 | /** Resets the instance to make it reusable. */
|
73 | startEntity(decodeMode: DecodingMode): void;
|
74 | /**
|
75 | * Write an entity to the decoder. This can be called multiple times with partial entities.
|
76 | * If the entity is incomplete, the decoder will return -1.
|
77 | *
|
78 | * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
79 | * entity is incomplete, and resume when the next string is written.
|
80 | *
|
81 | * @param input The string containing the entity (or a continuation of the entity).
|
82 | * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
83 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
84 | */
|
85 | write(input: string, offset: number): number;
|
86 | /**
|
87 | * Switches between the numeric decimal and hexadecimal states.
|
88 | *
|
89 | * Equivalent to the `Numeric character reference state` in the HTML spec.
|
90 | *
|
91 | * @param input The string containing the entity (or a continuation of the entity).
|
92 | * @param offset The current offset.
|
93 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
94 | */
|
95 | private stateNumericStart;
|
96 | private addToNumericResult; // NOTE(review): undocumented in this declaration; by name it appears to accumulate parsed digits into `result` - confirm.
|
97 | /**
|
98 | * Parses a hexadecimal numeric entity.
|
99 | *
|
100 | * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
|
101 | *
|
102 | * @param input The string containing the entity (or a continuation of the entity).
|
103 | * @param offset The current offset.
|
104 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
105 | */
|
106 | private stateNumericHex;
|
107 | /**
|
108 | * Parses a decimal numeric entity.
|
109 | *
|
110 | * Equivalent to the `Decimal character reference state` in the HTML spec.
|
111 | *
|
112 | * @param input The string containing the entity (or a continuation of the entity).
|
113 | * @param offset The current offset.
|
114 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
115 | */
|
116 | private stateNumericDecimal;
|
117 | /**
|
118 | * Validate and emit a numeric entity.
|
119 | *
|
120 | * Implements the logic from the `Hexadecimal character reference start
|
121 | * state` and `Numeric character reference end state` in the HTML spec.
|
122 | *
|
123 | * @param lastCp The last code point of the entity. Used to see if the
|
124 | * entity was terminated with a semicolon.
|
125 | * @param expectedLength The minimum number of characters that should be
|
126 | * consumed. Used to validate that at least one digit
|
127 | * was consumed.
|
128 | * @returns The number of characters that were consumed.
|
129 | */
|
130 | private emitNumericEntity;
|
131 | /**
|
132 | * Parses a named entity.
|
133 | *
|
134 | * Equivalent to the `Named character reference state` in the HTML spec.
|
135 | *
|
136 | * @param input The string containing the entity (or a continuation of the entity).
|
137 | * @param offset The current offset.
|
138 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
139 | */
|
140 | private stateNamedEntity;
|
141 | /**
|
142 | * Emit a named entity that was not terminated with a semicolon.
|
143 | *
|
144 | * @returns The number of characters consumed.
|
145 | */
|
146 | private emitNotTerminatedNamedEntity;
|
147 | /**
|
148 | * Emit a named entity.
|
149 | *
|
150 | * @param result The index of the entity in the decode tree.
|
151 | * @param valueLength The number of bytes in the entity.
|
152 | * @param consumed The number of characters consumed.
|
153 | *
|
154 | * @returns The number of characters consumed.
|
155 | */
|
156 | private emitNamedEntityData;
|
157 | /**
|
158 | * Signal to the parser that the end of the input was reached.
|
159 | *
|
160 | * Remaining data will be emitted and relevant errors will be produced.
|
161 | *
|
162 | * @returns The number of characters consumed.
|
163 | */
|
164 | end(): number;
|
165 | }
|
166 | /**
|
167 | * Determines the branch of the current node that is taken given the current
|
168 | * character. This function is used to traverse the trie.
|
169 | *
|
170 | * @param decodeTree The trie.
|
171 | * @param current The current node.
|
172 | * @param nodeIndex The index right after the current node and its value.
|
173 | * @param char The current character.
|
174 | * @returns The index of the next node, or -1 if no branch is taken.
|
175 | */
|
176 | export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIndex: number, char: number): number;
|
177 | /**
|
178 | * Decodes an HTML string.
|
179 | *
|
180 | * @param htmlString The string to decode.
|
181 | * @param mode The decoding mode. Optional; the default is not visible in this declaration.
|
182 | * @returns The decoded string.
|
183 | */
|
184 | export declare function decodeHTML(htmlString: string, mode?: DecodingMode): string;
|
185 | /**
|
186 | * Decodes an HTML string in an attribute. NOTE(review): presumably uses `DecodingMode.Attribute` - confirm.
|
187 | *
|
188 | * @param htmlAttribute The string to decode.
|
189 | * @returns The decoded string.
|
190 | */
|
191 | export declare function decodeHTMLAttribute(htmlAttribute: string): string;
|
192 | /**
|
193 | * Decodes an HTML string, requiring all entities to be terminated by a semicolon. NOTE(review): presumably uses `DecodingMode.Strict` - confirm.
|
194 | *
|
195 | * @param htmlString The string to decode.
|
196 | * @returns The decoded string.
|
197 | */
|
198 | export declare function decodeHTMLStrict(htmlString: string): string;
|
199 | /**
|
200 | * Decodes an XML string, requiring all entities to be terminated by a semicolon. NOTE(review): presumably decodes against `xmlDecodeTree` (re-exported from this module) - confirm.
|
201 | *
|
202 | * @param xmlString The string to decode.
|
203 | * @returns The decoded string.
|
204 | */
|
205 | export declare function decodeXML(xmlString: string): string;
|
206 | export { default as htmlDecodeTree } from "./generated/decode-data-html.js";
|
207 | export { default as xmlDecodeTree } from "./generated/decode-data-xml.js";
|
208 | export { default as decodeCodePoint, replaceCodePoint, fromCodePoint, } from "./decode-codepoint.js";
|
209 |
|
\ | No newline at end of file |