// UNPKG raw file view (19 kB, JavaScript)
"use strict";
/**
 * Interop helper: exposes property `k` of module object `m` on object `o`
 * under the name `k2` (defaults to `k`).
 *
 * When `Object.create` is available the property is defined via a descriptor.
 * The source descriptor is reused only when it is an accessor on an
 * `__esModule` object, or a non-writable, non-configurable data property;
 * in every other case an enumerable getter is installed so that `o[k2]`
 * always reflects the current value of `m[k]`. Without `Object.create` the
 * value is copied once.
 *
 * A pre-existing `this.__createBinding` takes precedence when present.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        // Replace with a live getter so later changes to `m[k]` stay visible.
        desc = { enumerable: true, get: function () { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
/**
 * Interop helper: stores `v` as the `default` property of namespace object `o`.
 *
 * With `Object.create` available the property is defined as enumerable
 * (and, by `defineProperty` defaults, non-writable and non-configurable);
 * otherwise it is assigned as a plain property.
 *
 * A pre-existing `this.__setModuleDefault` takes precedence when present.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function (o, v) {
    o["default"] = v;
});
/**
 * Interop helper implementing `import * as ns from "..."` for CommonJS.
 *
 * Objects flagged with `__esModule` are returned unchanged. Anything else is
 * wrapped in a fresh namespace object: every own property except `default`
 * is bound via `__createBinding`, and the original value itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 *
 * A pre-existing `this.__importStar` takes precedence when present.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) {
        for (var k in mod) {
            if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) {
                __createBinding(result, mod, k);
            }
        }
    }
    __setModuleDefault(result, mod);
    return result;
};
// Flag the CommonJS export object as a transpiled ES module and pre-declare
// the `Parser` export before the dependencies are loaded.
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
// Namespace import: the parser uses both the default export (the tokenizer
// class) and the named `QuoteType` export.
var Tokenizer_js_1 = __importStar(require("./Tokenizer.js"));
var decode_js_1 = require("entities/lib/decode.js");
// Shared "closed by" sets referenced from several `openImpliesClose` entries.
var formTags = new Set([
    "input",
    "option",
    "optgroup",
    "select",
    "button",
    "datalist",
    "textarea",
]);
var pTag = new Set(["p"]);
var tableSectionTags = new Set(["thead", "tbody"]);
var ddtTags = new Set(["dd", "dt"]);
var rtpTags = new Set(["rt", "rp"]);
/**
 * Maps a tag name to the set of element names that are implicitly closed
 * when that tag is opened while one of them is the innermost open element.
 * Consulted by the parser in HTML mode only.
 */
var openImpliesClose = new Map([
    ["tr", new Set(["tr", "th", "td"])],
    ["th", new Set(["th"])],
    ["td", new Set(["thead", "th", "td"])],
    ["body", new Set(["head", "link", "script"])],
    ["li", new Set(["li"])],
    ["p", pTag],
    ["h1", pTag],
    ["h2", pTag],
    ["h3", pTag],
    ["h4", pTag],
    ["h5", pTag],
    ["h6", pTag],
    ["select", formTags],
    ["input", formTags],
    ["output", formTags],
    ["button", formTags],
    ["datalist", formTags],
    ["textarea", formTags],
    ["option", new Set(["option"])],
    ["optgroup", new Set(["optgroup", "option"])],
    ["dd", ddtTags],
    ["dt", ddtTags],
    ["address", pTag],
    ["article", pTag],
    ["aside", pTag],
    ["blockquote", pTag],
    ["details", pTag],
    ["div", pTag],
    ["dl", pTag],
    ["fieldset", pTag],
    ["figcaption", pTag],
    ["figure", pTag],
    ["footer", pTag],
    ["form", pTag],
    ["header", pTag],
    ["hr", pTag],
    ["main", pTag],
    ["nav", pTag],
    ["ol", pTag],
    ["pre", pTag],
    ["section", pTag],
    ["table", pTag],
    ["ul", pTag],
    ["rt", rtpTags],
    ["rp", rtpTags],
    ["tbody", tableSectionTags],
    ["tfoot", tableSectionTags],
]);
/**
 * Element names the parser treats as void in HTML mode: they are never
 * pushed onto the open-element stack and are closed as soon as their
 * opening tag ends.
 */
var voidElements = new Set([
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
]);
/** Elements whose opening switches the parser into a foreign (non-HTML) context. */
var foreignContextElements = new Set(["math", "svg"]);
/**
 * Elements whose opening switches the parser back to a non-foreign context.
 * Names are lower-case because the parser lower-cases tag names by default
 * in HTML mode before looking them up here.
 */
var htmlIntegrationElements = new Set([
    "mi",
    "mo",
    "mn",
    "ms",
    "mtext",
    "annotation-xml",
    "foreignobject",
    "desc",
    "title",
]);
/** Matches the first character after an instruction name: whitespace or `/`. */
var reNameEnd = /\s|\//;
var Parser = /** @class */ (function () {
    /**
     * Removes the innermost open element from `parser.stack` and returns it.
     *
     * In HTML mode, elements listed in `foreignContextElements` or
     * `htmlIntegrationElements` push an entry onto `parser.foreignContext`
     * when they are opened. That entry is dropped here, at the moment the
     * element leaves the stack, so both stacks stay in sync regardless of
     * how the element was closed (explicit end tag, implied close, or
     * self-closing syntax) and stray end tags cannot disturb the context.
     */
    function popOpenElement(parser) {
        var element = parser.stack.shift();
        if (parser.htmlMode &&
            (foreignContextElements.has(element) ||
                htmlIntegrationElements.has(element))) {
            parser.foreignContext.shift();
        }
        return element;
    }
    /**
     * Event-driven parser that receives tokenizer events and forwards
     * higher-level events to the optional handlers on `cbs`.
     *
     * @param cbs Handler object; any of its callbacks may be missing.
     * @param options Parser options. `xmlMode` switches off the HTML-specific
     *     behavior; `lowerCaseTags`, `lowerCaseAttributeNames` and
     *     `recognizeSelfClosing` override the mode-derived defaults;
     *     `Tokenizer` substitutes the tokenizer class.
     */
    function Parser(cbs, options) {
        if (options === void 0) { options = {}; }
        this.options = options;
        /** The start index of the last event. */
        this.startIndex = 0;
        /** The end index of the last event. */
        this.endIndex = 0;
        /**
         * Store the start index of the current open tag,
         * so we can update the start index for attributes.
         */
        this.openTagStart = 0;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        this.attribs = null;
        /** Names of the currently open elements, innermost first. */
        this.stack = [];
        /** Chunks handed to `write` that may still be needed by `getSlice`. */
        this.buffers = [];
        /** Absolute index of the first character of `buffers[0]`. */
        this.bufferOffset = 0;
        /** The index of the last written buffer. Used when resuming after a `pause()`. */
        this.writeIndex = 0;
        /** Indicates whether the parser has finished running / `.end` has been called. */
        this.ended = false;
        this.cbs = cbs != null ? cbs : {};
        this.htmlMode = !this.options.xmlMode;
        var lowerCaseTags = options.lowerCaseTags;
        this.lowerCaseTagNames =
            lowerCaseTags != null ? lowerCaseTags : this.htmlMode;
        var lowerCaseAttributeNames = options.lowerCaseAttributeNames;
        this.lowerCaseAttributeNames =
            lowerCaseAttributeNames != null
                ? lowerCaseAttributeNames
                : this.htmlMode;
        var recognizeSelfClosing = options.recognizeSelfClosing;
        this.recognizeSelfClosing =
            recognizeSelfClosing != null ? recognizeSelfClosing : !this.htmlMode;
        var TokenizerImpl = options.Tokenizer;
        if (TokenizerImpl == null) {
            TokenizerImpl = Tokenizer_js_1.default;
        }
        this.tokenizer = new TokenizerImpl(this.options, this);
        /**
         * Stack of "is foreign context" flags, innermost first. The last
         * entry is the document-level default and is never removed.
         */
        this.foreignContext = [!this.htmlMode];
        var handlers = this.cbs;
        var onparserinit = handlers.onparserinit;
        if (onparserinit != null) {
            onparserinit.call(handlers, this);
        }
    }
    // Tokenizer event handlers
    /** @internal */
    Parser.prototype.ontext = function (start, endIndex) {
        var data = this.getSlice(start, endIndex);
        this.endIndex = endIndex - 1;
        var handlers = this.cbs;
        var ontext = handlers.ontext;
        if (ontext != null) {
            ontext.call(handlers, data);
        }
        this.startIndex = endIndex;
    };
    /** @internal */
    Parser.prototype.ontextentity = function (cp, endIndex) {
        this.endIndex = endIndex - 1;
        var handlers = this.cbs;
        var ontext = handlers.ontext;
        if (ontext != null) {
            ontext.call(handlers, (0, decode_js_1.fromCodePoint)(cp));
        }
        this.startIndex = endIndex;
    };
    /**
     * Checks if the current tag is a void element. Override this if you want
     * to specify your own additional void elements.
     */
    Parser.prototype.isVoidElement = function (name) {
        return this.htmlMode && voidElements.has(name);
    };
    /** @internal */
    Parser.prototype.onopentagname = function (start, endIndex) {
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this.emitOpenTag(name);
    };
    /**
     * Starts a new open tag named `name`: implicitly closes elements that the
     * tag terminates (HTML mode), pushes non-void elements onto the stack and
     * emits `onopentagname`. Attribute collection is only started when an
     * `onopentag` handler exists.
     */
    Parser.prototype.emitOpenTag = function (name) {
        this.openTagStart = this.startIndex;
        this.tagname = name;
        var impliesClose = this.htmlMode && openImpliesClose.get(name);
        if (impliesClose) {
            while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
                var element = popOpenElement(this);
                var closeHandlers = this.cbs;
                var onclosetag = closeHandlers.onclosetag;
                if (onclosetag != null) {
                    onclosetag.call(closeHandlers, element, true);
                }
            }
        }
        if (!this.isVoidElement(name)) {
            this.stack.unshift(name);
            if (this.htmlMode) {
                if (foreignContextElements.has(name)) {
                    this.foreignContext.unshift(true);
                }
                else if (htmlIntegrationElements.has(name)) {
                    this.foreignContext.unshift(false);
                }
            }
        }
        var handlers = this.cbs;
        var onopentagname = handlers.onopentagname;
        if (onopentagname != null) {
            onopentagname.call(handlers, name);
        }
        if (this.cbs.onopentag)
            this.attribs = {};
    };
    /**
     * Finishes the current open tag: emits `onopentag` with the collected
     * attributes (if any were collected) and immediately closes void elements.
     *
     * @param isImplied Whether the open tag was implied rather than present
     *     in the input.
     */
    Parser.prototype.endOpenTag = function (isImplied) {
        this.startIndex = this.openTagStart;
        if (this.attribs) {
            var handlers = this.cbs;
            var onopentag = handlers.onopentag;
            if (onopentag != null) {
                onopentag.call(handlers, this.tagname, this.attribs, isImplied);
            }
            this.attribs = null;
        }
        if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
            this.cbs.onclosetag(this.tagname, true);
        }
        this.tagname = "";
    };
    /** @internal */
    Parser.prototype.onopentagend = function (endIndex) {
        this.endIndex = endIndex;
        this.endOpenTag(false);
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.onclosetag = function (start, endIndex) {
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        var handlers;
        if (!this.isVoidElement(name)) {
            var pos = this.stack.indexOf(name);
            if (pos !== -1) {
                // Close everything up to and including the matching element;
                // only the matching element itself is closed explicitly.
                for (var index = 0; index <= pos; index++) {
                    // We know the stack has sufficient elements.
                    var element = popOpenElement(this);
                    handlers = this.cbs;
                    var onclosetag = handlers.onclosetag;
                    if (onclosetag != null) {
                        onclosetag.call(handlers, element, index !== pos);
                    }
                }
            }
            else if (this.htmlMode && name === "p") {
                // Implicit open before close
                this.emitOpenTag("p");
                this.closeCurrentTag(true);
            }
        }
        else if (this.htmlMode && name === "br") {
            // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
            handlers = this.cbs;
            var onopentagname = handlers.onopentagname;
            if (onopentagname != null) {
                onopentagname.call(handlers, "br");
            }
            handlers = this.cbs;
            var onopentag = handlers.onopentag;
            if (onopentag != null) {
                onopentag.call(handlers, "br", {}, true);
            }
            handlers = this.cbs;
            var onbrclose = handlers.onclosetag;
            if (onbrclose != null) {
                onbrclose.call(handlers, "br", false);
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.onselfclosingtag = function (endIndex) {
        this.endIndex = endIndex;
        if (this.recognizeSelfClosing || this.foreignContext[0]) {
            this.closeCurrentTag(false);
            // Set `startIndex` for next node
            this.startIndex = endIndex + 1;
        }
        else {
            // Ignore the fact that the tag is self-closing.
            this.onopentagend(endIndex);
        }
    };
    /**
     * Ends the current open tag and, if that element is on top of the stack,
     * closes it right away.
     *
     * @param isOpenImplied Whether the open tag was implied.
     */
    Parser.prototype.closeCurrentTag = function (isOpenImplied) {
        var name = this.tagname;
        this.endOpenTag(isOpenImplied);
        // Self-closing tags will be on the top of the stack
        if (this.stack[0] === name) {
            // If the opening tag isn't implied, the closing tag has to be implied.
            var handlers = this.cbs;
            var onclosetag = handlers.onclosetag;
            if (onclosetag != null) {
                onclosetag.call(handlers, name, !isOpenImplied);
            }
            popOpenElement(this);
        }
    };
    /** @internal */
    Parser.prototype.onattribname = function (start, endIndex) {
        this.startIndex = start;
        var name = this.getSlice(start, endIndex);
        this.attribname = this.lowerCaseAttributeNames
            ? name.toLowerCase()
            : name;
    };
    /** @internal */
    Parser.prototype.onattribdata = function (start, endIndex) {
        this.attribvalue += this.getSlice(start, endIndex);
    };
    /** @internal */
    Parser.prototype.onattribentity = function (cp) {
        this.attribvalue += (0, decode_js_1.fromCodePoint)(cp);
    };
    /** @internal */
    Parser.prototype.onattribend = function (quote, endIndex) {
        this.endIndex = endIndex;
        var handlers = this.cbs;
        var onattribute = handlers.onattribute;
        if (onattribute != null) {
            // Translate the tokenizer's quote type into the reported value:
            // the quote character, `undefined` for no value, `null` otherwise.
            var QuoteType = Tokenizer_js_1.QuoteType;
            var quoteChar = quote === QuoteType.Double
                ? '"'
                : quote === QuoteType.Single
                    ? "'"
                    : quote === QuoteType.NoValue
                        ? undefined
                        : null;
            onattribute.call(handlers, this.attribname, this.attribvalue, quoteChar);
        }
        // The first occurrence of an attribute name wins.
        if (this.attribs &&
            !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
            this.attribs[this.attribname] = this.attribvalue;
        }
        this.attribvalue = "";
    };
    /**
     * Extracts the name of a declaration / processing instruction: the part
     * of `value` before the first whitespace or `/`, lower-cased when tag
     * names are lower-cased.
     */
    Parser.prototype.getInstructionName = function (value) {
        var index = value.search(reNameEnd);
        var name = index < 0 ? value : value.slice(0, index);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        return name;
    };
    /** @internal */
    Parser.prototype.ondeclaration = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("!".concat(name), "!".concat(value));
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.onprocessinginstruction = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("?".concat(name), "?".concat(value));
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.oncomment = function (start, endIndex, offset) {
        this.endIndex = endIndex;
        var handlers = this.cbs;
        var oncomment = handlers.oncomment;
        if (oncomment != null) {
            // The slice is only taken when someone listens, as `getSlice`
            // may discard buffers.
            oncomment.call(handlers, this.getSlice(start, endIndex - offset));
        }
        handlers = this.cbs;
        var oncommentend = handlers.oncommentend;
        if (oncommentend != null) {
            oncommentend.call(handlers);
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.oncdata = function (start, endIndex, offset) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex - offset);
        var handlers;
        if (!this.htmlMode || this.options.recognizeCDATA) {
            handlers = this.cbs;
            var oncdatastart = handlers.oncdatastart;
            if (oncdatastart != null) {
                oncdatastart.call(handlers);
            }
            handlers = this.cbs;
            var ontext = handlers.ontext;
            if (ontext != null) {
                ontext.call(handlers, value);
            }
            handlers = this.cbs;
            var oncdataend = handlers.oncdataend;
            if (oncdataend != null) {
                oncdataend.call(handlers);
            }
        }
        else {
            // Unrecognized CDATA in HTML mode is reported as a comment.
            handlers = this.cbs;
            var oncomment = handlers.oncomment;
            if (oncomment != null) {
                oncomment.call(handlers, "[CDATA[".concat(value, "]]"));
            }
            handlers = this.cbs;
            var oncommentend = handlers.oncommentend;
            if (oncommentend != null) {
                oncommentend.call(handlers);
            }
        }
        // Set `startIndex` for next node
        this.startIndex = endIndex + 1;
    };
    /** @internal */
    Parser.prototype.onend = function () {
        if (this.cbs.onclosetag) {
            // Set the end index for all remaining tags
            this.endIndex = this.startIndex;
            for (var index = 0; index < this.stack.length; index++) {
                this.cbs.onclosetag(this.stack[index], true);
            }
        }
        var handlers = this.cbs;
        var onend = handlers.onend;
        if (onend != null) {
            onend.call(handlers);
        }
    };
    /**
     * Resets the parser to a blank state, ready to parse a new HTML document
     */
    Parser.prototype.reset = function () {
        var handlers = this.cbs;
        var onreset = handlers.onreset;
        if (onreset != null) {
            onreset.call(handlers);
        }
        this.tokenizer.reset();
        this.tagname = "";
        this.attribname = "";
        // Drop any partially collected attribute value / tag position so it
        // cannot leak into the next document.
        this.attribvalue = "";
        this.attribs = null;
        this.openTagStart = 0;
        this.stack.length = 0;
        this.startIndex = 0;
        this.endIndex = 0;
        handlers = this.cbs;
        var onparserinit = handlers.onparserinit;
        if (onparserinit != null) {
            onparserinit.call(handlers, this);
        }
        this.buffers.length = 0;
        this.foreignContext.length = 0;
        this.foreignContext.unshift(!this.htmlMode);
        this.bufferOffset = 0;
        this.writeIndex = 0;
        this.ended = false;
    };
    /**
     * Resets the parser, then parses a complete document and
     * pushes it to the handler.
     *
     * @param data Document to parse.
     */
    Parser.prototype.parseComplete = function (data) {
        this.reset();
        this.end(data);
    };
    /**
     * Returns the input between the absolute indices `start` (inclusive) and
     * `end` (exclusive), joining chunks where the range spans several of them.
     * Buffers that lie entirely before `start`, and buffers fully consumed
     * while assembling the slice, are discarded.
     */
    Parser.prototype.getSlice = function (start, end) {
        while (start - this.bufferOffset >= this.buffers[0].length) {
            this.shiftBuffer();
        }
        var slice = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
        while (end - this.bufferOffset > this.buffers[0].length) {
            this.shiftBuffer();
            slice += this.buffers[0].slice(0, end - this.bufferOffset);
        }
        return slice;
    };
    /** Discards the oldest buffer and adjusts the offset bookkeeping. */
    Parser.prototype.shiftBuffer = function () {
        this.bufferOffset += this.buffers[0].length;
        this.writeIndex--;
        this.buffers.shift();
    };
    /**
     * Parses a chunk of data and calls the corresponding callbacks.
     *
     * @param chunk Chunk to parse.
     */
    Parser.prototype.write = function (chunk) {
        if (this.ended) {
            var handlers = this.cbs;
            var onerror = handlers.onerror;
            if (onerror != null) {
                onerror.call(handlers, new Error(".write() after done!"));
            }
            return;
        }
        this.buffers.push(chunk);
        // While paused the chunk is only queued; `resume` feeds it later.
        if (this.tokenizer.running) {
            this.tokenizer.write(chunk);
            this.writeIndex++;
        }
    };
    /**
     * Parses the end of the buffer and clears the stack, calls onend.
     *
     * @param chunk Optional final chunk to parse.
     */
    Parser.prototype.end = function (chunk) {
        if (this.ended) {
            var handlers = this.cbs;
            var onerror = handlers.onerror;
            if (onerror != null) {
                onerror.call(handlers, new Error(".end() after done!"));
            }
            return;
        }
        if (chunk)
            this.write(chunk);
        this.ended = true;
        this.tokenizer.end();
    };
    /**
     * Pauses parsing. The parser won't emit events until `resume` is called.
     */
    Parser.prototype.pause = function () {
        this.tokenizer.pause();
    };
    /**
     * Resumes parsing after `pause` was called.
     */
    Parser.prototype.resume = function () {
        this.tokenizer.resume();
        // Feed the chunks that were queued while the tokenizer was paused.
        while (this.tokenizer.running &&
            this.writeIndex < this.buffers.length) {
            this.tokenizer.write(this.buffers[this.writeIndex++]);
        }
        if (this.ended)
            this.tokenizer.end();
    };
    /**
     * Alias of `write`, for backwards compatibility.
     *
     * @param chunk Chunk to parse.
     * @deprecated
     */
    Parser.prototype.parseChunk = function (chunk) {
        this.write(chunk);
    };
    /**
     * Alias of `end`, for backwards compatibility.
     *
     * @param chunk Optional final chunk to parse.
     * @deprecated
     */
    Parser.prototype.done = function (chunk) {
        this.end(chunk);
    };
    return Parser;
}());
exports.Parser = Parser;
//# sourceMappingURL=Parser.js.map