"use strict";
/**
 * Compiler-emitted helper: re-exposes property `k` of module object `m` on
 * object `o` under the name `k2` (defaults to `k`).
 *
 * When `Object.create` exists, the property is installed with
 * `Object.defineProperty`. The source's own descriptor is reused only when it
 * is an accessor on an `__esModule` object, or a data property that is neither
 * writable nor configurable; in every other case an enumerable getter is
 * installed so that later changes to `m[k]` remain visible through `o[k2]`.
 * Without `Object.create`, the current value is simply copied.
 *
 * An already defined `this.__createBinding` takes precedence over this one.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        // Replace the descriptor with a live, read-only view of m[k].
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
/**
 * Compiler-emitted helper: stores `v` as the `default` property of `o`.
 *
 * When `Object.create` exists the property is defined as enumerable (and,
 * because no other flags are given, non-writable and non-configurable);
 * otherwise it is assigned as a plain property.
 *
 * An already defined `this.__setModuleDefault` takes precedence over this one.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
/**
 * Compiler-emitted helper implementing namespace-import interop.
 *
 * - A module flagged with `__esModule` is returned unchanged.
 * - Any other value is wrapped in a fresh object: every own enumerable
 *   property of `mod` except `default` is bound onto the wrapper with
 *   `__createBinding`, and `mod` itself is stored as the wrapper's `default`
 *   via `__setModuleDefault`. `null`/`undefined` yield a wrapper whose only
 *   property is `default`.
 *
 * An already defined `this.__importStar` takes precedence over this one.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
// Flag the exports object as a transpiled ES module and pre-declare the
// `Parser` export before the dependencies are loaded.
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
// Tokenizer namespace: `default` is the tokenizer class used when no custom
// one is supplied through the parser options; `QuoteType` is used when
// reporting attributes.
var Tokenizer_js_1 = __importStar(require("./Tokenizer.js"));
// Supplies `fromCodePoint`, used to turn decoded entity code points into text.
var decode_js_1 = require("entities/lib/decode.js");
// Shared tag-name sets referenced by several `openImpliesClose` entries.
var formTags = new Set([
    "input",
    "option",
    "optgroup",
    "select",
    "button",
    "datalist",
    "textarea",
]);
var pTag = new Set(["p"]);
var tableSectionTags = new Set(["thead", "tbody"]);
var ddtTags = new Set(["dd", "dt"]);
var rtpTags = new Set(["rt", "rp"]);
/**
 * Maps a tag name to the set of tag names that are implicitly closed when
 * that tag is opened. The parser consults this table in HTML mode only and
 * keeps closing elements from the top of its stack for as long as the
 * top-most element is a member of the set.
 */
var openImpliesClose = new Map([
    ["tr", new Set(["tr", "th", "td"])],
    ["th", new Set(["th"])],
    ["td", new Set(["thead", "th", "td"])],
    ["body", new Set(["head", "link", "script"])],
    ["li", new Set(["li"])],
    ["p", pTag],
    ["h1", pTag],
    ["h2", pTag],
    ["h3", pTag],
    ["h4", pTag],
    ["h5", pTag],
    ["h6", pTag],
    ["select", formTags],
    ["input", formTags],
    ["output", formTags],
    ["button", formTags],
    ["datalist", formTags],
    ["textarea", formTags],
    ["option", new Set(["option"])],
    ["optgroup", new Set(["optgroup", "option"])],
    ["dd", ddtTags],
    ["dt", ddtTags],
    ["address", pTag],
    ["article", pTag],
    ["aside", pTag],
    ["blockquote", pTag],
    ["details", pTag],
    ["div", pTag],
    ["dl", pTag],
    ["fieldset", pTag],
    ["figcaption", pTag],
    ["figure", pTag],
    ["footer", pTag],
    ["form", pTag],
    ["header", pTag],
    ["hr", pTag],
    ["main", pTag],
    ["nav", pTag],
    ["ol", pTag],
    ["pre", pTag],
    ["section", pTag],
    ["table", pTag],
    ["ul", pTag],
    ["rt", rtpTags],
    ["rp", rtpTags],
    ["tbody", tableSectionTags],
    ["tfoot", tableSectionTags],
]);
/**
 * Tag names treated as void elements in HTML mode: the parser never pushes
 * them onto its element stack and reports their close event as soon as the
 * opening tag ends.
 */
var voidElements = new Set([
    "area",
    "base",
    "basefont",
    "br",
    "col",
    "command",
    "embed",
    "frame",
    "hr",
    "img",
    "input",
    "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
]);
/**
 * Elements whose opening (in HTML mode) pushes `true` onto the parser's
 * foreign-context stack; while the top of that stack is truthy, self-closing
 * tags are honoured.
 */
var foreignContextElements = new Set(["math", "svg"]);
/**
 * Elements whose opening (in HTML mode) pushes `false` onto the parser's
 * foreign-context stack, i.e. their content is handled as regular HTML again.
 * Names are compared after the parser's optional lower-casing, hence
 * "foreignobject".
 */
var htmlIntegrationElements = new Set([
    "mi",
    "mo",
    "mn",
    "ms",
    "mtext",
    "annotation-xml",
    "foreignobject",
    "desc",
    "title",
]);
// Matches the first character that terminates an instruction/declaration
// name: any whitespace character or a forward slash.
var reNameEnd = /\s|\//;
/**
 * Event-driven markup parser.
 *
 * The parser hands itself to a tokenizer as that tokenizer's callback object,
 * keeps every written chunk in `buffers` so token positions can be turned back
 * into strings, maintains a stack of open elements, and forwards higher-level
 * events (`onopentag`, `ontext`, `onclosetag`, ...) to the user-supplied
 * callbacks in `cbs`. All user callbacks are optional.
 */
var Parser = /** @class */ (function () {
    /**
     * @param cbs Object with optional event callbacks; `null`/`undefined` is
     *     treated as "no callbacks".
     * @param options Parser options. Read here: `xmlMode`, `lowerCaseTags`,
     *     `lowerCaseAttributeNames`, `recognizeSelfClosing`, `recognizeCDATA`
     *     and `Tokenizer` (a replacement tokenizer class). The whole object is
     *     also passed on to the tokenizer.
     */
    function Parser(cbs, options) {
        if (options === void 0) { options = {}; }
        var _a, _b, _c, _d, _e, _f;
        this.options = options;
        // Position bookkeeping for the event currently being reported.
        this.startIndex = 0;
        this.endIndex = 0;
        // Start position of the tag being opened; restored into `startIndex`
        // by `endOpenTag`, because attribute events overwrite `startIndex`.
        this.openTagStart = 0;
        this.tagname = "";
        this.attribname = "";
        this.attribvalue = "";
        // Attribute map for the current tag; only allocated when an
        // `onopentag` callback exists.
        this.attribs = null;
        // Open elements, innermost first (index 0 is the current element).
        this.stack = [];
        // Written chunks not yet discarded, and the absolute offset of
        // `buffers[0]` within the overall input.
        this.buffers = [];
        this.bufferOffset = 0;
        // Index into `buffers` of the next chunk to feed to the tokenizer.
        this.writeIndex = 0;
        // Set once `end()` has been called.
        this.ended = false;
        this.cbs = cbs !== null && cbs !== void 0 ? cbs : {};
        this.htmlMode = !this.options.xmlMode;
        // The three switches below default to HTML behaviour unless
        // explicitly configured.
        this.lowerCaseTagNames = (_a = options.lowerCaseTags) !== null && _a !== void 0 ? _a : this.htmlMode;
        this.lowerCaseAttributeNames =
            (_b = options.lowerCaseAttributeNames) !== null && _b !== void 0 ? _b : this.htmlMode;
        this.recognizeSelfClosing =
            (_c = options.recognizeSelfClosing) !== null && _c !== void 0 ? _c : !this.htmlMode;
        this.tokenizer = new ((_d = options.Tokenizer) !== null && _d !== void 0 ? _d : Tokenizer_js_1.default)(this.options, this);
        // Stack of foreign-context flags, innermost first. The base entry is
        // `true` in XML mode, so self-closing tags are always honoured there.
        this.foreignContext = [!this.htmlMode];
        (_f = (_e = this.cbs).onparserinit) === null || _f === void 0 ? void 0 : _f.call(_e, this);
    }
    // Tokenizer event handlers

    /**
     * Reports the text in `[start, endIndex)` through `cbs.ontext`.
     */
    Parser.prototype.ontext = function (start, endIndex) {
        var _a, _b;
        var data = this.getSlice(start, endIndex);
        this.endIndex = endIndex - 1;
        (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, data);
        this.startIndex = endIndex;
    };
    /**
     * Reports a decoded entity (code point `cp`) found in text through
     * `cbs.ontext`.
     */
    Parser.prototype.ontextentity = function (cp, endIndex) {
        var _a, _b;
        this.endIndex = endIndex - 1;
        (_b = (_a = this.cbs).ontext) === null || _b === void 0 ? void 0 : _b.call(_a, (0, decode_js_1.fromCodePoint)(cp));
        this.startIndex = endIndex;
    };
    /**
     * Tells whether `name` is a void element. Always `false` outside HTML mode.
     */
    Parser.prototype.isVoidElement = function (name) {
        return this.htmlMode && voidElements.has(name);
    };
    /**
     * Handles the name of an opening tag located at `[start, endIndex)`.
     */
    Parser.prototype.onopentagname = function (start, endIndex) {
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        this.emitOpenTag(name);
    };
    /**
     * Starts an element called `name`: closes elements that the new tag
     * implicitly ends (HTML mode), pushes the element onto the stack unless it
     * is void, updates the foreign-context stack, and fires
     * `cbs.onopentagname`.
     */
    Parser.prototype.emitOpenTag = function (name) {
        var _a, _b, _c, _d;
        this.openTagStart = this.startIndex;
        this.tagname = name;
        var impliesClose = this.htmlMode && openImpliesClose.get(name);
        if (impliesClose) {
            // Keep closing the innermost element while it is one the new tag
            // implicitly terminates.
            while (this.stack.length > 0 && impliesClose.has(this.stack[0])) {
                var element = this.stack.shift();
                (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, true);
            }
        }
        if (!this.isVoidElement(name)) {
            this.stack.unshift(name);
            if (this.htmlMode) {
                if (foreignContextElements.has(name)) {
                    this.foreignContext.unshift(true);
                }
                else if (htmlIntegrationElements.has(name)) {
                    this.foreignContext.unshift(false);
                }
            }
        }
        (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, name);
        // Attributes are only collected when someone listens for `onopentag`.
        if (this.cbs.onopentag)
            this.attribs = {};
    };
    /**
     * Finishes the current opening tag: fires `cbs.onopentag` with the
     * collected attributes (if they were collected) and, for void elements,
     * immediately fires `cbs.onclosetag` with its second argument set to
     * `true`.
     *
     * @param isImplied Forwarded as the third argument of `cbs.onopentag`.
     */
    Parser.prototype.endOpenTag = function (isImplied) {
        var _a, _b;
        this.startIndex = this.openTagStart;
        if (this.attribs) {
            (_b = (_a = this.cbs).onopentag) === null || _b === void 0 ? void 0 : _b.call(_a, this.tagname, this.attribs, isImplied);
            this.attribs = null;
        }
        if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
            this.cbs.onclosetag(this.tagname, true);
        }
        this.tagname = "";
    };
    /**
     * Handles the end of an opening tag.
     */
    Parser.prototype.onopentagend = function (endIndex) {
        this.endIndex = endIndex;
        this.endOpenTag(false);
        // The next event starts right after the tag's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles a closing tag whose name is located at `[start, endIndex)`.
     *
     * If the name is found on the stack, every element up to and including the
     * match is closed; all but the matching one are reported with the second
     * `onclosetag` argument set to `true`. In HTML mode an unmatched `</p>`
     * produces an empty `p` element and `</br>` produces a `br` element.
     * Other unmatched closing tags are ignored.
     */
    Parser.prototype.onclosetag = function (start, endIndex) {
        var _a, _b, _c, _d, _e, _f, _g, _h;
        this.endIndex = endIndex;
        var name = this.getSlice(start, endIndex);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        if (this.htmlMode &&
            (foreignContextElements.has(name) ||
                htmlIntegrationElements.has(name))) {
            this.foreignContext.shift();
        }
        if (!this.isVoidElement(name)) {
            var pos = this.stack.indexOf(name);
            if (pos !== -1) {
                for (var index = 0; index <= pos; index++) {
                    var element = this.stack.shift();
                    // Only the last element closed here was closed explicitly.
                    (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, element, index !== pos);
                }
            }
            else if (this.htmlMode && name === "p") {
                // Implicit open before close.
                this.emitOpenTag("p");
                this.closeCurrentTag(true);
            }
        }
        else if (this.htmlMode && name === "br") {
            // `</br>` is reported as a complete `br` element; the stack is
            // not touched.
            (_d = (_c = this.cbs).onopentagname) === null || _d === void 0 ? void 0 : _d.call(_c, "br");
            (_f = (_e = this.cbs).onopentag) === null || _f === void 0 ? void 0 : _f.call(_e, "br", {}, true);
            (_h = (_g = this.cbs).onclosetag) === null || _h === void 0 ? void 0 : _h.call(_g, "br", false);
        }
        // The next event starts right after the tag's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles the end of a self-closing tag (`<name ... />`). The element is
     * closed immediately when `recognizeSelfClosing` is set or the current
     * foreign-context flag is truthy; otherwise the tag is treated like a
     * regular opening tag.
     */
    Parser.prototype.onselfclosingtag = function (endIndex) {
        this.endIndex = endIndex;
        if (this.recognizeSelfClosing || this.foreignContext[0]) {
            this.closeCurrentTag(false);
            // The next event starts right after the tag's last character.
            this.startIndex = endIndex + 1;
        }
        else {
            // Ignore the slash and handle the tag as an ordinary open tag.
            this.onopentagend(endIndex);
        }
    };
    /**
     * Ends the current opening tag and, if that element is on top of the
     * stack, closes it right away.
     *
     * @param isOpenImplied Forwarded to `endOpenTag`; its negation is passed
     *     as the second argument of `cbs.onclosetag`.
     */
    Parser.prototype.closeCurrentTag = function (isOpenImplied) {
        var _a, _b;
        var name = this.tagname;
        this.endOpenTag(isOpenImplied);
        // Void elements were never pushed, so they are not popped here either.
        if (this.stack[0] === name) {
            (_b = (_a = this.cbs).onclosetag) === null || _b === void 0 ? void 0 : _b.call(_a, name, !isOpenImplied);
            this.stack.shift();
        }
    };
    /**
     * Records the attribute name located at `[start, endIndex)`.
     */
    Parser.prototype.onattribname = function (start, endIndex) {
        this.startIndex = start;
        var name = this.getSlice(start, endIndex);
        this.attribname = this.lowerCaseAttributeNames
            ? name.toLowerCase()
            : name;
    };
    /**
     * Appends the raw text in `[start, endIndex)` to the attribute value being
     * collected.
     */
    Parser.prototype.onattribdata = function (start, endIndex) {
        this.attribvalue += this.getSlice(start, endIndex);
    };
    /**
     * Appends a decoded entity (code point `cp`) to the attribute value being
     * collected.
     */
    Parser.prototype.onattribentity = function (cp) {
        this.attribvalue += (0, decode_js_1.fromCodePoint)(cp);
    };
    /**
     * Finishes the current attribute: fires `cbs.onattribute` with name, value
     * and quote (`'"'`, `"'"`, `undefined` for an attribute without a value,
     * `null` otherwise) and stores the value in `attribs` unless an attribute
     * with the same name was already stored for this tag.
     */
    Parser.prototype.onattribend = function (quote, endIndex) {
        var _a, _b;
        this.endIndex = endIndex;
        (_b = (_a = this.cbs).onattribute) === null || _b === void 0 ? void 0 : _b.call(_a, this.attribname, this.attribvalue, quote === Tokenizer_js_1.QuoteType.Double
            ? '"'
            : quote === Tokenizer_js_1.QuoteType.Single
                ? "'"
                : quote === Tokenizer_js_1.QuoteType.NoValue
                    ? undefined
                    : null);
        // The first occurrence of an attribute wins.
        if (this.attribs &&
            !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname)) {
            this.attribs[this.attribname] = this.attribvalue;
        }
        this.attribvalue = "";
    };
    /**
     * Extracts the name of a declaration or processing instruction: everything
     * in `value` before the first whitespace character or `/`, lower-cased
     * when `lowerCaseTagNames` is set.
     */
    Parser.prototype.getInstructionName = function (value) {
        var index = value.search(reNameEnd);
        var name = index < 0 ? value : value.slice(0, index);
        if (this.lowerCaseTagNames) {
            name = name.toLowerCase();
        }
        return name;
    };
    /**
     * Handles a declaration (`<!...>`); reported through
     * `cbs.onprocessinginstruction` with `!` prefixed to both name and value.
     */
    Parser.prototype.ondeclaration = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("!".concat(name), "!".concat(value));
        }
        // The next event starts right after the construct's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles a processing instruction (`<?...>`); reported through
     * `cbs.onprocessinginstruction` with `?` prefixed to both name and value.
     */
    Parser.prototype.onprocessinginstruction = function (start, endIndex) {
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex);
        if (this.cbs.onprocessinginstruction) {
            var name = this.getInstructionName(value);
            this.cbs.onprocessinginstruction("?".concat(name), "?".concat(value));
        }
        // The next event starts right after the construct's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles a comment. The comment text is `[start, endIndex - offset)`;
     * `cbs.oncomment` is followed by `cbs.oncommentend`.
     */
    Parser.prototype.oncomment = function (start, endIndex, offset) {
        var _a, _b, _c, _d;
        this.endIndex = endIndex;
        (_b = (_a = this.cbs).oncomment) === null || _b === void 0 ? void 0 : _b.call(_a, this.getSlice(start, endIndex - offset));
        (_d = (_c = this.cbs).oncommentend) === null || _d === void 0 ? void 0 : _d.call(_c);
        // The next event starts right after the construct's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles a CDATA section whose content is `[start, endIndex - offset)`.
     * In XML mode, or when the `recognizeCDATA` option is set, it is reported
     * as `oncdatastart` / `ontext` / `oncdataend`; otherwise it is reported as
     * a comment with the text `[CDATA[...]]`.
     */
    Parser.prototype.oncdata = function (start, endIndex, offset) {
        var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
        this.endIndex = endIndex;
        var value = this.getSlice(start, endIndex - offset);
        if (!this.htmlMode || this.options.recognizeCDATA) {
            (_b = (_a = this.cbs).oncdatastart) === null || _b === void 0 ? void 0 : _b.call(_a);
            (_d = (_c = this.cbs).ontext) === null || _d === void 0 ? void 0 : _d.call(_c, value);
            (_f = (_e = this.cbs).oncdataend) === null || _f === void 0 ? void 0 : _f.call(_e);
        }
        else {
            (_h = (_g = this.cbs).oncomment) === null || _h === void 0 ? void 0 : _h.call(_g, "[CDATA[".concat(value, "]]"));
            (_k = (_j = this.cbs).oncommentend) === null || _k === void 0 ? void 0 : _k.call(_j);
        }
        // The next event starts right after the construct's last character.
        this.startIndex = endIndex + 1;
    };
    /**
     * Handles the end of input: reports every element still on the stack as
     * implicitly closed (innermost first), then fires `cbs.onend`.
     */
    Parser.prototype.onend = function () {
        var _a, _b;
        if (this.cbs.onclosetag) {
            // The implied close events are zero-length.
            this.endIndex = this.startIndex;
            for (var index = 0; index < this.stack.length; index++) {
                this.cbs.onclosetag(this.stack[index], true);
            }
        }
        (_b = (_a = this.cbs).onend) === null || _b === void 0 ? void 0 : _b.call(_a);
    };
    // Public API

    /**
     * Returns the parser (and its tokenizer) to the initial state so it can be
     * reused. Fires `cbs.onreset` first and `cbs.onparserinit` once the
     * element/position state has been cleared.
     */
    Parser.prototype.reset = function () {
        var _a, _b, _c, _d;
        (_b = (_a = this.cbs).onreset) === null || _b === void 0 ? void 0 : _b.call(_a);
        this.tokenizer.reset();
        this.tagname = "";
        this.attribname = "";
        // Drop any partially collected attribute value so it cannot be
        // prepended to the first attribute parsed after the reset.
        this.attribvalue = "";
        this.attribs = null;
        this.stack.length = 0;
        this.startIndex = 0;
        this.endIndex = 0;
        this.openTagStart = 0;
        (_d = (_c = this.cbs).onparserinit) === null || _d === void 0 ? void 0 : _d.call(_c, this);
        this.buffers.length = 0;
        this.foreignContext.length = 0;
        this.foreignContext.unshift(!this.htmlMode);
        this.bufferOffset = 0;
        this.writeIndex = 0;
        this.ended = false;
    };
    /**
     * Resets the parser, then parses `data` as a complete document.
     *
     * @param data Document to parse.
     */
    Parser.prototype.parseComplete = function (data) {
        this.reset();
        this.end(data);
    };
    /**
     * Returns the input text in the absolute range `[start, end)`, which may
     * span several buffered chunks. Chunks that lie entirely before the range,
     * and chunks that are fully consumed while assembling it, are discarded,
     * so earlier positions can no longer be read afterwards.
     */
    Parser.prototype.getSlice = function (start, end) {
        // Skip chunks that end before the requested range begins.
        while (start - this.bufferOffset >= this.buffers[0].length) {
            this.shiftBuffer();
        }
        var slice = this.buffers[0].slice(start - this.bufferOffset, end - this.bufferOffset);
        // Continue into following chunks while the range extends past the
        // current one.
        while (end - this.bufferOffset > this.buffers[0].length) {
            this.shiftBuffer();
            slice += this.buffers[0].slice(0, end - this.bufferOffset);
        }
        return slice;
    };
    /**
     * Discards the oldest buffered chunk, advancing `bufferOffset` by its
     * length and keeping `writeIndex` pointing at the same chunk.
     */
    Parser.prototype.shiftBuffer = function () {
        this.bufferOffset += this.buffers[0].length;
        this.writeIndex--;
        this.buffers.shift();
    };
    /**
     * Feeds a chunk of input to the parser. While the tokenizer is not
     * running, the chunk is only buffered and is handed over later by
     * `resume()`. After `end()` this reports an error through `cbs.onerror`.
     *
     * @param chunk Chunk to parse.
     */
    Parser.prototype.write = function (chunk) {
        var _a, _b;
        if (this.ended) {
            (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".write() after done!"));
            return;
        }
        this.buffers.push(chunk);
        if (this.tokenizer.running) {
            this.tokenizer.write(chunk);
            this.writeIndex++;
        }
    };
    /**
     * Signals the end of input, optionally writing a final chunk first.
     * Calling it a second time reports an error through `cbs.onerror`.
     *
     * @param chunk Optional final chunk; falsy values (including `""`) are
     *     skipped.
     */
    Parser.prototype.end = function (chunk) {
        var _a, _b;
        if (this.ended) {
            (_b = (_a = this.cbs).onerror) === null || _b === void 0 ? void 0 : _b.call(_a, new Error(".end() after done!"));
            return;
        }
        if (chunk)
            this.write(chunk);
        this.ended = true;
        this.tokenizer.end();
    };
    /**
     * Pauses the tokenizer.
     */
    Parser.prototype.pause = function () {
        this.tokenizer.pause();
    };
    /**
     * Resumes the tokenizer, hands over the chunks that were buffered in the
     * meantime (stopping if the tokenizer stops running again) and, if `end()`
     * has already been called, ends the tokenizer.
     */
    Parser.prototype.resume = function () {
        this.tokenizer.resume();
        while (this.tokenizer.running &&
            this.writeIndex < this.buffers.length) {
            this.tokenizer.write(this.buffers[this.writeIndex++]);
        }
        if (this.ended)
            this.tokenizer.end();
    };
    /**
     * Alias of `write`.
     *
     * @param chunk Chunk to parse.
     */
    Parser.prototype.parseChunk = function (chunk) {
        this.write(chunk);
    };
    /**
     * Alias of `end`.
     *
     * @param chunk Optional final chunk to parse.
     */
    Parser.prototype.done = function (chunk) {
        this.end(chunk);
    };
    return Parser;
}());
// Publish the class as this module's `Parser` export.
exports.Parser = Parser;