"use strict";
var _ParsedHTMLRewriter_onMap, _ParsedHTMLRewriter_onDocument;
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParsedHTMLRewriter = void 0;
const tslib_1 = require("tslib");
const linkedom_1 = require("linkedom");
const whatwg_stream_to_async_iter_1 = require("whatwg-stream-to-async-iter");
const support_js_1 = require("./support.cjs");
// Numeric node-type codes, named after the DOM `Node.*_NODE` constants.
// The predicates in this file compare a node's `nodeType` against these values.
const ELEMENT_NODE = 1;
const ATTRIBUTE_NODE = 2;
const TEXT_NODE = 3;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;
// Bit flags, named after the DOM `NodeFilter.SHOW_*` constants.
// They are passed as the second argument of `document.createTreeWalker`
// and can be OR-ed together to select several node kinds at once.
const SHOW_ALL = -1;
const SHOW_ELEMENT = 1;
const SHOW_TEXT = 4;
const SHOW_COMMENT = 128;
/**
 * Tells whether `n` is a text node.
 * `null` and `undefined` are accepted and reported as `false`.
 */
function isText(n) {
    return n != null && n.nodeType === TEXT_NODE;
}
/**
 * Tells whether `n` is an element node.
 * `null` and `undefined` are accepted and reported as `false`.
 */
function isElement(n) {
    return n != null && n.nodeType === ELEMENT_NODE;
}
/**
 * Tells whether `n` is a comment node.
 * `null` and `undefined` are accepted and reported as `false`.
 */
function isComment(n) {
    return n != null && n.nodeType === COMMENT_NODE;
}
/**
 * Lazily yields every node that a `SHOW_TEXT` tree walker rooted at `el` produces.
 *
 * @param el - Root node handed to `document.createTreeWalker`.
 * @param document - Document object used to create the tree walker.
 */
function* findTextNodes(el, document) {
    const textWalker = document.createTreeWalker(el, SHOW_TEXT);
    yield* (0, support_js_1.treeWalkerToIter)(textWalker);
}
/**
 * Lazily yields every node that a `SHOW_COMMENT` tree walker rooted at `el` produces.
 *
 * @param el - Root node handed to `document.createTreeWalker`.
 * @param document - Document object used to create the tree walker.
 */
function* findCommentNodes(el, document) {
    const commentWalker = document.createTreeWalker(el, SHOW_COMMENT);
    yield* (0, support_js_1.treeWalkerToIter)(commentWalker);
}
/**
 * Finds the node that follows `el` once `el` and all of its descendants are skipped:
 * the next sibling of `el`, or else the next sibling of its closest ancestor that has one.
 *
 * @param el - Node to start from; may be `null`/`undefined`.
 * @returns The following node. When the ancestor chain is exhausted, the falsy value
 *   that ended the climb is returned as-is (`null` or `undefined`).
 */
function findNext(el) {
    let cursor = el;
    for (;;) {
        if (!cursor) {
            return cursor;
        }
        const sibling = cursor.nextSibling;
        if (sibling) {
            return sibling;
        }
        // No sibling at this level: climb one level and try again.
        cursor = cursor.parentNode;
    }
}
/**
 * Substitutes an empty list for a missing (`null`/`undefined`) map entry.
 */
function listOrEmpty(entry) {
    return entry === null || entry === undefined ? [] : entry;
}
/**
 * Calls `owner[hookName](arg)` with `owner` as `this` when that hook is defined;
 * returns `undefined` without calling anything otherwise.
 */
function callHook(owner, hookName, arg) {
    const hook = owner[hookName];
    if (hook === null || hook === undefined) {
        return undefined;
    }
    return hook.call(owner, arg);
}
/**
 * Invokes every callback with `arg`, yielding each result so the driving coroutine can await it.
 */
function* runEach(callbacks, arg) {
    for (const callback of callbacks) {
        yield callback(arg);
    }
}
/**
 * Invokes the optional `hookName` hook of every document handler, yielding each result.
 */
function* runDocumentHook(documentHandlers, hookName, arg) {
    for (const documentHandler of documentHandlers) {
        yield callHook(documentHandler, hookName, arg);
    }
}
/**
 * A DOM-based implementation of Cloudflare's `HTMLRewriter`.
 *
 * Rather than processing the markup as a stream, `transform` reads the whole response
 * text, parses it into an in-memory document and then runs the registered handlers
 * while visiting the nodes of that document in tree-walker order.
 */
class ParsedHTMLRewriter {
    constructor() {
        // selector -> handler objects registered through `on`
        _ParsedHTMLRewriter_onMap.set(this, new Map());
        // handler objects registered through `onDocument`
        _ParsedHTMLRewriter_onDocument.set(this, []);
    }
    /**
     * Registers `handlers` for the nodes matched by `selector`.
     * The optional hooks read from a handler object are `element`, `innerHTML`,
     * `text` and `comments`.
     *
     * @returns This rewriter, to allow chaining.
     */
    on(selector, handlers) {
        const registry = (0, tslib_1.__classPrivateFieldGet)(this, _ParsedHTMLRewriter_onMap, "f");
        (0, support_js_1.append)(registry, selector, handlers);
        return this;
    }
    /**
     * Registers document-level `handlers`.
     * The optional hooks read from a handler object are `doctype`, `text`,
     * `comments` and `end`.
     *
     * @returns This rewriter, to allow chaining.
     */
    onDocument(handlers) {
        const documentHandlers = (0, tslib_1.__classPrivateFieldGet)(this, _ParsedHTMLRewriter_onDocument, "f");
        documentHandlers.push(handlers);
        return this;
    }
    /**
     * Runs all registered handlers over the markup of `response`.
     *
     * @param response - Source response; its `text()` is parsed, and the object itself is
     *   passed as the init argument of the returned `Response`.
     * @returns A new `Response` whose body is produced from the serialized, rewritten document.
     */
    transform(response) {
        // `transform` has to return synchronously while the rewrite itself is asynchronous,
        // and `Response` only takes asynchronous data in the form of a stream. Hence:
        // promise -> async iterable -> stream.
        const rewrite = () => (0, tslib_1.__awaiter)(this, void 0, void 0, function* () {
            // The "parsed" part: no streaming, the whole document is built in memory first.
            const markup = yield response.text();
            const { document } = (0, linkedom_1.parseHTML)(markup);
            // Phase 1: work out which nodes are "interesting". Each map is keyed by node
            // identity and holds the callbacks to run when the walk reaches that node.
            const elementCallbacks = new Map();
            const innerHTMLCallbacks = new Map();
            const textCallbacks = new Map();
            const commentCallbacks = new Map();
            const selectorRegistry = (0, tslib_1.__classPrivateFieldGet)(this, _ParsedHTMLRewriter_onMap, "f");
            for (const [selector, registered] of selectorRegistry) {
                for (const match of document.querySelectorAll(selector)) {
                    for (const handler of registered) {
                        if (handler.element) {
                            (0, support_js_1.append)(elementCallbacks, match, handler.element.bind(handler));
                        }
                        // `innerHTML` must fire only after every handler inside the element has
                        // run, so it is filed under the node that follows the element.
                        if (handler.innerHTML) {
                            (0, support_js_1.append)(innerHTMLCallbacks, findNext(match), [match, handler.innerHTML.bind(handler)]);
                        }
                        // Text and comment hooks apply to all descendants, not just direct children.
                        if (handler.text) {
                            for (const textNode of findTextNodes(match, document)) {
                                (0, support_js_1.append)(textCallbacks, textNode, handler.text.bind(handler));
                            }
                        }
                        if (handler.comments) {
                            for (const commentNode of findCommentNodes(match, document)) {
                                (0, support_js_1.append)(commentCallbacks, commentNode, handler.comments.bind(handler));
                            }
                        }
                    }
                }
            }
            const documentHandlers = (0, tslib_1.__classPrivateFieldGet)(this, _ParsedHTMLRewriter_onDocument, "f");
            // Phase 2: the doctype comes before any node of the tree.
            if (document.doctype) {
                const doctype = new support_js_1.ParsedHTMLRewriterDocumentType(document.doctype);
                yield* runDocumentHook(documentHandlers, "doctype", doctype);
            }
            // Phase 3: visit the nodes. The visiting order is captured up front so that
            // nodes added or removed by handlers cannot change it. The trailing `null`
            // gives `innerHTML` callbacks of the very last element a slot to run in.
            const treeWalker = document.createTreeWalker(document, SHOW_ELEMENT | SHOW_TEXT | SHOW_COMMENT);
            const visitOrder = [...(0, support_js_1.treeWalkerToIter)(treeWalker), null];
            for (const current of visitOrder) {
                for (const [owner, onInnerHTML] of listOrEmpty(innerHTMLCallbacks.get(current))) {
                    yield onInnerHTML(owner.innerHTML);
                }
                if (isElement(current)) {
                    for (const onElement of listOrEmpty(elementCallbacks.get(current))) {
                        // Every callback receives its own wrapper instance.
                        yield onElement(new support_js_1.ParsedHTMLRewriterElement(current, document));
                    }
                }
                else if (isText(current)) {
                    const scopedCallbacks = listOrEmpty(textCallbacks.get(current));
                    const chunk = new support_js_1.ParsedHTMLRewriterText(current, document);
                    yield* runEach(scopedCallbacks, chunk);
                    yield* runDocumentHook(documentHandlers, "text", chunk);
                    // When the run of adjacent text nodes ends here, the same callbacks are
                    // invoked once more with a wrapper built around `null`.
                    if (!isText(current.nextSibling)) {
                        const closingChunk = new support_js_1.ParsedHTMLRewriterText(null, document);
                        yield* runEach(scopedCallbacks, closingChunk);
                        yield* runDocumentHook(documentHandlers, "text", closingChunk);
                    }
                }
                else if (isComment(current)) {
                    const scopedCallbacks = listOrEmpty(commentCallbacks.get(current));
                    const comment = new support_js_1.ParsedHTMLRewriterComment(current, document);
                    yield* runEach(scopedCallbacks, comment);
                    yield* runDocumentHook(documentHandlers, "comments", comment);
                }
            }
            // Phase 4: the document end comes after every node.
            const end = new support_js_1.ParsedHTMLRewriterEnd(document);
            yield* runDocumentHook(documentHandlers, "end", end);
            return new TextEncoder().encode(document.toString());
        });
        const pendingBytes = (0, support_js_1.promiseToAsyncIterable)(rewrite());
        const body = (0, whatwg_stream_to_async_iter_1.asyncIterableToStream)(pendingBytes);
        return new Response(body, response);
    }
}
// Publish the class on the CommonJS `exports` object.
exports.ParsedHTMLRewriter = ParsedHTMLRewriter;
// Backing stores for the class's two private fields: the constructor adds one entry per
// instance to each WeakMap, and the methods read them back via `__classPrivateFieldGet`.
_ParsedHTMLRewriter_onMap = new WeakMap(), _ParsedHTMLRewriter_onDocument = new WeakMap();