UNPKG

8.62 kBJavaScriptView Raw
// Backing stores for the private fields of `ParsedHTMLRewriter`.
// They are declared here and assigned their WeakMaps at the bottom of the module.
var _ParsedHTMLRewriter_onMap;
var _ParsedHTMLRewriter_onDocument;
2import { __classPrivateFieldGet } from "tslib";
3import { parseHTML } from 'linkedom';
4import { asyncIterableToStream } from 'whatwg-stream-to-async-iter';
5import { ParsedHTMLRewriterElement, ParsedHTMLRewriterText, ParsedHTMLRewriterComment, ParsedHTMLRewriterDocumentType, ParsedHTMLRewriterEnd, promiseToAsyncIterable, append, treeWalkerToIter, } from './support.js';
// `Node.nodeType` values, as defined by the DOM standard.
const ELEMENT_NODE = 1;
const ATTRIBUTE_NODE = 2;
const TEXT_NODE = 3;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;

// `NodeFilter` "whatToShow" bit flags, as passed to `createTreeWalker`.
const SHOW_ALL = -1;
const SHOW_ELEMENT = 0x1;
const SHOW_TEXT = 0x4;
const SHOW_COMMENT = 0x80;

/** Returns `true` when `n` is a non-nullish value whose `nodeType` marks a text node. */
const isText = (n) => n != null && n.nodeType === TEXT_NODE;

/** Returns `true` when `n` is a non-nullish value whose `nodeType` marks an element. */
const isElement = (n) => n != null && n.nodeType === ELEMENT_NODE;

/** Returns `true` when `n` is a non-nullish value whose `nodeType` marks a comment. */
const isComment = (n) => n != null && n.nodeType === COMMENT_NODE;
/**
 * Yields every node that a text-only tree walker rooted at `el` produces.
 *
 * @param el Root node handed to `createTreeWalker`.
 * @param document Document whose `createTreeWalker` is used.
 */
function* findTextNodes(el, document) {
    const textWalker = document.createTreeWalker(el, SHOW_TEXT);
    yield* treeWalkerToIter(textWalker);
}
/**
 * Yields every node that a comment-only tree walker rooted at `el` produces.
 *
 * @param el Root node handed to `createTreeWalker`.
 * @param document Document whose `createTreeWalker` is used.
 */
function* findCommentNodes(el, document) {
    const commentWalker = document.createTreeWalker(el, SHOW_COMMENT);
    yield* treeWalkerToIter(commentWalker);
}
/**
 * Finds the closest following sibling of `el` or of one of its ancestors.
 *
 * Starting at `el`, the parent chain is climbed until a node with a truthy
 * `nextSibling` is found; that sibling is returned.
 *
 * @param el Node to start from (may be nullish).
 * @returns The sibling found, or the falsy value that ended the climb
 *     (e.g. `null` once the parent chain is exhausted).
 */
function findNext(el) {
    let current = el;
    while (current) {
        const sibling = current.nextSibling;
        if (sibling) {
            return sibling;
        }
        current = current.parentNode;
    }
    return current;
}
/**
 * A DOM-based implementation of Cloudflare's `HTMLRewriter`.
 *
 * Instead of rewriting the markup while it streams through, the complete
 * response body is parsed into an in-memory DOM, the registered handlers are
 * run against it in document order, and the serialized document becomes the
 * body of a new `Response`.
 */
export class ParsedHTMLRewriter {
    constructor() {
        // Private state is kept in module-level WeakMaps keyed by the instance.
        _ParsedHTMLRewriter_onMap.set(this, new Map());
        _ParsedHTMLRewriter_onDocument.set(this, []);
    }

    /**
     * Registers `handlers` under `selector`.
     *
     * @param selector Selector string, later passed to `document.querySelectorAll`.
     * @param handlers Object whose optional `element`, `innerHTML`, `text` and
     *     `comments` methods are invoked during `transform`.
     * @returns This rewriter, so calls can be chained.
     */
    on(selector, handlers) {
        const bySelector = __classPrivateFieldGet(this, _ParsedHTMLRewriter_onMap, "f");
        append(bySelector, selector, handlers);
        return this;
    }

    /**
     * Registers document-level `handlers`.
     *
     * @param handlers Object whose optional `doctype`, `text`, `comments` and
     *     `end` methods are invoked during `transform`.
     * @returns This rewriter, so calls can be chained.
     */
    onDocument(handlers) {
        const documentLevel = __classPrivateFieldGet(this, _ParsedHTMLRewriter_onDocument, "f");
        documentLevel.push(handlers);
        return this;
    }

    /**
     * Runs all registered handlers against the HTML in `response`.
     *
     * @param response Response whose body is read with `text()`; it is also
     *     passed as the init argument of the returned `Response`.
     * @returns A new `Response` whose body is the serialized, rewritten document.
     */
    transform(response) {
        const rewrite = async () => {
            // This is where the "parse" part comes in: nothing is stream processed,
            // the DOM is built in memory and the selectors are run against it.
            const htmlText = await response.text();
            const { document } = parseHTML(htmlText);
            const selectorHandlers = __classPrivateFieldGet(this, _ParsedHTMLRewriter_onMap, "f");
            const documentHandlers = __classPrivateFieldGet(this, _ParsedHTMLRewriter_onDocument, "f");

            // Phase 1: index every "interesting" node by object identity,
            // based on the registered selector handlers.
            const elemMap = new Map();
            const htmlMap = new Map();
            const textMap = new Map();
            const commMap = new Map();
            for (const [selector, handlers] of selectorHandlers) {
                for (const elem of document.querySelectorAll(selector)) {
                    for (const handler of handlers) {
                        if (handler.element) {
                            append(elemMap, elem, handler.element.bind(handler));
                        }
                        // An `innerHTML` handler is keyed by the node that follows the element's
                        // subtree, so it runs after all inner handlers have completed.
                        if (handler.innerHTML) {
                            append(htmlMap, findNext(elem), [elem, handler.innerHTML.bind(handler)]);
                        }
                        // Non-element handlers apply to _any_ descendant, not just direct children.
                        if (handler.text) {
                            for (const textNode of findTextNodes(elem, document)) {
                                append(textMap, textNode, handler.text.bind(handler));
                            }
                        }
                        if (handler.comments) {
                            for (const commentNode of findCommentNodes(elem, document)) {
                                append(commMap, commentNode, handler.comments.bind(handler));
                            }
                        }
                    }
                }
            }

            // Phase 2: the doctype is handled before everything else.
            if (document.doctype) {
                const doctype = new ParsedHTMLRewriterDocumentType(document.doctype);
                for (const handler of documentHandlers) {
                    const onDoctype = handler.doctype;
                    await (onDoctype == null ? undefined : onDoctype.call(handler, doctype));
                }
            }

            // Phase 3: visit the nodes in document order. The order is snapshotted up
            // front because handlers may add or delete nodes while running. The
            // trailing `null` is the key used for `innerHTML` handlers of an element
            // that has no following node.
            const walker = document.createTreeWalker(document, SHOW_ELEMENT | SHOW_TEXT | SHOW_COMMENT);
            const nodes = [...treeWalkerToIter(walker), null];
            for (const node of nodes) {
                const queuedInnerHTML = htmlMap.get(node);
                for (const [prevElem, onInnerHTML] of queuedInnerHTML != null ? queuedInnerHTML : []) {
                    await onInnerHTML(prevElem.innerHTML);
                }

                if (isElement(node)) {
                    const matched = elemMap.get(node);
                    for (const onElement of matched != null ? matched : []) {
                        await onElement(new ParsedHTMLRewriterElement(node, document));
                    }
                    continue;
                }

                if (isText(node)) {
                    const matched = textMap.get(node);
                    const textCallbacks = matched != null ? matched : [];
                    const chunk = new ParsedHTMLRewriterText(node, document);
                    for (const onText of textCallbacks) {
                        await onText(chunk);
                    }
                    for (const handler of documentHandlers) {
                        const onText = handler.text;
                        await (onText == null ? undefined : onText.call(handler, chunk));
                    }
                    // Checked only after the handlers above ran, since they may have changed the DOM.
                    // NOTE(review): the node-less chunk presumably represents the "last in text node"
                    // marker of the HTMLRewriter API — confirm against `ParsedHTMLRewriterText`.
                    if (!isText(node.nextSibling)) {
                        const lastChunk = new ParsedHTMLRewriterText(null, document);
                        for (const onText of textCallbacks) {
                            await onText(lastChunk);
                        }
                        for (const handler of documentHandlers) {
                            const onText = handler.text;
                            await (onText == null ? undefined : onText.call(handler, lastChunk));
                        }
                    }
                    continue;
                }

                if (isComment(node)) {
                    const matched = commMap.get(node);
                    const commentCallbacks = matched != null ? matched : [];
                    const comment = new ParsedHTMLRewriterComment(node, document);
                    for (const onComment of commentCallbacks) {
                        await onComment(comment);
                    }
                    for (const handler of documentHandlers) {
                        const onComments = handler.comments;
                        await (onComments == null ? undefined : onComments.call(handler, comment));
                    }
                }
            }

            // Phase 4: the document end is handled after everything else.
            const end = new ParsedHTMLRewriterEnd(document);
            for (const handler of documentHandlers) {
                const onEnd = handler.end;
                await (onEnd == null ? undefined : onEnd.call(handler, end));
            }
            return new TextEncoder().encode(document.toString());
        };

        // This dance (promise => async iterable => stream) is necessary because
        // a) the `Response` constructor doesn't accept async data, except via (byte) streams, and
        // b) `HTMLRewriter.transform` is not an async function.
        const pendingBytes = rewrite();
        const body = asyncIterableToStream(promiseToAsyncIterable(pendingBytes));
        return new Response(body, response);
    }
}
// Create the WeakMaps that back the private fields of `ParsedHTMLRewriter`
// (the instance is the key, the field value is the entry).
_ParsedHTMLRewriter_onMap = new WeakMap();
_ParsedHTMLRewriter_onDocument = new WeakMap();
158//# sourceMappingURL=index.js.map
\No newline at end of file