1 | "use strict";
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 | Object.defineProperty(exports, "__esModule", { value: true });
|
18 | exports.HTMLProcessingParser = exports.HTMLProcessor = exports.ParagraphForTesting = exports.NodeOrTextForTesting = void 0;
|
19 | const dom_js_1 = require("./dom.js");
|
20 | const parser_js_1 = require("./parser.js");
|
21 | const win_js_1 = require("./win.js");
|
// Internal invariant checks go through `console.assert`.
const { assert } = console;
|
/** The ZERO WIDTH SPACE character (U+200B), the default separator. */
const ZWSP = '\u200b';
/** UTF-16 code unit of `ZWSP` (0x200b). */
const ZWSP_CODEPOINT = ZWSP.charCodeAt(0);

/** Numeric `nodeType` values of the node kinds this module inspects. */
const NodeType = { ELEMENT_NODE: 1, TEXT_NODE: 3 };

/**
 * How an element is treated while paragraphs are collected.
 * Values are assigned from the declaration order below, starting at 0:
 * Inline = 0, Block = 1, Skip = 2, Break = 3, NoBreak = 4,
 * BreakOpportunity = 5.
 */
const DomAction = {};
[
    'Inline',
    'Block',
    'Skip',
    'Break',
    'NoBreak',
    'BreakOpportunity',
].forEach((name, value) => {
    DomAction[name] = value;
});
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
/**
 * Fixed action per element name. `actionForElement` consults this table
 * first; names that are absent are classified by computed style or by the
 * built-in block-element list instead.
 */
const domActions = (() => {
    const { Skip, Break, BreakOpportunity, NoBreak } = DomAction;
    return {
        // Elements whose content is never processed.
        AREA: Skip,
        BASE: Skip,
        BASEFONT: Skip,
        DATALIST: Skip,
        HEAD: Skip,
        LINK: Skip,
        META: Skip,
        NOEMBED: Skip,
        NOFRAMES: Skip,
        PARAM: Skip,
        RP: Skip,
        SCRIPT: Skip,
        STYLE: Skip,
        TEMPLATE: Skip,
        TITLE: Skip,
        NOSCRIPT: Skip,
        // A rule ends the current paragraph.
        HR: Break,
        // Preformatted content is left alone.
        LISTING: Skip,
        PLAINTEXT: Skip,
        PRE: Skip,
        XMP: Skip,
        // Phrasing-level elements with dedicated handling.
        BR: Break,
        RT: Skip,
        WBR: BreakOpportunity,
        // Form controls.
        INPUT: Skip,
        SELECT: Skip,
        BUTTON: Skip,
        TEXTAREA: Skip,
        // Other elements whose text is left untouched.
        ABBR: Skip,
        CODE: Skip,
        IFRAME: Skip,
        TIME: Skip,
        VAR: Skip,
        // Content that must not be broken.
        NOBR: NoBreak,
    };
})();
|
/**
 * Element names treated as blocks when no computed `display` value is
 * available to decide. Insertion order matches the grouped lists below.
 */
const defaultBlockElements = new Set([].concat(
    // Document roots.
    ['HTML', 'BODY'],
    // Generic flow containers.
    [
        'ADDRESS', 'BLOCKQUOTE', 'CENTER', 'DIALOG', 'DIV', 'FIGURE',
        'FIGCAPTION', 'FOOTER', 'FORM', 'HEADER', 'LEGEND', 'LISTING',
        'MAIN', 'P',
    ],
    // Sections and headings.
    [
        'ARTICLE', 'ASIDE', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HGROUP',
        'NAV', 'SECTION',
    ],
    // Lists.
    ['DIR', 'DD', 'DL', 'DT', 'MENU', 'OL', 'UL', 'LI'],
    // Tables.
    ['TABLE', 'CAPTION', 'COL', 'TR', 'TD', 'TH'],
    // Form grouping.
    ['FIELDSET'],
    // Disclosure widgets.
    ['DETAILS', 'SUMMARY'],
    // Legacy element.
    ['MARQUEE']
));

/** Numeric `nodeType` values, under shorter key names. */
const NODETYPE = { ELEMENT: 1, TEXT: 3 };
|
154 |
|
155 |
|
156 |
|
157 |
|
158 |
|
/**
 * Determines how `element` is treated while paragraphs are collected.
 *
 * Resolution order:
 *  1. a fixed entry in `domActions` for the element's `nodeName`;
 *  2. when `win.getComputedStyle` is a function, the computed style:
 *     `white-space` of `nowrap` or `pre` gives `NoBreak`; otherwise a
 *     non-empty `display` gives `Inline` for `inline`, `Block` for the rest;
 *  3. the built-in `defaultBlockElements` list.
 *
 * @param element The element to classify.
 * @returns One of the `DomAction` values.
 */
function actionForElement(element) {
    const nodeName = element.nodeName;
    // Look at own keys only: a plain-object lookup would also find members
    // inherited from Object.prototype (`constructor`, `toString`, ...) when
    // an element happens to carry such a name, and return a function.
    if (Object.prototype.hasOwnProperty.call(domActions, nodeName)) {
        const action = domActions[nodeName];
        if (action !== undefined)
            return action;
    }
    if (typeof win_js_1.win.getComputedStyle === 'function') {
        const style = win_js_1.win.getComputedStyle(element);
        const whiteSpace = style.whiteSpace;
        if (whiteSpace === 'nowrap' || whiteSpace === 'pre')
            return DomAction.NoBreak;
        const display = style.display;
        if (display)
            return display === 'inline' ? DomAction.Inline : DomAction.Block;
        // An empty `display` falls through to the built-in list below.
    }
    // Reached when `display` is empty or `getComputedStyle` is unavailable.
    return defaultBlockElements.has(nodeName)
        ? DomAction.Block
        : DomAction.Inline;
}
|
182 |
|
183 |
|
184 |
|
185 |
|
186 |
|
187 |
|
188 |
|
/**
 * One piece of paragraph text: either a DOM text node, which can be split,
 * or a plain string, which cannot.
 */
class NodeOrText {
    /**
     * @param nodeOrText A text node (anything exposing `nodeValue`) or a string.
     */
    constructor(nodeOrText) {
        // Pieces of `text` between boundaries; filled in by the processor.
        this.chunks = [];
        this.hasBreakOpportunityAfter = false;
        this.nodeOrText = nodeOrText;
    }
    /** True when the wrapped value is a plain string. */
    get isString() {
        return typeof this.nodeOrText === 'string';
    }
    /** True when the wrapped value is a node, i.e. not a string. */
    get canSplit() {
        return !this.isString;
    }
    /** The string itself, or the node's `nodeValue`. */
    get text() {
        if (this.isString)
            return this.nodeOrText;
        return this.nodeOrText.nodeValue;
    }
    /** Length of `text`; 0 when the text is null or undefined. */
    get length() {
        const text = this.text;
        if (text === null || text === undefined)
            return 0;
        const size = text.length;
        return size === null || size === undefined ? 0 : size;
    }
    /**
     * Rewrites the wrapped node so that `separator` sits between consecutive
     * chunks. Does nothing when there is at most one chunk.
     *
     * @param separator A string joined into the node's value, or a node that
     *     is deep-cloned between the text nodes replacing the original.
     */
    split(separator) {
        const pieces = this.chunks;
        assert(pieces.length === 0 || pieces.join('') === this.text);
        if (pieces.length <= 1)
            return;
        assert(this.canSplit);
        const textNode = this.nodeOrText;
        if (typeof separator === 'string') {
            // String separators stay inside the same text node.
            textNode.nodeValue = pieces.join(separator);
            return;
        }
        // Node separators: replace the node with one text node per non-empty
        // chunk and a clone of the separator at every chunk boundary.
        const doc = textNode.ownerDocument;
        const replacements = [];
        pieces.forEach((piece, index) => {
            if (index > 0)
                replacements.push(separator.cloneNode(true));
            if (piece)
                replacements.push(doc.createTextNode(piece));
        });
        textNode.replaceWith(...replacements);
    }
}
|
/** Subclass adding nothing; gives `NodeOrText` an exportable name for tests. */
class NodeOrTextForTesting extends NodeOrText {}
|
246 | exports.NodeOrTextForTesting = NodeOrTextForTesting;
|
247 |
|
248 |
|
249 |
|
250 |
|
251 |
|
252 |
|
253 |
|
254 |
|
/**
 * The text pieces collected for one block element, in document order.
 */
class Paragraph {
    /**
     * @param element The element this paragraph was started for.
     */
    constructor(element) {
        this.nodes = [];
        this.element = element;
    }
    /** True when no text piece has been collected. */
    isEmpty() {
        return this.nodes.length === 0;
    }
    /** The text of all pieces, concatenated. */
    get text() {
        const parts = [];
        for (const piece of this.nodes)
            parts.push(piece.text);
        return parts.join('');
    }
    /** The most recently added piece, or undefined when there is none. */
    get lastNode() {
        const count = this.nodes.length;
        return count ? this.nodes[count - 1] : undefined;
    }
    /** Flags the last piece, if any, as followed by a break opportunity. */
    setHasBreakOpportunityAfter() {
        const tail = this.lastNode;
        if (tail)
            tail.hasBreakOpportunityAfter = true;
    }
    /**
     * Collects the offsets into `text` where a break opportunity already
     * exists: right after each ZWSP inside a splittable piece, and at the end
     * of each piece flagged with `hasBreakOpportunityAfter`.
     *
     * @returns The offsets, in the order they are encountered.
     */
    getForcedOpportunities() {
        const found = [];
        let offset = 0;
        for (const piece of this.nodes) {
            if (piece.canSplit) {
                const text = piece.text;
                if (text) {
                    let at = text.indexOf(ZWSP);
                    while (at !== -1) {
                        found.push(offset + at + 1);
                        at = text.indexOf(ZWSP, at + 1);
                    }
                }
            }
            offset += piece.length;
            if (piece.hasBreakOpportunityAfter)
                found.push(offset);
        }
        return found;
    }
    /**
     * Drops the boundaries that coincide with a forced opportunity.
     *
     * @param boundaries Candidate offsets into `text`.
     * @returns `boundaries` itself when there is nothing to exclude,
     *     otherwise a new filtered array.
     */
    excludeForcedOpportunities(boundaries) {
        const forced = this.getForcedOpportunities();
        if (forced.length === 0)
            return boundaries;
        const forcedSet = new Set(forced);
        return boundaries.filter(offset => !forcedSet.has(offset));
    }
}
|
/** Subclass adding nothing; gives `Paragraph` an exportable name for tests. */
class ParagraphForTesting extends Paragraph {}
|
314 | exports.ParagraphForTesting = ParagraphForTesting;
|
315 |
|
316 |
|
317 |
|
/**
 * Inserts a separator at the phrase boundaries reported by a parser into the
 * text nodes of a DOM subtree, and marks each processed block element.
 */
class HTMLProcessor {
    /**
     * @param parser Object whose `parseBoundaries(text)` returns the boundary
     *     offsets for `text`.
     * @param options Optional settings. `className`: class added to processed
     *     blocks instead of the inline wrap style; elements already carrying
     *     it are skipped. `separator`: string or node placed at boundaries
     *     (defaults to ZWSP).
     */
    constructor(parser, options) {
        this.separator = ZWSP;
        this.parser_ = parser;
        if (options !== undefined) {
            if (options.className !== undefined)
                this.className = options.className;
            if (options.separator !== undefined)
                this.separator = options.separator;
        }
    }
    /**
     * Checks whether `ele` has a text node among its direct children.
     *
     * @param ele A node exposing `childNodes`.
     * @returns True if at least one direct child is a text node.
     */
    static hasChildTextNode(ele) {
        for (const child of ele.childNodes) {
            if (child.nodeType === NodeType.TEXT_NODE)
                return true;
        }
        return false;
    }
    /**
     * Processes every paragraph found under `element`.
     *
     * @param element The root element to process.
     */
    applyToElement(element) {
        for (const block of this.getBlocks(element)) {
            assert(!block.isEmpty());
            this.applyToParagraph(block);
        }
    }
    /**
     * Walks `element` and yields the paragraphs it contains.
     *
     * @param element The element to walk.
     * @param parent The paragraph currently being built by the caller, if
     *     any. Inline and no-break elements append to it; block elements
     *     start a paragraph of their own.
     * @yields Non-empty `Paragraph` objects.
     */
    *getBlocks(element, parent) {
        assert(element.nodeType === NodeType.ELEMENT_NODE);
        // Elements carrying the marker class have been processed already.
        if (this.className && element.classList.contains(this.className))
            return;
        const action = actionForElement(element);
        if (action === DomAction.Skip)
            return;
        if (action === DomAction.Break) {
            // Flush what the parent has collected so far; the parent object
            // is reused for the content that follows the break.
            if (parent && !parent.isEmpty()) {
                parent.setHasBreakOpportunityAfter();
                yield parent;
                parent.nodes = [];
            }
            assert(!element.firstChild);
            return;
        }
        if (action === DomAction.BreakOpportunity) {
            if (parent)
                parent.setHasBreakOpportunityAfter();
            return;
        }
        assert(action === DomAction.Block ||
            action === DomAction.Inline ||
            action === DomAction.NoBreak);
        // A root without a parent always starts a paragraph.
        const isNewBlock = !parent || action === DomAction.Block;
        const block = isNewBlock ? new Paragraph(element) : parent;
        for (const child of element.childNodes) {
            switch (child.nodeType) {
                case NodeType.ELEMENT_NODE:
                    for (const childBlock of this.getBlocks(child, block))
                        yield childBlock;
                    break;
                case NodeType.TEXT_NODE:
                    if (action === DomAction.NoBreak) {
                        // Stored as a plain string so it counts towards the
                        // paragraph text but can never be split.
                        const text = child.nodeValue;
                        if (text) {
                            block.nodes.push(new NodeOrText(text));
                        }
                        break;
                    }
                    block.nodes.push(new NodeOrText(child));
                    break;
            }
        }
        // A paragraph borrowed from the parent is yielded by its owner.
        if (isNewBlock && !block.isEmpty())
            yield block;
    }
    /**
     * Parses the paragraph's text and splits its nodes at the boundaries.
     * The array returned by the parser is not modified.
     *
     * @param paragraph The paragraph to process; must not be empty.
     */
    applyToParagraph(paragraph) {
        assert(paragraph.nodes.length > 0);
        // Nothing can be changed when every piece is an unsplittable string.
        if (!paragraph.nodes.some(node => node.canSplit))
            return;
        const text = paragraph.text;
        // Whitespace-only paragraphs need no parsing.
        if (/^\s*$/.test(text))
            return;
        const boundaries = this.parser_.parseBoundaries(text);
        if (boundaries.length <= 0)
            return;
        // Boundaries are expected to be ascending and strictly inside the
        // text.
        assert(boundaries[0] > 0);
        assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
        assert(boundaries[boundaries.length - 1] < text.length);
        // Build a fresh array: `excludeForcedOpportunities` may hand back
        // `boundaries` itself, and appending the sentinel to that would
        // modify the parser's result. The sentinel lies past the end of the
        // text so that `splitNodes` never runs out of boundaries.
        const adjustedBoundaries = [
            ...paragraph.excludeForcedOpportunities(boundaries),
            text.length + 1,
        ];
        this.splitNodes(paragraph.nodes, adjustedBoundaries);
        this.applyBlockStyle(paragraph.element);
    }
    /**
     * Distributes `boundaries` over `nodes`, filling each node's `chunks`,
     * then asks every node to split itself with the configured separator.
     *
     * @param nodes The paragraph's pieces, in order.
     * @param boundaries Ascending offsets into the concatenated text; the
     *     last one must be greater than the total text length (sentinel).
     */
    splitNodes(nodes, boundaries) {
        assert(boundaries.length > 0);
        assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
        const textLen = nodes.reduce((sum, node) => sum + node.length, 0);
        // The sentinel guarantees the lookups below stay inside the array.
        assert(boundaries[boundaries.length - 1] > textLen);
        let boundary_index = 0;
        let boundary = boundaries[0];
        assert(boundary > 0);
        let nodeStart = 0; // Offset of the current node in the whole text.
        let lastNode = null;
        for (const node of nodes) {
            assert(boundary >= nodeStart);
            assert(node.chunks.length === 0);
            const nodeText = node.text;
            if (!nodeText)
                continue;
            const nodeLength = nodeText.length;
            const nodeEnd = nodeStart + nodeLength;
            assert(!lastNode || lastNode.canSplit);
            if (!node.canSplit) {
                // A boundary exactly at the start of an unsplittable node is
                // realised as an empty trailing chunk of the preceding
                // splittable node.
                if (lastNode && boundary === nodeStart) {
                    if (lastNode.chunks.length === 0)
                        lastNode.chunks.push(lastNode.text != null ? lastNode.text : '');
                    lastNode.chunks.push('');
                }
                // Boundaries inside an unsplittable node are dropped.
                while (boundary < nodeEnd) {
                    boundary = boundaries[++boundary_index];
                }
                lastNode = null;
                nodeStart = nodeEnd;
                continue;
            }
            lastNode = node;
            // No boundary inside this node: leave `chunks` empty.
            if (boundary >= nodeEnd) {
                nodeStart = nodeEnd;
                continue;
            }
            // Cut the node's text at every boundary that falls inside it.
            const chunks = node.chunks;
            let chunkStartInNode = 0;
            while (boundary < nodeEnd) {
                const boundaryInNode = boundary - nodeStart;
                assert(boundaryInNode >= chunkStartInNode);
                chunks.push(nodeText.slice(chunkStartInNode, boundaryInNode));
                chunkStartInNode = boundaryInNode;
                boundary = boundaries[++boundary_index];
            }
            // The remainder after the last boundary in this node.
            assert(chunkStartInNode < nodeLength);
            chunks.push(nodeText.slice(chunkStartInNode));
            nodeStart = nodeEnd;
        }
        // All text was visited and only boundaries at or past the end remain.
        assert(nodeStart === textLen);
        assert(boundary_index < boundaries.length);
        assert(boundaries[boundary_index] >= textLen);
        // Apply the computed chunks to the DOM.
        for (const node of nodes) {
            node.split(this.separator);
        }
    }
    /**
     * Marks `element` as processed: adds `className` when one is configured,
     * otherwise applies the wrap style from `dom.js`.
     *
     * @param element The block element of a processed paragraph.
     */
    applyBlockStyle(element) {
        if (this.className) {
            element.classList.add(this.className);
            return;
        }
        (0, dom_js_1.applyWrapStyle)(element);
    }
}
|
530 | exports.HTMLProcessor = HTMLProcessor;
|
531 |
|
532 |
|
533 |
|
/**
 * A `Parser` that can also apply its boundaries to DOM elements and HTML
 * strings, through an internal `HTMLProcessor`.
 */
class HTMLProcessingParser extends parser_js_1.Parser {
    /**
     * @param model Passed to the `Parser` constructor.
     * @param htmlProcessorOptions Options for the internal `HTMLProcessor`;
     *     defaults to using ZWSP as the separator.
     */
    constructor(model, htmlProcessorOptions = {
        separator: ZWSP,
    }) {
        super(model);
        this.htmlProcessor = new HTMLProcessor(this, htmlProcessorOptions);
    }
    /**
     * Deprecated alias of `applyToElement`; logs a warning on every call.
     *
     * @param parentElement The element to process.
     */
    applyElement(parentElement) {
        console.warn('`applyElement` is deprecated. Please use `applyToElement` instead. ' +
            '`applyElement` will be removed in v0.7.0.');
        this.applyToElement(parentElement);
    }
    /**
     * Processes the paragraphs under `parentElement` in place.
     *
     * @param parentElement The element to process.
     */
    applyToElement(parentElement) {
        this.htmlProcessor.applyToElement(parentElement);
    }
    /**
     * Parses `html`, processes it, and serializes the result.
     *
     * When the parsed body has a text node as a direct child, the whole body
     * content is first wrapped in a single `span`. Every element that is a
     * direct child of the body is then processed; other nodes (e.g. comments)
     * are left untouched, and a body without element children is returned as
     * parsed.
     *
     * @param html The input HTML string.
     * @returns The processed HTML; the empty string is returned unchanged.
     */
    translateHTMLString(html) {
        if (html === '')
            return html;
        const doc = (0, dom_js_1.parseFromString)(html);
        if (HTMLProcessor.hasChildTextNode(doc.body)) {
            const wrapper = doc.createElement('span');
            wrapper.append(...doc.body.childNodes);
            doc.body.append(wrapper);
        }
        // Iterate over a snapshot of the body's children, and only hand
        // elements to the processor: the first child may be missing or a
        // non-element node, and later siblings need processing as well.
        for (const child of Array.from(doc.body.childNodes)) {
            if (child.nodeType === NodeType.ELEMENT_NODE)
                this.applyToElement(child);
        }
        return doc.body.innerHTML;
    }
}
|
579 | exports.HTMLProcessingParser = HTMLProcessingParser;
|
580 |
|
\ | No newline at end of file |