1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 | import { applyWrapStyle, parseFromString } from './dom.js';
|
17 | import { Parser } from './parser.js';
|
18 | import { win } from './win.js';
|
// Invariant-check helper used throughout this module. It is the console's
// `assert`, so a failed condition is reported on the console, not thrown.
const { assert } = console;
|
/** Code point of U+200B ZERO WIDTH SPACE. */
const ZWSP_CODEPOINT = 0x200b;

/** U+200B ZERO WIDTH SPACE as a one-character string. */
const ZWSP = String.fromCodePoint(ZWSP_CODEPOINT);
|
22 |
|
23 |
|
/**
 * The `nodeType` values this module compares DOM nodes against.
 * Frozen so that the shared table cannot be modified at runtime.
 */
const NodeType = Object.freeze({
  ELEMENT_NODE: 1,
  TEXT_NODE: 3,
});
|
/**
 * How an element is treated while paragraphs are collected from the DOM
 * (see `HTMLProcessor.getBlocks`). Frozen so that the shared table cannot be
 * modified at runtime.
 */
const DomAction = Object.freeze({
  /** The element's text nodes are added to the enclosing paragraph. */
  Inline: 0,
  /** The element starts a paragraph of its own. */
  Block: 1,
  /** The element and everything below it are ignored. */
  Skip: 2,
  /** The element ends the enclosing paragraph at this point. */
  Break: 3,
  /** The element's text is part of the paragraph but is never split. */
  NoBreak: 4,
  /** The element marks a break opportunity after the preceding node. */
  BreakOpportunity: 5,
});
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
/**
 * Fixed `DomAction` per element, keyed by `nodeName`. Elements listed here
 * are handled without consulting their computed style.
 *
 * The table has no prototype, so a lookup with an arbitrary node name (for
 * example `constructor` or `toString`) can never resolve to an inherited
 * `Object.prototype` member. It is also frozen because it is shared state.
 */
const domActions = Object.freeze(
  Object.assign(Object.create(null), {
    // Metadata, scripting and other elements whose content is not part of
    // the running text.
    AREA: DomAction.Skip,
    BASE: DomAction.Skip,
    BASEFONT: DomAction.Skip,
    DATALIST: DomAction.Skip,
    HEAD: DomAction.Skip,
    LINK: DomAction.Skip,
    META: DomAction.Skip,
    NOEMBED: DomAction.Skip,
    NOFRAMES: DomAction.Skip,
    PARAM: DomAction.Skip,
    RP: DomAction.Skip,
    SCRIPT: DomAction.Skip,
    STYLE: DomAction.Skip,
    TEMPLATE: DomAction.Skip,
    TITLE: DomAction.Skip,
    NOSCRIPT: DomAction.Skip,

    // A horizontal rule separates the text before it from the text after it.
    HR: DomAction.Break,

    // Preformatted text is left untouched.
    LISTING: DomAction.Skip,
    PLAINTEXT: DomAction.Skip,
    PRE: DomAction.Skip,
    XMP: DomAction.Skip,

    // Line-breaking and ruby helpers.
    BR: DomAction.Break,
    RT: DomAction.Skip,
    WBR: DomAction.BreakOpportunity,

    // Form controls.
    INPUT: DomAction.Skip,
    SELECT: DomAction.Skip,
    BUTTON: DomAction.Skip,
    TEXTAREA: DomAction.Skip,

    // NOTE(review): presumably skipped because their content should be kept
    // intact (abbreviations, code, dates, embedded documents) - confirm.
    ABBR: DomAction.Skip,
    CODE: DomAction.Skip,
    IFRAME: DomAction.Skip,
    TIME: DomAction.Skip,
    VAR: DomAction.Skip,

    // Text inside <nobr> must not be split.
    NOBR: DomAction.NoBreak,
  }),
);
|
/**
 * Node names treated as block-level when the computed style cannot be used
 * to decide (see `actionForElement`). Grouped by kind; the insertion order is
 * the same as the flat list this replaces.
 */
const defaultBlockElements = new Set([
  // Document roots.
  ...['HTML', 'BODY'],
  // Flow content.
  ...['ADDRESS', 'BLOCKQUOTE', 'CENTER', 'DIALOG', 'DIV', 'FIGURE', 'FIGCAPTION'],
  ...['FOOTER', 'FORM', 'HEADER', 'LEGEND', 'LISTING', 'MAIN', 'P'],
  // Sections and headings.
  ...['ARTICLE', 'ASIDE', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HGROUP', 'NAV', 'SECTION'],
  // Lists.
  ...['DIR', 'DD', 'DL', 'DT', 'MENU', 'OL', 'UL', 'LI'],
  // Tables.
  ...['TABLE', 'CAPTION', 'COL', 'TR', 'TD', 'TH'],
  // Form grouping.
  ...['FIELDSET'],
  // Disclosure widget.
  ...['DETAILS', 'SUMMARY'],
  // Legacy marquee.
  ...['MARQUEE'],
]);
|
145 |
|
146 |
|
/**
 * `nodeType` values under short names. The numbers are the same as those in
 * `NodeType` above (1 for elements, 3 for text nodes).
 * Frozen so that the shared table cannot be modified at runtime.
 */
const NODETYPE = Object.freeze({
  ELEMENT: 1,
  TEXT: 3,
});
|
151 |
|
152 |
|
153 |
|
154 |
|
155 |
|
/**
 * Decides how `element` is treated while paragraphs are collected.
 *
 * The decision is made in this order:
 *  1. a fixed entry for the node name in `domActions`;
 *  2. the computed style, when `win.getComputedStyle` is available:
 *     `white-space` of `nowrap` or `pre` gives `NoBreak`, otherwise a
 *     non-empty `display` gives `Inline` for `inline` and `Block` for
 *     anything else;
 *  3. membership of the node name in `defaultBlockElements`.
 *
 * @param element The element to classify.
 * @returns One of the `DomAction` values.
 */
function actionForElement(element) {
  const nodeName = element.nodeName;

  // Only own entries count. A plain property read would also find inherited
  // members such as `constructor` or `toString`, which a case-preserved node
  // name can match, and would return a function where an action is expected.
  if (Object.prototype.hasOwnProperty.call(domActions, nodeName)) {
    const fixedAction = domActions[nodeName];
    if (fixedAction !== undefined) {
      return fixedAction;
    }
  }

  if (typeof win.getComputedStyle === 'function') {
    const style = win.getComputedStyle(element);
    // Guard against a missing style object (some environments return null,
    // e.g. for elements in a non-rendered frame) and fall through to the
    // name-based default below.
    if (style) {
      const whiteSpace = style.whiteSpace;
      if (whiteSpace === 'nowrap' || whiteSpace === 'pre') {
        return DomAction.NoBreak;
      }
      const display = style.display;
      if (display) {
        return display === 'inline' ? DomAction.Inline : DomAction.Block;
      }
      // An empty `display` carries no information; use the name-based
      // default below.
    }
  }

  return defaultBlockElements.has(nodeName)
    ? DomAction.Block
    : DomAction.Inline;
}
|
179 |
|
180 |
|
181 |
|
182 |
|
183 |
|
184 |
|
185 |
|
/**
 * One piece of a paragraph's text: either a DOM text node, which can be
 * split in place, or a plain string, which only contributes its characters.
 */
class NodeOrText {
  /**
   * @param nodeOrText A text node or a string.
   */
  constructor(nodeOrText) {
    // Pieces of `text` to be separated by `split`; empty until they are set.
    this.chunks = [];
    // True when a break opportunity exists right after this piece.
    this.hasBreakOpportunityAfter = false;
    this.nodeOrText = nodeOrText;
  }

  /** True when this wraps a plain string instead of a node. */
  get isString() {
    return typeof this.nodeOrText === 'string';
  }

  /** Only nodes can be split; plain strings cannot. */
  get canSplit() {
    return !this.isString;
  }

  /** The string itself, or the node's `nodeValue`. */
  get text() {
    if (this.isString) {
      return this.nodeOrText;
    }
    return this.nodeOrText.nodeValue;
  }

  /** Length of `text`, or 0 when there is no text. */
  get length() {
    const value = this.text;
    return value == null ? 0 : value.length;
  }

  /**
   * Applies `chunks` to the wrapped node. Does nothing when there are fewer
   * than two chunks.
   *
   * @param separator A string joined between the chunks inside the node's
   *     value, or a node whose deep clones are placed between new text nodes
   *     that replace the wrapped node.
   */
  split(separator) {
    const pieces = this.chunks;
    assert(pieces.length === 0 || pieces.join('') === this.text);
    if (pieces.length <= 1) {
      return;
    }
    assert(this.canSplit);
    const textNode = this.nodeOrText;

    if (typeof separator === 'string') {
      // The node stays in place; only its text gains the separators.
      textNode.nodeValue = pieces.join(separator);
      return;
    }

    // Build "piece, separator, piece, ..." with one separator per chunk
    // boundary. Empty pieces produce no text node but keep their separator.
    const ownerDocument = textNode.ownerDocument;
    const replacements = [];
    pieces.forEach((piece, index) => {
      if (index > 0) {
        replacements.push(separator.cloneNode(true));
      }
      if (piece) {
        replacements.push(ownerDocument.createTextNode(piece));
      }
    });
    textNode.replaceWith(...replacements);
  }
}
|
/** Exported subclass that makes the module-private `NodeOrText` reachable from tests. */
export class NodeOrTextForTesting extends NodeOrText {}
|
243 |
|
244 |
|
245 |
|
246 |
|
247 |
|
248 |
|
249 |
|
250 |
|
/**
 * The text pieces (`NodeOrText` items) collected for one element, together
 * with that element.
 */
class Paragraph {
  /**
   * @param element The element this paragraph was started for.
   */
  constructor(element) {
    this.nodes = [];
    this.element = element;
  }

  /** True when no pieces have been collected. */
  isEmpty() {
    return !this.nodes.length;
  }

  /** The text of all pieces, concatenated in order. */
  get text() {
    const parts = this.nodes.map(piece => piece.text);
    return parts.join('');
  }

  /** The most recently added piece, or `undefined` when there is none. */
  get lastNode() {
    return this.nodes[this.nodes.length - 1];
  }

  /** Marks a break opportunity after the last piece, if there is one. */
  setHasBreakOpportunityAfter() {
    const tail = this.lastNode;
    if (!tail) {
      return;
    }
    tail.hasBreakOpportunityAfter = true;
  }

  /**
   * Lists the offsets into `text` where a break opportunity already exists:
   * right after every U+200B found in a splittable piece, and at the end of
   * every piece flagged with `hasBreakOpportunityAfter`.
   *
   * @returns The offsets, in the order they are found.
   */
  getForcedOpportunities() {
    const offsets = [];
    let consumed = 0;
    for (const piece of this.nodes) {
      if (piece.canSplit) {
        const pieceText = piece.text;
        if (pieceText) {
          for (let index = 0; index < pieceText.length; index += 1) {
            if (pieceText.charCodeAt(index) !== ZWSP_CODEPOINT) {
              continue;
            }
            // The opportunity is after the zero-width space, hence the +1.
            offsets.push(consumed + index + 1);
          }
        }
      }
      consumed += piece.length;
      if (piece.hasBreakOpportunityAfter) {
        offsets.push(consumed);
      }
    }
    return offsets;
  }

  /**
   * Removes from `boundaries` every offset that is already a forced
   * opportunity.
   *
   * @param boundaries Offsets into `text`.
   * @returns `boundaries` itself when nothing is forced, otherwise a new
   *     filtered array.
   */
  excludeForcedOpportunities(boundaries) {
    const forced = new Set(this.getForcedOpportunities());
    if (forced.size === 0) {
      return boundaries;
    }
    return boundaries.filter(offset => !forced.has(offset));
  }
}
|
/** Exported subclass that makes the module-private `Paragraph` reachable from tests. */
export class ParagraphForTesting extends Paragraph {}
|
310 |
|
311 |
|
312 |
|
/**
 * Applies the boundaries reported by a parser to the text of a DOM subtree:
 * collects the text of each paragraph, asks the parser where it may break,
 * inserts the separator at those positions and marks the paragraph's element.
 */
export class HTMLProcessor {
  /**
   * @param parser Object whose `parseBoundaries(text)` returns ascending
   *     offsets into `text`.
   * @param options Optional settings:
   *     `className` - class added to processed elements; elements that
   *     already carry it are skipped. Without it `applyWrapStyle` is used.
   *     `separator` - string or node inserted at each boundary (default: a
   *     zero-width space).
   */
  constructor(parser, options) {
    // Default; replaced below when `options.separator` is given.
    this.separator = ZWSP;
    this.parser_ = parser;
    if (options !== undefined) {
      if (options.className !== undefined) {
        this.className = options.className;
      }
      if (options.separator !== undefined) {
        this.separator = options.separator;
      }
    }
  }

  /**
   * Tells whether any direct child of `ele` is a text node.
   *
   * @param ele The node whose children are checked.
   * @returns `true` when at least one direct child is a text node.
   */
  static hasChildTextNode(ele) {
    for (const child of ele.childNodes) {
      if (child.nodeType === NodeType.TEXT_NODE) {
        return true;
      }
    }
    return false;
  }

  /**
   * Processes every paragraph found in `element` and its descendants.
   *
   * @param element The root element to process.
   */
  applyToElement(element) {
    for (const block of this.getBlocks(element)) {
      assert(!block.isEmpty());
      this.applyToParagraph(block);
    }
  }

  /**
   * Yields the paragraphs of `element`, one at a time. The caller may modify
   * the DOM of a yielded paragraph before asking for the next one.
   *
   * @param element The element to search.
   * @param parent The paragraph `element` is part of, if any.
   */
  *getBlocks(element, parent) {
    assert(element.nodeType === NodeType.ELEMENT_NODE);

    // Elements that already carry the class name were processed before.
    if (this.className && element.classList.contains(this.className)) {
      return;
    }

    const action = actionForElement(element);
    if (action === DomAction.Skip) {
      return;
    }

    if (action === DomAction.Break) {
      // Hand out what was collected so far, then restart the collection so
      // that the text after the break forms a paragraph of its own.
      if (parent && !parent.isEmpty()) {
        parent.setHasBreakOpportunityAfter();
        yield parent;
        parent.nodes = [];
      }
      assert(!element.firstChild);
      return;
    }

    if (action === DomAction.BreakOpportunity) {
      if (parent) {
        parent.setHasBreakOpportunityAfter();
      }
      return;
    }

    assert(
      action === DomAction.Block ||
        action === DomAction.Inline ||
        action === DomAction.NoBreak,
    );
    const isNewBlock = !parent || action === DomAction.Block;
    const block = isNewBlock ? new Paragraph(element) : parent;

    // Iterate over a snapshot. `childNodes` is live, and the consumer may
    // replace text nodes of a paragraph yielded from inside this loop (node
    // separators); with the live list, already collected text would be
    // visited again.
    for (const child of Array.from(element.childNodes)) {
      switch (child.nodeType) {
        case NodeType.ELEMENT_NODE:
          yield* this.getBlocks(child, block);
          break;
        case NodeType.TEXT_NODE:
          if (action === DomAction.NoBreak) {
            // Keep the text for parsing, but as a plain string so that it
            // is never split.
            const text = child.nodeValue;
            if (text) {
              block.nodes.push(new NodeOrText(text));
            }
            break;
          }
          block.nodes.push(new NodeOrText(child));
          break;
      }
    }

    // Paragraphs shared with the parent are yielded by the parent.
    if (isNewBlock && !block.isEmpty()) {
      yield block;
    }
  }

  /**
   * Parses the text of `paragraph`, splits its nodes at the boundaries and
   * marks its element.
   *
   * @param paragraph The paragraph to process.
   */
  applyToParagraph(paragraph) {
    assert(paragraph.nodes.length > 0);
    // Nothing can be inserted when no piece is splittable.
    if (!paragraph.nodes.some(node => node.canSplit)) {
      return;
    }
    const text = paragraph.text;
    // Whitespace-only text has nothing to break.
    if (/^\s*$/.test(text)) {
      return;
    }

    const boundaries = this.parser_.parseBoundaries(text);
    if (boundaries.length <= 0) {
      return;
    }

    // Boundaries are strictly ascending and strictly inside the text.
    assert(boundaries[0] > 0);
    assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
    assert(boundaries[boundaries.length - 1] < text.length);

    // Copy before appending the end sentinel. `excludeForcedOpportunities`
    // may return the parser's own array, which must not be modified.
    const adjustedBoundaries = [
      ...paragraph.excludeForcedOpportunities(boundaries),
      text.length + 1,
    ];
    this.splitNodes(paragraph.nodes, adjustedBoundaries);
    this.applyBlockStyle(paragraph.element);
  }

  /**
   * Distributes `boundaries` over `nodes` as chunks and splits each node.
   *
   * @param nodes The pieces of one paragraph, in text order.
   * @param boundaries Strictly ascending offsets into the concatenated text.
   *     The last one must be greater than the text length (end sentinel).
   */
  splitNodes(nodes, boundaries) {
    assert(boundaries.length > 0);
    assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
    const textLen = nodes.reduce((sum, node) => sum + node.length, 0);
    // The sentinel guarantees that the loops below never run out of entries.
    assert(boundaries[boundaries.length - 1] > textLen);

    let boundaryIndex = 0;
    let boundary = boundaries[0];
    assert(boundary > 0);
    let nodeStart = 0; // Offset of the current node within the whole text.
    let lastNode = null; // Previous splittable node, if directly usable.
    for (const node of nodes) {
      assert(boundary >= nodeStart);
      assert(node.chunks.length === 0);
      const nodeText = node.text;
      if (!nodeText) {
        continue;
      }
      const nodeLength = nodeText.length;
      const nodeEnd = nodeStart + nodeLength;
      assert(!lastNode || lastNode.canSplit);

      if (!node.canSplit) {
        // A boundary right before an unsplittable piece is realised as an
        // empty trailing chunk of the previous splittable node.
        if (lastNode && boundary === nodeStart) {
          if (lastNode.chunks.length === 0) {
            const lastText = lastNode.text;
            lastNode.chunks.push(lastText == null ? '' : lastText);
          }
          lastNode.chunks.push('');
        }
        // Boundaries inside the unsplittable piece are dropped.
        while (boundary < nodeEnd) {
          boundary = boundaries[++boundaryIndex];
        }
        lastNode = null;
        nodeStart = nodeEnd;
        continue;
      }

      lastNode = node;
      // No boundary inside this node.
      if (boundary >= nodeEnd) {
        nodeStart = nodeEnd;
        continue;
      }

      // Cut the node text at every boundary that falls inside it.
      const chunks = node.chunks;
      let chunkStartInNode = 0;
      while (boundary < nodeEnd) {
        const boundaryInNode = boundary - nodeStart;
        assert(boundaryInNode >= chunkStartInNode);
        chunks.push(nodeText.slice(chunkStartInNode, boundaryInNode));
        chunkStartInNode = boundaryInNode;
        boundary = boundaries[++boundaryIndex];
      }
      // The remainder after the last boundary.
      assert(chunkStartInNode < nodeLength);
      chunks.push(nodeText.slice(chunkStartInNode));
      nodeStart = nodeEnd;
    }

    // All text was visited and only the sentinel may be left.
    assert(nodeStart === textLen);
    assert(boundaryIndex < boundaries.length);
    assert(boundaries[boundaryIndex] >= textLen);

    for (const node of nodes) {
      node.split(this.separator);
    }
  }

  /**
   * Marks `element` as processed: adds `className` when one is configured,
   * otherwise calls `applyWrapStyle`.
   *
   * @param element The element of a processed paragraph.
   */
  applyBlockStyle(element) {
    if (this.className) {
      element.classList.add(this.className);
      return;
    }
    applyWrapStyle(element);
  }
}
|
525 |
|
526 |
|
527 |
|
/**
 * A `Parser` that can also apply its boundaries to DOM elements and HTML
 * strings by way of an `HTMLProcessor`.
 */
export class HTMLProcessingParser extends Parser {
  /**
   * @param model Passed unchanged to the `Parser` constructor.
   * @param htmlProcessorOptions Options for the `HTMLProcessor` created for
   *     this parser. Defaults to a zero-width space separator.
   */
  constructor(model, htmlProcessorOptions = { separator: ZWSP }) {
    super(model);
    this.htmlProcessor = new HTMLProcessor(this, htmlProcessorOptions);
  }

  /**
   * Deprecated alias of `applyToElement`; logs a warning on every call.
   *
   * @param parentElement The element to process.
   */
  applyElement(parentElement) {
    console.warn(
      '`applyElement` is deprecated. Please use `applyToElement` instead. ' +
        '`applyElement` will be removed in v0.7.0.',
    );
    this.applyToElement(parentElement);
  }

  /**
   * Processes `parentElement` in place.
   *
   * @param parentElement The element to process.
   */
  applyToElement(parentElement) {
    this.htmlProcessor.applyToElement(parentElement);
  }

  /**
   * Processes an HTML string and returns the resulting markup.
   *
   * When the parsed body has text directly below it, the whole content is
   * wrapped in a `<span>` first, which then appears in the result.
   *
   * @param html The HTML to process.
   * @returns The processed HTML; the empty string is returned as is.
   */
  translateHTMLString(html) {
    if (html === '') {
      return html;
    }
    const doc = parseFromString(html);
    const body = doc.body;
    if (HTMLProcessor.hasChildTextNode(body)) {
      const wrapper = doc.createElement('span');
      wrapper.append(...body.childNodes);
      body.append(wrapper);
    }
    // Process every root element, not only the first one. Non-element roots
    // (such as comments) and an empty body are skipped, not dereferenced.
    // The list is copied because it is live and processing may change it.
    for (const child of Array.from(body.childNodes)) {
      if (child.nodeType === NodeType.ELEMENT_NODE) {
        this.applyToElement(child);
      }
    }
    return body.innerHTML;
  }
}
|
573 |
|
\ | No newline at end of file |