1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 | import { DomConverter, ViewDocument } from 'ckeditor5/src/engine.js';
|
10 | import { normalizeSpacing, normalizeSpacerunSpans } from './space.js';
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
/**
 * Parses an HTML string (as pasted from an office application) into a DOM
 * document and returns both its view representation and its stylesheets.
 *
 * @param {string} htmlString Raw HTML to parse.
 * @param {object} stylesProcessor Forwarded unchanged to `documentToView()`.
 * @returns {{ body: *, bodyString: string, styles: Array, stylesString: string }}
 * `body` is the result of `documentToView()`, `bodyString` is the `innerHTML`
 * of the parsed `<body>`, and `styles` / `stylesString` come from `extractStyles()`.
 */
export function parseHtml(htmlString, stylesProcessor) {
    const parser = new DOMParser();

    // Strip the `<!--[if gte vml 1]>` conditional-comment openers and every
    // `<o:SmartTagType ...>` tag (with or without attributes) before parsing.
    const strippedHtml = htmlString
        .replace(/<!--\[if gte vml 1]>/g, '')
        .replace(/<o:SmartTagType(?:\s+[^\s>=]+(?:="[^"]*")?)*\s*\/?>/gi, '');

    const htmlDocument = parser.parseFromString(
        normalizeSpacing(cleanContentAfterBody(strippedHtml)),
        'text/html'
    );

    normalizeSpacerunSpans(htmlDocument);

    // The serialized body must be read before the view conversion, because
    // `documentToView()` moves every child out of `htmlDocument.body`.
    const bodyString = htmlDocument.body.innerHTML;
    const body = documentToView(htmlDocument, stylesProcessor);
    const { styles, stylesString } = extractStyles(htmlDocument);

    return { body, bodyString, styles, stylesString };
}
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
/**
 * Converts the content of `htmlDocument.body` into a view structure.
 *
 * Note: this empties `htmlDocument.body` — all of its child nodes are moved
 * into a detached document fragment, which is then converted.
 *
 * @param {Document} htmlDocument Parsed DOM document whose body is converted.
 * @param {object} stylesProcessor Passed to the `ViewDocument` constructor.
 * @returns {*} Whatever `DomConverter#domToView()` returns for the fragment.
 */
function documentToView(htmlDocument, stylesProcessor) {
    const converter = new DomConverter(
        new ViewDocument(stylesProcessor),
        { renderingMode: 'data' }
    );
    const container = htmlDocument.createDocumentFragment();
    const { body } = htmlDocument;

    // `appendChild()` re-parents the node, so `firstChild` advances on each pass.
    while (body.firstChild) {
        container.appendChild(body.firstChild);
    }

    return converter.domToView(container, { skipComments: true });
}
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
/**
 * Collects the stylesheets of all `<style>` elements in the given document.
 *
 * Only elements whose `sheet` exposes at least one CSS rule are included.
 *
 * @param {Document} htmlDocument Document to scan for `<style>` elements.
 * @returns {{ styles: Array, stylesString: string }} `styles` holds the `sheet`
 * objects of the matching elements; `stylesString` is their `innerHTML`
 * values joined with a single space.
 */
function extractStyles(htmlDocument) {
    const hasRules = tag => Boolean(tag.sheet && tag.sheet.cssRules && tag.sheet.cssRules.length);

    const populatedTags = Array
        .from(htmlDocument.getElementsByTagName('style'))
        .filter(hasRules);

    return {
        styles: populatedTags.map(tag => tag.sheet),
        stylesString: populatedTags.map(tag => tag.innerHTML).join(' ')
    };
}
|
75 |
|
76 |
|
77 |
|
78 |
|
79 |
|
80 |
|
81 |
|
82 |
|
83 |
|
84 |
|
85 |
|
/**
 * Removes whatever sits between the closing `</body>` tag and the closing
 * `</html>` tag, e.g.:
 *
 *     <html><body><p>Foo</p></body><span>Fo</span></html>
 *     -> <html><body><p>Foo</p></body></html>
 *
 * Closing tags are matched case-insensitively and may contain whitespace
 * before `>` (`</BODY>`, `</body >`), since HTML end tags allow both. The
 * matched tags are kept exactly as they were written in the input.
 *
 * @param {string} htmlString The HTML string to clean.
 * @returns {string} The input unchanged when it has no closing body tag.
 * Otherwise the input up to and including the first closing body tag, followed
 * by the remainder starting at the first closing html tag after it; if no such
 * html tag exists, everything after the closing body tag is dropped.
 */
function cleanContentAfterBody(htmlString) {
    const bodyCloseMatch = /<\/body\s*>/i.exec(htmlString);

    if (!bodyCloseMatch) {
        return htmlString;
    }

    // Work with the real match length: the tag may be longer than `</body>`
    // when it contains whitespace.
    const bodyEndIndex = bodyCloseMatch.index + bodyCloseMatch[0].length;
    const afterBody = htmlString.slice(bodyEndIndex);
    const htmlCloseOffset = afterBody.search(/<\/html\s*>/i);

    return htmlString.slice(0, bodyEndIndex) +
        (htmlCloseOffset >= 0 ? afterBody.slice(htmlCloseOffset) : '');
}
|