UNPKG

7.57 kBJavaScriptView Raw
1"use strict";
/**
 * Normalises a loaded module so that its main export can always be read from
 * the `default` property.
 *
 * An already-installed `this.__importDefault` is reused when present;
 * otherwise the fallback below is used.
 *
 * @param {*} mod value returned by `require`
 * @return {*} `mod` itself when it is truthy and flagged with `__esModule`,
 *     otherwise a fresh `{ default: mod }` wrapper
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    var isEsModule = mod && mod.__esModule;
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
5Object.defineProperty(exports, "__esModule", { value: true });
6var back_1 = __importDefault(require("./back"));
7var comment_1 = __importDefault(require("./nodes/comment"));
8var html_1 = __importDefault(require("./nodes/html"));
9var text_1 = __importDefault(require("./nodes/text"));
10var comment_2 = require("./nodes/comment");
11exports.CommentNode = comment_2.default;
12var html_2 = require("./nodes/html");
13exports.HTMLElement = html_2.default;
14var node_1 = require("./nodes/node");
15exports.Node = node_1.default;
16var text_2 = require("./nodes/text");
17exports.TextNode = text_2.default;
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
// Matches either a complete comment (`<!-- ... -->`) or a single tag.
// For tags the capture groups are: 1 = "/" of a closing tag, 2 = tag name,
// 3 = raw attribute text, 4 = "/" of a self-closing tag.
// NOTE: the regex is global, so it carries `lastIndex` state between calls.
var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
// Picks `id` and `class` attributes out of raw attribute text.
// Groups: 2 = attribute name, 4 = double-quoted value, 5 = single-quoted
// value, 6 = unquoted value. Also global (stateful `lastIndex`).
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]+)"|'([^']+)'|(\S+))/ig;
/**
 * Builds a set-like lookup table (`name -> true`) that has no prototype.
 * A plain object literal would also answer lookups such as
 * `table['constructor']` with inherited members, so a tag that happens to be
 * named like an `Object.prototype` member would be misclassified.
 * @param {string[]} tagNames names to put into the table
 * @return {Object} prototype-less table mapping every name to `true`
 */
function createTagSet(tagNames) {
    var table = Object.create(null);
    for (var i = 0; i < tagNames.length; i++) {
        table[tagNames[i]] = true;
    }
    return table;
}
/**
 * Builds a prototype-less table mapping a tag name to a tag set.
 * @param {Object} spec plain object of the form `{ tagName: [tagName, ...] }`
 * @return {Object} prototype-less table of prototype-less tag sets
 */
function createTagMap(spec) {
    var table = Object.create(null);
    Object.keys(spec).forEach(function (tagName) {
        table[tagName] = createTagSet(spec[tagName]);
    });
    return table;
}
// Elements that are closed as soon as they are opened (`<br>`, `<img>`, ...).
var kSelfClosingElements = createTagSet([
    'area',
    'base',
    'br',
    'col',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'source'
]);
// Keyed by the currently open element: the listed tags implicitly close it
// when one of them is opened (e.g. a new `<li>` closes an open `<li>`).
var kElementsClosedByOpening = createTagMap({
    li: ['li'],
    p: ['p', 'div'],
    b: ['div'],
    td: ['td', 'th'],
    th: ['td', 'th'],
    h1: ['h1'],
    h2: ['h2'],
    h3: ['h3'],
    h4: ['h4'],
    h5: ['h5'],
    h6: ['h6']
});
// Keyed by the currently open element: the listed closing tags implicitly
// close it (e.g. `</ul>` closes a still-open `<li>`).
var kElementsClosedByClosing = createTagMap({
    li: ['ul', 'ol'],
    a: ['div'],
    b: ['div'],
    i: ['div'],
    p: ['div'],
    td: ['tr', 'table'],
    th: ['tr', 'table']
});
// Elements whose content is not parsed as markup; everything up to the
// matching closing tag is taken verbatim.
var kBlockTextElements = createTagSet([
    'script',
    'noscript',
    'style',
    'pre'
]);
/**
 * Looks up `key` in `table`, ignoring anything inherited from a prototype.
 * Tag names come from the parsed document, so a tag called e.g. `constructor`
 * must not be answered with `Object.prototype.constructor`.
 * @param {Object} table lookup table
 * @param {*} key key to look up (coerced to a string)
 * @return {*} the table's own value for `key`, or `undefined`
 */
function ownEntry(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}
/**
 * Finds the closing markup of a block-text element.
 * @param {string} data full HTML source
 * @param {string} closeMarkup closing markup to look for, e.g. `</script>`
 * @param {number} fromIndex position to start searching at
 * @param {boolean} ignoreCase search case-insensitively
 * @return {number} index of the closing markup, or -1 when there is none
 */
function indexOfCloseMarkup(data, closeMarkup, fromIndex, ignoreCase) {
    if (!ignoreCase) {
        return data.indexOf(closeMarkup, fromIndex);
    }
    // A regex is used instead of lower-casing `data`, because lower-casing
    // can change the length of a string and would then shift the indexes.
    var escaped = closeMarkup.replace(/[\\^$.*+?()[\]{}|\/-]/g, '\\$&');
    var pattern = new RegExp(escaped, 'ig');
    pattern.lastIndex = fromIndex;
    var found = pattern.exec(data);
    return found ? found.index : -1;
}
/**
 * Unwinds the elements that are still open at the end of the input, removing
 * each one from the tree and re-attaching its children further up.
 * Empties `stack` down to its first entry (the root).
 * @param {Array} stack open elements, root first
 */
function repairUnclosedElements(stack) {
    while (stack.length > 1) {
        // Handle each error element.
        var last = stack.pop();
        var oneBefore = back_1.default(stack);
        if (!(last.parentNode && last.parentNode.parentNode)) {
            // The parent has no parent of its own (presumably the root):
            // the element is left where it is.
            continue;
        }
        if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
            // Pair error case <h3> <h3>: fixed to <h3> </h3>.
            oneBefore.removeChild(last);
            last.childNodes.forEach(function (child) {
                oneBefore.parentNode.appendChild(child);
            });
            stack.pop();
        }
        else {
            // Single error <div> <h3> </div>: just removes <h3>.
            oneBefore.removeChild(last);
            last.childNodes.forEach(function (child) {
                oneBefore.appendChild(child);
            });
        }
    }
}
/**
 * Parses a chunk of HTML source and returns the root of the resulting tree.
 *
 * Text that precedes the first tag or follows the last tag is kept as text
 * nodes (it used to be dropped silently).
 *
 * @param {string} data html
 * @param {Object} [options] parser options
 * @param {boolean} [options.comment] keep comments as comment nodes
 * @param {boolean} [options.lowerCaseTagName] lower-case all tag names
 * @param {boolean} [options.script] keep the text content of `<script>`
 * @param {boolean} [options.noscript] keep the text content of `<noscript>`
 * @param {boolean} [options.style] keep the text content of `<style>`
 * @param {boolean} [options.pre] keep the text content of `<pre>`
 * @param {boolean} [options.noFix] skip the repair of unclosed elements; in
 *     that case a single text node holding `data` is returned instead of
 *     the tree
 * @return {HTMLElement} root element (or a text node when `options.noFix` is
 *     set); the returned node carries a boolean `valid` property that is
 *     `true` when no element was left open at the end of the input
 */
function parse(data, options) {
    if (options === void 0) { options = {}; }
    // The markup regex is shared and global: make sure a previous call that
    // ended with an exception cannot leave a stale position behind.
    kMarkupPattern.lastIndex = 0;
    var root = new html_1.default(null, {});
    var currentParent = root;
    var stack = [root];
    // Position right after the last consumed markup; text between this
    // position and the next markup becomes a text node.
    var lastTextPos = 0;
    var match;
    while ((match = kMarkupPattern.exec(data))) {
        if (lastTextPos < match.index) {
            // There is text between the previous markup and this one.
            currentParent.appendChild(new text_1.default(data.substring(lastTextPos, match.index)));
        }
        lastTextPos = kMarkupPattern.lastIndex;
        if (match[0][1] === '!') {
            // This is a comment.
            if (options.comment) {
                // Only keep what is in between <!-- and -->.
                currentParent.appendChild(new comment_1.default(data.substring(match.index + 4, lastTextPos - 3)));
            }
            continue;
        }
        if (options.lowerCaseTagName) {
            match[2] = match[2].toLowerCase();
        }
        var tagName = match[2];
        // Set when a block-text element was consumed together with its
        // closing tag, so that it is closed right away below.
        var forceClose = false;
        if (!match[1]) {
            // Not a </ tag.
            var attrs = {};
            var attMatch = void 0;
            kAttributePattern.lastIndex = 0;
            while ((attMatch = kAttributePattern.exec(match[3]))) {
                attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
            }
            var closedByOpening = ownEntry(kElementsClosedByOpening, currentParent.tagName);
            if (!match[4] && closedByOpening && ownEntry(closedByOpening, tagName)) {
                // Opening this tag implicitly closes the current element.
                stack.pop();
                currentParent = back_1.default(stack);
            }
            currentParent = currentParent.appendChild(new html_1.default(tagName, attrs, match[3]));
            stack.push(currentParent);
            if (ownEntry(kBlockTextElements, tagName)) {
                // Skip ahead to the next </script>, </style>, ...
                var closeMarkup = '</' + tagName + '>';
                var bodyStart = kMarkupPattern.lastIndex;
                // When tag names are lower-cased the source may still spell
                // the closing tag in upper case (<SCRIPT>...</SCRIPT>).
                var closeIndex = indexOfCloseMarkup(data, closeMarkup, bodyStart, !!options.lowerCaseTagName);
                if (options[tagName]) {
                    var blockText = closeIndex === -1
                        // There is no matching ending for the text element.
                        ? data.substring(bodyStart)
                        : data.substring(bodyStart, closeIndex);
                    if (blockText.length > 0) {
                        currentParent.appendChild(new text_1.default(blockText));
                    }
                }
                if (closeIndex === -1) {
                    // Everything up to the end of the input was consumed.
                    lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
                }
                else {
                    lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
                    forceClose = true;
                }
            }
        }
        if (forceClose || match[1] || match[4] ||
            ownEntry(kSelfClosingElements, tagName)) {
            // </ or /> or <br> etc.
            while (true) {
                if (currentParent.tagName === tagName) {
                    stack.pop();
                    currentParent = back_1.default(stack);
                    break;
                }
                // Try to close the current element and move on.
                var closedByClosing = ownEntry(kElementsClosedByClosing, currentParent.tagName);
                if (closedByClosing && ownEntry(closedByClosing, tagName)) {
                    stack.pop();
                    currentParent = back_1.default(stack);
                    continue;
                }
                // Use aggressive strategy to handle unmatching markups.
                break;
            }
        }
    }
    if (lastTextPos < data.length) {
        // Text after the last markup (or the whole input when it contains no
        // markup at all).
        currentParent.appendChild(new text_1.default(data.substring(lastTextPos)));
    }
    var valid = stack.length === 1;
    if (options.noFix) {
        var textResponse = new text_1.default(data);
        textResponse.valid = valid;
        return textResponse;
    }
    var response = root;
    response.valid = valid;
    repairUnclosedElements(stack);
    response.childNodes.forEach(function (node) {
        if (node instanceof html_1.default) {
            node.parentNode = null;
        }
    });
    return response;
}
205exports.parse = parse;
206exports.default = parse;