UNPKG

8.58 kBJavaScriptView Raw
// Parser library used by clean() to turn the input markup into a DOM.
var htmlparser = require('htmlparser2');

// Tags that renderTag() emits without a closing tag and that indent() skips
// when it tracks nesting depth.
var voidElements = [
    'area', 'base', 'basefont', 'br', 'col', 'command', 'embed',
    'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link',
    'meta', 'param', 'source', 'track', 'wbr',

    // common self closing svg elements
    'circle', 'ellipse', 'line', 'path', 'polygon', 'polyline',
    'rect', 'stop', 'use'
];

// Settings currently in effect; setup() replaces this object on every call.
var options = {};
35
/**
 * Reset the module-level `options` to the built-in defaults and then apply
 * the caller's overrides on top of them.
 *
 * List options (`break-around-tags`, `remove-attributes`, `remove-tags`,
 * `remove-empty-tags`) are replaced wholesale when given; the `add-*`
 * variants append to whichever list is in effect after replacement.
 *
 * @param {Object} [opt] Overrides keyed by option name. When falsy, the
 *     defaults are used unchanged.
 */
function setup(opt) {
    options = {
        'break-around-comments': true,
        'break-around-tags': [
            'blockquote',
            'body',
            'br',
            'div',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'head',
            'hr',
            'link',
            'meta',
            'p',
            'script',
            'style',
            'table',
            'td',
            'title',
            'tr'
        ],
        'indent': ' ',
        'remove-attributes': [
            'align',
            'bgcolor',
            'border',
            'cellpadding',
            'cellspacing',
            'color',
            'height',
            'target',
            'valign',
            'width'
        ],
        'remove-comments': false,
        'remove-empty-tags': [],
        'remove-tags': [
            'center',
            'font'
        ],
        'replace-nbsp': false,
        'wrap': 120
    };

    if (!opt) {
        return;
    }

    // Defaults to true: only an explicit `false` turns it off.
    options['break-around-comments'] = opt['break-around-comments'] !== false;
    options['break-around-tags'] = opt['break-around-tags'] || options['break-around-tags'];

    // An empty string is a legitimate indent ("do not indent at all"), so any
    // string is taken as-is. A plain `||` would silently discard '' and fall
    // back to the default. Non-string values keep the previous `||` behavior.
    options['indent'] = typeof opt['indent'] == 'string'
        ? opt['indent']
        : (opt['indent'] || options['indent']);

    options['remove-attributes'] = opt['remove-attributes'] || options['remove-attributes'];

    // Default to false: only an explicit `true` turns these on.
    options['remove-comments'] = opt['remove-comments'] === true;
    options['remove-empty-tags'] = opt['remove-empty-tags'] || options['remove-empty-tags'];
    options['remove-tags'] = opt['remove-tags'] || options['remove-tags'];
    options['replace-nbsp'] = opt['replace-nbsp'] === true;

    // `>= 0` rather than `||` so that 0 is accepted (indent() treats a falsy
    // wrap as "do not wrap").
    options['wrap'] = opt['wrap'] >= 0 ? opt['wrap'] : options['wrap'];

    // concat() builds new arrays, so neither the defaults above nor the
    // caller's arrays are mutated.
    if (opt['add-break-around-tags']) {
        options['break-around-tags'] = options['break-around-tags'].concat(opt['add-break-around-tags']);
    }

    if (opt['add-remove-attributes']) {
        options['remove-attributes'] = options['remove-attributes'].concat(opt['add-remove-attributes']);
    }

    if (opt['add-remove-tags']) {
        options['remove-tags'] = options['remove-tags'].concat(opt['add-remove-tags']);
    }
}
111
/**
 * Decide whether a line break belongs before and after `node`.
 *
 * Removed nodes and text never get one; comments follow the
 * `break-around-comments` option; any other node gets one when its name is
 * listed in `break-around-tags` or when breakWithin() reports that its
 * content needs breaking.
 *
 * @param {Object} node DOM node produced by the parser.
 * @returns {boolean}
 */
function breakAround(node) {
    var kind = node.type;

    if (shouldRemove(node) || kind == 'text') {
        return false;
    }

    if (kind == 'comment') {
        return options['break-around-comments'];
    }

    return options['break-around-tags'].indexOf(node.name) > -1 || breakWithin(node);
}
131
/**
 * Decide whether line breaks belong just inside `node`'s opening and closing
 * tags, i.e. whether any child needs a break around it.
 *
 * Only nodes of type 'tag' that are not being removed can qualify.
 *
 * @param {Object} node DOM node produced by the parser.
 * @returns {boolean}
 */
function breakWithin(node) {
    if (shouldRemove(node) || node.type != 'tag') {
        return false;
    }

    // breakAround() itself falls through to breakWithin() for every child it
    // does not decide on directly, so a child for which breakAround() is
    // false can never have breakWithin() true. A second
    // `children.some(breakWithin)` pass therefore cannot change the result;
    // it only re-walks each subtree, doubling the work per nesting level.
    return node.children.some(breakAround);
}
143
/**
 * Report whether `node` carries no visible content.
 *
 * Text and comment nodes are empty when their data is whitespace only. With
 * the `replace-nbsp` option on, `&nbsp;` entities in text count as
 * whitespace too. Any other node is empty when it has no children or all of
 * its children are empty.
 *
 * @param {Object} node DOM node produced by the parser.
 * @returns {boolean}
 */
function isEmpty(node) {
    var text;

    if (node.type == 'text') {
        text = node.data;

        if (options['replace-nbsp']) {
            // The result of this replacement has to feed the emptiness check;
            // computing it and discarding it leaves the option without effect.
            text = text.replace(/&nbsp;/g, ' ');
        }

        return !text.trim();
    }

    if (node.type == 'comment') {
        return !node.data.trim();
    }

    // Nodes without a children list can be reached through the recursion
    // below (presumably directives, which render() handles separately and
    // renderDirective() only reads `data` from). They are content, not empty.
    if (!node.children) {
        return false;
    }

    return !node.children.length || node.children.every(isEmpty);
}
159
/**
 * Decide whether `node` is dropped from the output.
 *
 * - text: dropped when empty
 * - comment: dropped when `remove-comments` is on, or when empty
 * - name listed in `remove-empty-tags`: dropped only when empty
 * - anything else: dropped when its name is listed in `remove-tags`
 *
 * @param {Object} node DOM node produced by the parser.
 * @returns {boolean}
 */
function shouldRemove(node) {
    var isComment = node.type == 'comment',
        tagName;

    if (node.type == 'text' || isComment) {
        return (isComment && options['remove-comments']) || isEmpty(node);
    }

    tagName = node.name;

    // The empty-tags list takes precedence over the remove-tags list.
    return options['remove-empty-tags'].indexOf(tagName) != -1
        ? isEmpty(node)
        : options['remove-tags'].indexOf(tagName) != -1;
}
175
/**
 * Render a text node.
 *
 * Text directly inside a node of type 'script' or 'style' is returned
 * untouched. Other text has its whitespace runs collapsed to single spaces
 * and is trimmed on any side that touches the start/end of its parent or a
 * sibling that gets a line break around it.
 *
 * @param {Object} node Text node produced by the parser.
 * @returns {string} Rendered text, or '' when the node is to be removed.
 */
function renderText(node) {
    if (shouldRemove(node)) {
        return '';
    }

    var text = node.data;

    if (node.parent && (node.parent.type == 'script' || node.parent.type == 'style')) {
        return text;
    }

    if (options['replace-nbsp']) {
        // Target the `&nbsp;` entity explicitly. Literal spaces and U+00A0
        // characters are already folded by the `\s+` collapse below, so
        // matching those here would make the option a no-op.
        text = text.replace(/&nbsp;/g, ' ');
    }

    if (!node.prev || breakAround(node.prev)) {
        text = text.trimLeft();
    }

    if (!node.next || breakAround(node.next)) {
        text = text.trimRight();
    }

    // replace all whitespace characters with a single space
    return text.replace(/\s+/g, ' ');
}
202
/**
 * Render a comment node as `<!--…-->`, placed on its own line when
 * breakAround() says so.
 *
 * @param {Object} node Comment node produced by the parser.
 * @returns {string} Rendered comment, or '' when the node is to be removed.
 */
function renderComment(node) {
    var markup;

    if (shouldRemove(node)) {
        return '';
    }

    markup = '<!--' + node.data + '-->';

    return breakAround(node) ? '\n' + markup + '\n' : markup;
}
216
/**
 * Render an element node and, recursively, its children.
 *
 * A node that shouldRemove() rejects is either dropped entirely (when empty)
 * or unwrapped, i.e. replaced by its rendered children. Attributes listed in
 * `remove-attributes` are omitted. Elements named in `voidElements` get no
 * closing tag. Line breaks are added around and/or inside the element as
 * decided by breakAround() and breakWithin().
 *
 * @param {Object} node Element node produced by the parser.
 * @returns {string} Rendered markup.
 */
function renderTag(node) {
    if (shouldRemove(node)) {
        return isEmpty(node) ? '' : render(node.children);
    }

    var openTag = '<' + node.name,
        attrib,
        value;

    for (attrib in node.attribs) {
        if (options['remove-attributes'].indexOf(attrib) == -1) {
            // Values are always written inside double quotes, so a literal
            // `"` (possible when the source attribute was single-quoted)
            // must be escaped or it would terminate the attribute early.
            value = String(node.attribs[attrib]).replace(/"/g, '&quot;');
            openTag += ' ' + attrib + '="' + value + '"';
        }
    }

    openTag += '>';

    if (voidElements.indexOf(node.name) != -1) {
        if (breakAround(node)) {
            return '\n' + openTag + '\n';
        }

        return openTag;
    }

    var closeTag = '</' + node.name + '>';

    if (breakAround(node)) {
        openTag = '\n' + openTag;
        closeTag = closeTag + '\n';
    }

    if (breakWithin(node)) {
        openTag = openTag + '\n';
        closeTag = '\n' + closeTag;
    }

    return openTag + render(node.children) + closeTag;
}
258
/**
 * Render a directive node by wrapping its raw data in angle brackets.
 *
 * @param {Object} node Directive node; only `node.data` is read.
 * @returns {string}
 */
function renderDirective(node) {
    var body = node.data;

    return '<' + body + '>';
}
262
/**
 * Render a list of DOM nodes to a single string, dispatching on node type,
 * and collapse every run of consecutive line breaks into one.
 *
 * @param {Array} nodes DOM nodes produced by the parser.
 * @returns {string}
 */
function render(nodes) {
    var markup = nodes.reduce(function (soFar, node) {
        switch (node.type) {
            case 'root':
                return soFar + render(node.children);
            case 'text':
                return soFar + renderText(node);
            case 'comment':
                return soFar + renderComment(node);
            case 'directive':
                return soFar + renderDirective(node);
            default:
                // every remaining node type is rendered as an element
                return soFar + renderTag(node);
        }
    }, '');

    // remove extra line breaks
    return markup.replace(/\n+/g, '\n');
}
295
/**
 * Build the leading whitespace for a nesting depth by repeating the
 * configured `indent` string.
 *
 * @param {number} indentLevel Nesting depth; zero or less yields ''.
 * @returns {string}
 */
function getIndent(indentLevel) {
    var prefix = '',
        remaining = indentLevel;

    while (remaining > 0) {
        prefix += options['indent'];
        remaining--;
    }

    return prefix;
}
305
/**
 * Break an over-long line at spaces so that each piece fits within the
 * configured `wrap` width where possible; continuation pieces are prefixed
 * with `indent`.
 *
 * The break point is the last space at or before the wrap column. When no
 * usable space exists there, the first space after it is used instead, so a
 * single long token stays intact on an over-long line.
 *
 * @param {string} line Line to wrap. indent() passes it already prefixed
 *     with `indent` and still carrying its trailing '\n'.
 * @param {string} indent Prefix for every continuation piece.
 * @returns {string} The pieces joined with '\n'.
 */
function wrap(line, indent) {
    var limit = options['wrap'],
        pieces = [],
        rest = line,
        bound;

    while (rest.length > limit) {
        bound = rest.lastIndexOf(' ', limit);

        // A space inside the leading indent is not a usable break point:
        // splitting there gives back the very same line as remainder, and
        // wrapping would never terminate. Treat it like "no space found"
        // and look forward, past both the wrap column and the indent.
        if (bound < indent.length) {
            bound = rest.indexOf(' ', Math.max(limit, indent.length));

            if (bound == -1) {
                break;
            }
        }

        pieces.push(rest.substr(0, bound));

        // `bound >= indent.length` here, so the remainder is strictly
        // shorter than `rest` and the loop always makes progress.
        rest = indent + rest.substr(bound + 1);
    }

    pieces.push(rest);

    return pieces.join('\n');
}
326
/**
 * Indent, and where necessary wrap, every newline-terminated line of `html`.
 *
 * Nesting depth is tracked across lines by scanning each line for opening
 * and closing tags (names in `voidElements` are skipped). A line is indented
 * to the depth in effect before any tags it opens itself and leaves open.
 * Text after the final '\n' is not matched by the line pattern and is
 * returned as-is.
 *
 * @param {string} html Markup produced by render().
 * @returns {string}
 */
function indent(html) {
    var depth = 0;

    return html.replace(/.*\n/g, function (line) {
        var tagPattern = /<\/?(\w+).*?>/g,
            openedHere = 0,
            match,
            prefix,
            indented;

        for (match = tagPattern.exec(line); match; match = tagPattern.exec(line)) {
            if (voidElements.indexOf(match[1]) != -1) {
                continue;
            }

            if (match[0].indexOf('</') == -1) {
                openedHere++;
                depth++;
            } else {
                // a closing tag cancels at most one tag opened on this line
                if (openedHere > 0) {
                    openedHere--;
                }
                depth--;
            }
        }

        prefix = getIndent(depth - openedHere);
        indented = prefix + line;

        return options['wrap'] && indented.length > options['wrap']
            ? wrap(indented, prefix)
            : indented;
    });
}
365
/**
 * Parse `html`, re-render it according to the options, and hand the
 * indented, trimmed result to `callback`.
 *
 * @param {string} html Markup to clean.
 * @param {Object|Function} [opt] Option overrides passed to setup(). May be
 *     omitted, in which case the second argument is taken as the callback.
 * @param {Function} callback Called with the cleaned markup as its only
 *     argument.
 * @throws Rethrows any error the DOM handler reports.
 */
function clean(html, opt, callback) {
    if (typeof opt == 'function') {
        callback = opt;
        opt = null;
    }

    setup(opt);

    var handler = new htmlparser.DomHandler(function (err, dom) {
        if (err) {
            throw err;
        }

        callback(indent(render(dom)).trim());
    });

    var parser = new htmlparser.Parser(handler);
    parser.write(html);
    // end() is the parser's documented way to finish input; done() is only
    // kept by htmlparser2 as a backwards-compatibility alias for it.
    parser.end();
}
389
// Public API: clean() is the module's only export.
module.exports = { clean: clean };