// Listing captured from the UNPKG file viewer (5.76 kB, JavaScript, "View Raw").
// Parser library used by clean() to turn the input markup into a DOM.
var htmlparser = require('htmlparser2');

// Settings for the current run. setup() assigns a fresh object to this
// variable, so the empty object is only a placeholder until then.
var options = {};
3
/**
 * Resets the module-level `options` object to the built-in defaults and then
 * applies the caller's overrides on top.
 *
 * Keys read from `opt`:
 *   - 'attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove':
 *     a truthy value replaces the default list wholesale.
 *   - 'add-attr-to-remove', 'add-block-tags', 'add-empty-tags',
 *     'add-tags-to-remove': concatenated onto the list selected above.
 *   - 'break-around-comments', 'break-after-br': enabled unless exactly false.
 *   - 'remove-comments', 'remove-empty-paras': disabled unless exactly true.
 *   - 'indent': string used for one indentation level. Any string is
 *     accepted, including '' to switch indentation off; other types are
 *     ignored and the default is kept.
 *
 * @param {Object} [opt] Overrides; when falsy only the defaults are installed.
 */
function setup(opt) {
    options = {
        'attr-to-remove': [
            'align',
            'bgcolor',
            'border',
            'cellpadding',
            'cellspacing',
            'color',
            'disabled',
            'height',
            'target',
            'valign',
            'width'
        ],
        'block-tags': [
            'blockquote',
            'div',
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'hr',
            'p',
            'table',
            'td',
            'tr'
        ],
        'break-around-comments': true,
        'break-after-br': true,
        'empty-tags': [
            'br',
            'hr',
            'img'
        ],
        // NOTE(review): whitespace in this copy of the file looks collapsed;
        // confirm the intended default indent width against upstream.
        'indent': ' ',
        'remove-comments': false,
        'remove-empty-paras': false,
        'tags-to-remove': [
            'center',
            'font'
        ]
    };

    if (!opt) {
        return;
    }

    ['attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove'].forEach(function (key) {
        var extra = opt['add-' + key];

        if (opt[key]) {
            options[key] = opt[key];
        }

        // concat() copies, so neither the defaults nor the caller's list is mutated.
        if (extra) {
            options[key] = options[key].concat(extra);
        }
    });

    options['break-around-comments'] = opt['break-around-comments'] !== false;
    options['break-after-br'] = opt['break-after-br'] !== false;
    options['remove-comments'] = opt['remove-comments'] === true;
    options['remove-empty-paras'] = opt['remove-empty-paras'] === true;

    // A plain `||` fallback would throw away an explicit '' (no indentation).
    if (typeof opt['indent'] === 'string') {
        options['indent'] = opt['indent'];
    }
}
80
/**
 * Reports whether a DOM node carries no content.
 *
 * Text and comment nodes are empty when their `data` is blank after trimming.
 * A node with a `children` array is empty when it has no children or when
 * every child is itself empty.
 *
 * @param {Object} node DOM node with a `type` and either `data` or `children`.
 * @returns {boolean} true when nothing but whitespace is found.
 */
function isEmpty(node) {
    if (node.type === 'text' || node.type === 'comment') {
        return !node.data.trim();
    }

    // Some node kinds have no child list at all (htmlparser2 directive nodes
    // appear to be one such case). Reading `.length` on them would throw, so
    // they are conservatively reported as non-empty.
    if (!node.children) {
        return false;
    }

    // every() is true for an empty array, which covers the childless case.
    return node.children.every(isEmpty);
}
88
/**
 * Serialises a text node, squeezing every run of whitespace in its `data`
 * down to a single space.
 *
 * @param {Object} node Text node whose `data` is a string.
 * @returns {string} The whitespace-normalised text.
 */
function renderText(node) {
    var whitespaceRun = /\s+/g;
    var text = node.data;

    return text.replace(whitespaceRun, ' ');
}
92
/**
 * Serialises a comment node according to the active options.
 *
 * Returns '' when 'remove-comments' is set; otherwise the comment markup,
 * wrapped in newlines when 'break-around-comments' is set.
 *
 * @param {Object} node Comment node; `data` is the text between the markers.
 * @returns {string} Markup for the comment, possibly empty.
 */
function renderComment(node) {
    var markup;

    if (options['remove-comments']) {
        return '';
    }

    markup = '<!--' + node.data + '-->';

    return options['break-around-comments'] ? '\n' + markup + '\n' : markup;
}
104
/**
 * Serialises an element node according to the active options.
 *
 *   - An empty <p> is dropped when 'remove-empty-paras' is set.
 *   - Tags listed in 'tags-to-remove' are unwrapped: only their children are
 *     rendered.
 *   - Attributes listed in 'attr-to-remove' are omitted; all others are
 *     written as name="value".
 *   - Tags listed in 'empty-tags' get no closing tag; <br> is followed by a
 *     newline when 'break-after-br' is set.
 *   - Tags listed in 'block-tags' get a newline on both sides of their
 *     opening and closing tags.
 *
 * @param {Object} node Element node with `name`, `attribs` and `children`.
 * @returns {string} Markup for the element, possibly empty.
 */
function renderTag(node) {
    if (options['remove-empty-paras'] && node.name === 'p' && isEmpty(node)) {
        return '';
    }

    if (options['tags-to-remove'].indexOf(node.name) > -1) {
        return node.children.length ? render(node.children) : '';
    }

    var openTag = '<' + node.name,
        closeTag,
        value;

    for (var attrib in node.attribs) {
        if (options['attr-to-remove'].indexOf(attrib) === -1) {
            // Values are always emitted in double quotes, so a literal quote
            // (possible when the input used single quotes) has to be escaped
            // or it would end the attribute early.
            value = String(node.attribs[attrib]).replace(/"/g, '&quot;');
            openTag += ' ' + attrib + '="' + value + '"';
        }
    }

    openTag += '>';

    if (options['empty-tags'].indexOf(node.name) > -1) {
        if (options['break-after-br'] && node.name === 'br') {
            return openTag + '\n';
        }

        return openTag;
    }

    closeTag = '</' + node.name + '>';

    if (options['block-tags'].indexOf(node.name) > -1) {
        openTag = '\n' + openTag + '\n';
        closeTag = '\n' + closeTag + '\n';
    }

    if (!node.children.length) {
        return openTag + closeTag;
    }

    return openTag + render(node.children) + closeTag;
}
150
/**
 * Serialises a list of DOM nodes to markup.
 *
 * Each node is dispatched on its `type`: 'root' recurses into the children,
 * 'text' and 'comment' use their dedicated renderers, 'directive' (such as a
 * doctype) is written back verbatim, and anything else is treated as an
 * element. Finally every run of two or more whitespace characters in the
 * result is replaced by a single newline.
 *
 * @param {Array} nodes DOM nodes to serialise.
 * @returns {string} The combined markup.
 */
function render(nodes) {
    var html = '';

    nodes.forEach(function (node) {
        switch (node.type) {
        case 'root':
            html += render(node.children);
            break;
        case 'text':
            html += renderText(node);
            break;
        case 'comment':
            html += renderComment(node);
            break;
        case 'directive':
            // Directives have no children, so the element renderer would
            // throw on them; `data` already holds everything between the
            // angle brackets.
            html += '<' + node.data + '>';
            break;
        default:
            html += renderTag(node);
        }
    });

    return html.replace(/\s{2,}/g, '\n');
}
175
/**
 * Prefixes a line with the configured indent string once per level.
 *
 * @param {string} line Text to indent.
 * @param {number} indentLevel How many times to repeat options['indent'];
 *     zero or negative values leave the line untouched.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
    var prefix = '',
        remaining = indentLevel;

    while (remaining > 0) {
        prefix += options['indent'];
        remaining--;
    }

    return prefix + line;
}
185
/**
 * Indents rendered markup line by line.
 *
 * Only lines that end in a newline are processed. The first tag found on a
 * line decides what happens: an opening block tag is indented at the current
 * level and then deepens it, a closing block tag first reduces the level and
 * is then indented, and every other line is indented at the current level.
 *
 * Tags listed in 'empty-tags' never change the level, even when they are also
 * listed in 'block-tags' (as `hr` is by default): they have no closing tag,
 * so counting them as openers would shift the rest of the document one level
 * too deep.
 *
 * @param {string} html Markup to indent.
 * @returns {string} The indented markup.
 */
function indent(html) {
    var indentLevel = 0;

    return html.replace(/.*\n/g, function (line) {
        var match = line.match(/<\/?(\w+).*?>/);

        if (!match) {
            return indentLine(line, indentLevel);
        }

        var tag = match[0],
            tagName = match[1],
            isBlock = options['block-tags'].indexOf(tagName) > -1,
            isVoid = options['empty-tags'].indexOf(tagName) > -1;

        if (!isBlock || isVoid) {
            return indentLine(line, indentLevel);
        }

        // The match always starts with '<', so a closing tag has '/' at
        // index 1. Searching the whole tag for '</' could be fooled by an
        // attribute value containing it.
        if (tag.charAt(1) === '/') {
            indentLevel--;
            return indentLine(line, indentLevel);
        }

        line = indentLine(line, indentLevel);
        indentLevel++;

        return line;
    });
}
214
/**
 * Parses `html`, re-renders it with the cleaning rules applied, indents the
 * result and hands the trimmed string to `callback`.
 *
 * May be called as clean(html, callback) or clean(html, opt, callback). A
 * parse error reported to the DOM handler is thrown rather than passed on.
 *
 * @param {string} html Markup to clean.
 * @param {Object|Function} [opt] Options forwarded to setup(), or the
 *     callback when no options are given.
 * @param {Function} [callback] Receives the cleaned markup as its only argument.
 */
function clean(html, opt, callback) {
    var settings = opt,
        done = callback;

    // Two-argument form: the second argument is the callback.
    if (typeof opt === 'function') {
        done = opt;
        settings = null;
    }

    setup(settings);

    function onDomReady(err, dom) {
        if (err) {
            throw err;
        }

        var output = indent(render(dom)).trim();

        done(output);
    }

    var domHandler = new htmlparser.DomHandler(onDomReady);
    var parser = new htmlparser.Parser(domHandler);

    parser.write(html);
    parser.done();
}
238
239module.exports = {
240 clean: clean
241};