UNPKG

6.28 kBJavaScriptView Raw
/**
 * Active configuration shared by every helper in this module.
 * It is rebuilt from the defaults by setup() each time it is called.
 */
var options = {};

/**
 * Resets `options` to the built-in defaults and then applies the caller's
 * overrides.
 *
 * List options ('attr-to-remove', 'block-tags', 'empty-tags',
 * 'tags-to-remove') replace the default list when supplied; the matching
 * 'add-*' option appends to whichever list is in effect.
 * Flags that default to true are only switched off by an explicit `false`;
 * flags that default to false are only switched on by an explicit `true`.
 *
 * @param {Object} [opt] - Optional overrides keyed by option name.
 */
function setup(opt) {
    options = {
        'attr-to-remove': [
            'align',
            'valign',
            'bgcolor',
            'color',
            'width',
            'height',
            'border',
            'cellpadding',
            'cellspacing'
        ],
        'block-tags': [
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'div',
            'p',
            'table',
            'tr',
            'td',
            'blockquote',
            'hr'
        ],
        'break-after-br': true,
        'close-empty-tags': false,
        'empty-tags': [
            'br',
            'hr',
            'img'
        ],
        'fix-end-tags': true,
        'indent': ' ',
        'pretty': true,
        'remove-comments': false,
        'remove-empty-paras': false,
        'tags-to-remove': [
            'font'
        ]
    };

    if (!opt) {
        return;
    }

    // Replace-then-extend for every list option. concat() builds a new array,
    // so neither the defaults nor the caller's arrays are mutated.
    ['attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove'].forEach(function (name) {
        var additions = opt['add-' + name];

        if (opt[name]) {
            options[name] = opt[name];
        }

        if (additions) {
            options[name] = options[name].concat(additions);
        }
    });

    // Default-on flags: anything other than an explicit `false` keeps them on.
    options['break-after-br'] = opt['break-after-br'] !== false;
    options['fix-end-tags'] = opt['fix-end-tags'] !== false;
    options['pretty'] = opt['pretty'] !== false;

    // Default-off flags: only an explicit `true` turns them on.
    options['close-empty-tags'] = opt['close-empty-tags'] === true;
    options['remove-comments'] = opt['remove-comments'] === true;
    options['remove-empty-paras'] = opt['remove-empty-paras'] === true;

    // An empty string is a legitimate indent ("line breaks but no
    // indentation"), so it must not fall back to the default the way a plain
    // `||` would make it. Other falsy values still keep the default.
    if (typeof opt['indent'] === 'string' || opt['indent']) {
        options['indent'] = opt['indent'];
    }
}
80
/**
 * Turns every whitespace character (tab, newline, ...) into a plain space,
 * one for one, so the markup ends up on a single line.
 *
 * @param {string} html - Markup to normalise.
 * @returns {string} The markup with each whitespace character replaced by ' '.
 */
function replaceWhiteSpace(html) {
    var anyWhiteSpaceChar = /\s/g;

    return html.replace(anyWhiteSpaceChar, ' ');
}
84
/**
 * Collapses each run of two or more consecutive spaces into a single space.
 *
 * @param {string} html - Markup to compact.
 * @returns {string} The markup without repeated spaces.
 */
function removeExtraSpaces(html) {
    var repeatedSpaces = / {2,}/g;

    return html.replace(repeatedSpaces, ' ');
}
88
/**
 * Rewrites the first '>' of a tag (together with an optional preceding
 * space and/or slash) as '/>', producing the self-closed form.
 *
 * @param {string} tag - A single tag such as '<br>' or '<br />'.
 * @returns {string} The tag ending in '/>'.
 */
function closeEmptyTag(tag) {
    var tagEnding = / ?\/?>/;
    var selfClosedEnding = '/>';

    return tag.replace(tagEnding, selfClosedEnding);
}
92
/**
 * Strips the self-closing slash (and an optional space before it) from a
 * tag, so '<br />' and '<br/>' both become '<br>'. Tags without a trailing
 * slash are returned unchanged.
 *
 * @param {string} tag - A single tag.
 * @returns {string} The tag ending in a plain '>'.
 */
function removeTrailingSlash(tag) {
    var selfClosedEnding = / ?\/>/;
    var plainEnding = '>';

    return tag.replace(selfClosedEnding, plainEnding);
}
96
/**
 * Removes from a tag every quoted attribute whose name is listed in
 * options['attr-to-remove']; all other attributes are left untouched.
 *
 * Only attributes written as ` name="value"` or ` name='value'` are
 * considered. The value must be closed by the same quote character that
 * opened it and may be empty, so `align=""` and `title="it's"` are each
 * treated as one complete attribute.
 *
 * @param {string} tag - A single tag, e.g. '<td align="left" class="x">'.
 * @returns {string} The tag without the unwanted attributes.
 */
function cleanAttributes(tag) {
    // Each alternative pairs a quote with its own kind and allows an empty
    // value; a looser "any quote ... any quote" pattern would run past an
    // empty value into the next attribute or stop at an embedded apostrophe.
    return tag.replace(/ (\w+)=(?:"[^"]*"|'[^']*')/g, function (attribute, attributeName) {
        // The configured names are lower-case, so compare case-insensitively.
        if (options['attr-to-remove'].indexOf(attributeName.toLowerCase()) > -1) {
            return '';
        }

        return attribute;
    });
}
106
/**
 * Normalises every tag in the markup and repairs unbalanced end tags.
 *
 * For each tag found:
 *  - tag and attribute names are lower-cased (quoted attribute values keep
 *    their original case);
 *  - tags listed in options['tags-to-remove'] are dropped (their content
 *    stays);
 *  - tags listed in options['empty-tags'] are written self-closed or not,
 *    depending on options['close-empty-tags'];
 *  - unwanted attributes are stripped via cleanAttributes();
 *  - an end tag that closes something other than the innermost open tag
 *    first closes the tags opened after it; an end tag with no matching open
 *    tag is discarded.
 * Tags still open at the end of the input are closed at the end.
 *
 * NOTE(review): options['fix-end-tags'] is not consulted here - end tags are
 * always repaired. Confirm whether that option is meant to gate this.
 *
 * @param {string} html - Markup to process.
 * @returns {string} Markup with normalised, balanced tags.
 */
function cleanTags(html) {
    // Stack of currently open tag names, innermost first.
    var openTags = [];

    html = html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
        // Lower-case only what lies outside quotes. Quoted attribute values
        // (URLs, alt text, ...) can be case-sensitive and must survive as-is.
        tag = tag.replace(/"[^"]*"|'[^']*'|[^"']+/g, function (part) {
            var firstChar = part.charAt(0);

            if (firstChar === '"' || firstChar === "'") {
                return part;
            }

            return part.toLowerCase();
        });
        tagName = tagName.toLowerCase();

        if (options['tags-to-remove'].indexOf(tagName) > -1) {
            return '';
        }

        if (options['empty-tags'].indexOf(tagName) > -1) {
            if (options['close-empty-tags']) {
                tag = closeEmptyTag(tag);
            } else {
                tag = removeTrailingSlash(tag);
            }

            return cleanAttributes(tag);
        }

        if (tag.indexOf('</') === -1) {
            // open tag
            openTags.unshift(tagName);

            return cleanAttributes(tag);
        }

        if (openTags[0] === tagName) {
            // close tag matching the innermost open tag
            openTags.shift();

            return tag;
        }

        var openTagIndex = openTags.indexOf(tagName);

        if (openTagIndex > -1) {
            // tags are out of order - close previous tags, then close this tag
            return '</' + openTags.splice(0, openTagIndex + 1).join('></') + '>';
        }

        // tag was never opened or was already closed - discard
        return '';
    });

    if (openTags.length) {
        // append remaining tags
        html += '</' + openTags.join('></') + '>';
    }

    return html;
}
160
/**
 * Deletes every HTML comment ('<!-- ... -->') that starts and ends on the
 * same line of the input.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup without those comments.
 */
function removeComments(html) {
    var htmlComment = /<!--.*?-->/g;

    return html.replace(htmlComment, '');
}
164
/**
 * Deletes paragraphs that contain nothing but whitespace, e.g. '<p></p>' or
 * '<p class="x"> </p>'. The opening tag may carry any number of quoted
 * attributes.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup without empty paragraphs.
 */
function removeEmptyParagraphs(html) {
    // Attribute values are matched as "[^"]*" / '[^']*' so a match can never
    // run past the closing quote. A lazy ".+?" here could stretch from one
    // paragraph's attribute to a later empty paragraph and delete everything
    // in between, including real content.
    return html.replace(/<p(?: [\w-]+=(?:"[^"]*"|'[^']*'))*>\s*<\/p>/g, '');
}
168
/**
 * Inserts the line breaks used for pretty-printing: a newline before and
 * after every tag listed in options['block-tags'], and a newline after
 * '<br>' when options['break-after-br'] is set. All other tags are left
 * as they are.
 *
 * @param {string} html - Single-line markup.
 * @returns {string} The markup with newlines around block-level tags.
 */
function addLineBreaks(html) {
    var blockTags = options['block-tags'];

    return html.replace(/<\/?(\w+).*?>/g, function (markup, name) {
        var isBlockLevel = blockTags.indexOf(name) > -1;

        if (isBlockLevel) {
            return '\n' + markup + '\n';
        }

        var breakAfter = name == 'br' && options['break-after-br'];

        return breakAfter ? markup + '\n' : markup;
    });
}
182
/**
 * Replaces every run of two or more whitespace characters with a single
 * newline, which removes blank lines and the stray spaces around line
 * breaks.
 *
 * @param {string} html - Markup containing line breaks.
 * @returns {string} The markup with whitespace runs collapsed to '\n'.
 */
function removeBlankLines(html) {
    var whiteSpaceRun = /\s{2,}/g;

    return html.replace(whiteSpaceRun, '\n');
}
186
/**
 * Prefixes a line with options['indent'] repeated `indentLevel` times.
 * A level of zero or less adds nothing.
 *
 * @param {string} line - The line to indent.
 * @param {number} indentLevel - How many indent units to prepend.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
    var prefix = '';
    var remaining = indentLevel;

    while (remaining > 0) {
        prefix += options['indent'];
        remaining--;
    }

    return prefix + line;
}
196
/**
 * Indents newline-terminated lines according to block-tag nesting.
 *
 * Only the first tag on each line is inspected. An opening tag listed in
 * options['block-tags'] is printed at the current level and then raises the
 * level; a closing block tag lowers the level first and is then printed.
 * Lines without a tag, or whose first tag is not a block tag, are printed at
 * the current level. Text after the last newline is left as it is.
 *
 * Tags listed in options['empty-tags'] never change the level, even when
 * they are also block tags (as 'hr' is by default): they have no closing tag
 * to bring the level back down.
 *
 * @param {string} html - Markup already split into lines.
 * @returns {string} The indented markup.
 */
function indent(html) {
    var indentLevel = 0;

    return html.replace(/.*\n/g, function (line) {
        var match = line.match(/<\/?(\w+).*?>/);

        if (!match) {
            return indentLine(line, indentLevel);
        }

        var tag = match[0],
            tagName = match[1];

        var isBlockTag = options['block-tags'].indexOf(tagName) > -1;
        var isEmptyTag = options['empty-tags'].indexOf(tagName) > -1;

        if (!isBlockTag || isEmptyTag) {
            return indentLine(line, indentLevel);
        }

        if (tag.indexOf('</') === -1) {
            // opening block tag: print at the current level, nest what follows
            line = indentLine(line, indentLevel);
            indentLevel++;
        } else {
            // closing block tag: step back out before printing
            indentLevel--;
            line = indentLine(line, indentLevel);
        }

        return line;
    });
}
225
/**
 * Cleans an HTML string.
 *
 * Steps, in order: apply `opt` through setup(); flatten whitespace to single
 * spaces; normalise and balance tags; optionally remove comments
 * ('remove-comments') and empty paragraphs ('remove-empty-paras'); and, when
 * 'pretty' is on, add line breaks, drop blank lines and indent. The result
 * is trimmed.
 *
 * @param {string} html - Markup to clean.
 * @param {Object} [opt] - Option overrides, passed to setup().
 * @returns {string} The cleaned markup.
 */
function clean(html, opt) {
    var result;

    setup(opt);

    // Flatten whitespace before touching tags so each tag sits on one line.
    result = cleanTags(removeExtraSpaces(replaceWhiteSpace(html)));

    if (options['remove-comments']) {
        result = removeComments(result);
    }

    if (options['remove-empty-paras']) {
        result = removeEmptyParagraphs(result);
    }

    if (!options['pretty']) {
        return result.trim();
    }

    return indent(removeBlankLines(addLineBreaks(result))).trim();
}
249
// Public API: this statement exports only `clean`.
module.exports = { clean: clean };