// Effective settings read by the helpers in this module. setup() rebuilds this
// object from the defaults every time it is called.
var options = {};

/**
 * Resets `options` to the built-in defaults and then applies the overrides
 * found in `opt`.
 *
 * Recognised keys of `opt`:
 *  - 'attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove':
 *    a truthy value replaces the default list. The matching 'add-<name>' key
 *    (for example 'add-block-tags') is concatenated onto whichever list is in
 *    effect.
 *  - 'break-after-br', 'fix-end-tags', 'pretty': on unless exactly `false`.
 *  - 'close-empty-tags', 'remove-comments', 'remove-empty-paras': off unless
 *    exactly `true`.
 *  - 'indent': the text repeated once per indent level. An empty string is
 *    honoured (line breaks without indentation); other falsy values fall back
 *    to the default.
 *
 * @param {Object} [opt] Overrides; when falsy only the defaults are installed.
 * @returns {undefined}
 */
function setup(opt) {
    options = {
        'attr-to-remove': [
            'align',
            'valign',
            'bgcolor',
            'color',
            'width',
            'height',
            'border',
            'cellpadding',
            'cellspacing'
        ],
        'block-tags': [
            'h1',
            'h2',
            'h3',
            'h4',
            'h5',
            'h6',
            'div',
            'p',
            'table',
            'tr',
            'td',
            'blockquote',
            'hr'
        ],
        'break-after-br': true,
        'close-empty-tags': false,
        'empty-tags': [
            'br',
            'hr',
            'img'
        ],
        'fix-end-tags': true,
        'indent': ' ',
        'pretty': true,
        'remove-comments': false,
        'remove-empty-paras': false,
        'tags-to-remove': [
            'font'
        ]
    };

    if (!opt) {
        return;
    }

    var listOptions = ['attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove'];
    var onUnlessFalse = ['break-after-br', 'fix-end-tags', 'pretty'];
    var offUnlessTrue = ['close-empty-tags', 'remove-comments', 'remove-empty-paras'];

    listOptions.forEach(function (name) {
        var list = opt[name] || options[name];
        var additions = opt['add-' + name];

        options[name] = additions ? list.concat(additions) : list;
    });

    onUnlessFalse.forEach(function (name) {
        options[name] = opt[name] !== false;
    });

    offUnlessTrue.forEach(function (name) {
        options[name] = opt[name] === true;
    });

    // `opt.indent || default` would discard an explicit '' (a request for no
    // indentation), so the empty string is accepted before falling back.
    if (opt['indent'] === '') {
        options['indent'] = '';
    } else {
        options['indent'] = opt['indent'] || options['indent'];
    }
}
|
80 |
|
/**
 * Turns every single whitespace character (tab, newline, etc.) into a plain
 * space. Runs of whitespace are not collapsed here: each character becomes
 * its own space.
 *
 * @param {string} html Markup to normalise.
 * @returns {string} The markup with all whitespace characters replaced.
 */
function replaceWhiteSpace(html) {
    var anyWhiteSpace = /\s/g;
    var normalised = html.replace(anyWhiteSpace, ' ');

    return normalised;
}
|
84 |
|
/**
 * Collapses each run of two or more consecutive space characters into a
 * single space. Other whitespace characters are left alone.
 *
 * @param {string} html Markup to compact.
 * @returns {string} The markup without repeated spaces.
 */
function removeExtraSpaces(html) {
    var repeatedSpaces = / {2,}/g;
    var compacted = html.replace(repeatedSpaces, ' ');

    return compacted;
}
|
88 |
|
/**
 * Gives a tag a self-closing ending: the first `>` in the string, together
 * with an optional space and optional slash directly before it, is replaced
 * with `/>`.
 *
 * @param {string} tag A single tag, e.g. `<br>` or `<br />`.
 * @returns {string} The tag ending in `/>`.
 */
function closeEmptyTag(tag) {
    var tagEnding = / ?\/?>/;
    var selfClosed = tag.replace(tagEnding, '/>');

    return selfClosed;
}
|
92 |
|
/**
 * Drops the self-closing slash from a tag: the first `/>` in the string,
 * together with an optional space directly before it, is replaced with `>`.
 *
 * @param {string} tag A single tag, e.g. `<br />`.
 * @returns {string} The tag ending in a plain `>`.
 */
function removeTrailingSlash(tag) {
    var selfClosingEnding = / ?\/>/;
    var opened = tag.replace(selfClosingEnding, '>');

    return opened;
}
|
96 |
|
/**
 * Removes from a single tag every quoted attribute whose name is listed in
 * `options['attr-to-remove']`. All other attributes are left untouched.
 *
 * Only attributes written as ` name="value"` or ` name='value'` (one leading
 * space, quoted value) are considered; unquoted values are not matched.
 *
 * @param {string} tag A single tag, e.g. `<td width="10" class="x">`.
 * @returns {string} The tag without the unwanted attributes.
 */
function cleanAttributes(tag) {
    // Each quote style is matched on its own and may be empty. The previous
    // pattern (['"].+?['"]) needed at least one character and accepted a
    // mismatched closing quote, so `width=""` swallowed the attribute after it.
    // Hyphens and colons are allowed in names (data-*, xml:lang).
    var quotedAttribute = / ([\w:-]+)=(?:"[^"]*"|'[^']*')/g;

    return tag.replace(quotedAttribute, function (attribute, attributeName) {
        var isUnwanted = options['attr-to-remove'].indexOf(attributeName) > -1;

        return isUnwanted ? '' : attribute;
    });
}
|
106 |
|
/**
 * Normalises every tag in `html` and repairs unbalanced end tags.
 *
 * For each tag found:
 *  - the tag name, attribute names and unquoted text are lower-cased, while
 *    quoted attribute values keep their case (URLs and alt/title text are
 *    case-sensitive);
 *  - tags listed in `options['tags-to-remove']` are dropped (the tag only,
 *    not its content);
 *  - tags listed in `options['empty-tags']` are written self-closed or not
 *    according to `options['close-empty-tags']`, then attribute-cleaned;
 *  - other opening tags are attribute-cleaned and remembered as open;
 *  - a closing tag closes the innermost open tag if it matches; if it matches
 *    an outer open tag, every tag opened inside it is closed first; if it
 *    matches nothing it is dropped.
 * Tags still open at the end of the input are closed in order.
 *
 * Note: this function does not consult `options['fix-end-tags']`; end tags are
 * always repaired.
 *
 * @param {string} html Markup to process.
 * @returns {string} The markup with cleaned, balanced tags.
 */
function cleanTags(html) {
    // Stack of currently open tag names, innermost first.
    var openTags = [];

    html = html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
        tagName = tagName.toLowerCase();

        // Lower-case the markup itself but leave quoted attribute values as
        // written; lower-casing the whole tag would corrupt href/src/alt text.
        tag = tag.replace(/"[^"]*"|'[^']*'|[^"']+/g, function (part) {
            var first = part.charAt(0);

            return first === '"' || first === "'" ? part : part.toLowerCase();
        });

        if (options['tags-to-remove'].indexOf(tagName) > -1) {
            return '';
        }

        if (options['empty-tags'].indexOf(tagName) > -1) {
            if (options['close-empty-tags']) {
                tag = closeEmptyTag(tag);
            } else {
                tag = removeTrailingSlash(tag);
            }

            return cleanAttributes(tag);
        }

        if (tag.indexOf('</') === -1) {
            // Opening tag: remember it so it can be closed later.
            openTags.unshift(tagName);

            return cleanAttributes(tag);
        }

        if (openTags[0] === tagName) {
            // Closing tag for the innermost open tag.
            openTags.shift();

            return tag;
        }

        var openTagIndex = openTags.indexOf(tagName);

        if (openTagIndex > -1) {
            // Closing tag for an outer element: close everything opened inside
            // it first, then the element itself.
            return '</' + openTags.splice(0, openTagIndex + 1).join('></') + '>';
        }

        // Closing tag with no matching opening tag.
        return '';
    });

    if (openTags.length) {
        // Close whatever is still open, innermost first.
        html += '</' + openTags.join('></') + '>';
    }

    return html;
}
|
160 |
|
/**
 * Deletes every `<!-- ... -->` comment from the markup. Each comment ends at
 * the nearest `-->`. Because `.` does not match line terminators, only
 * comments that sit on a single line are removed.
 *
 * @param {string} html Markup to strip.
 * @returns {string} The markup without comments.
 */
function removeComments(html) {
    var comment = /<!--.*?-->/g;
    var stripped = html.replace(comment, '');

    return stripped;
}
|
164 |
|
/**
 * Removes `<p>` elements that contain nothing but whitespace. The opening tag
 * may carry any number of quoted attributes.
 *
 * @param {string} html Markup to process.
 * @returns {string} The markup without empty paragraphs.
 */
function removeEmptyParagraphs(html) {
    // Attribute values are matched as "[^"]*" or '[^']*' so a value can never
    // extend past its own closing quote. The previous lazy `.+?` could stretch
    // from one paragraph's attribute into a later paragraph and delete the
    // real content in between.
    var emptyParagraph = /<p(?: [\w:-]+=(?:"[^"]*"|'[^']*'))*\s*>\s*<\/p>/g;

    return html.replace(emptyParagraph, '');
}
|
168 |
|
/**
 * Inserts line breaks around tags so the markup can be laid out line by line.
 * A tag whose name is in `options['block-tags']` gets a newline before and
 * after it. A `br` tag gets a newline after it when
 * `options['break-after-br']` is set. Every other tag is left as is.
 *
 * @param {string} html Markup to process.
 * @returns {string} The markup with newlines added.
 */
function addLineBreaks(html) {
    var anyTag = /<\/?(\w+).*?>/g;

    function withBreaks(tag, tagName) {
        var isBlockTag = options['block-tags'].indexOf(tagName) > -1;

        if (isBlockTag) {
            return '\n' + tag + '\n';
        }

        return tagName == 'br' && options['break-after-br'] ? tag + '\n' : tag;
    }

    return html.replace(anyTag, withBreaks);
}
|
182 |
|
/**
 * Replaces every run of two or more whitespace characters (spaces, newlines,
 * tabs, in any mix) with a single newline.
 *
 * @param {string} html Markup to process.
 * @returns {string} The markup with whitespace runs reduced to one newline.
 */
function removeBlankLines(html) {
    var whiteSpaceRun = /\s{2,}/g;
    var collapsed = html.replace(whiteSpaceRun, '\n');

    return collapsed;
}
|
186 |
|
/**
 * Prefixes a line with `options['indent']` repeated once per indent level.
 * A level of zero or less adds nothing.
 *
 * @param {string} line The line to indent.
 * @param {number} indentLevel How many times to repeat the indent text.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
    var prefix = '';
    var remaining = indentLevel;

    while (remaining > 0) {
        prefix += options['indent'];
        remaining -= 1;
    }

    return prefix + line;
}
|
196 |
|
/**
 * Indents the markup line by line according to block-tag nesting.
 *
 * Only lines terminated by a newline are processed. For each line the first
 * tag on it decides what happens:
 *  - an opening tag from `options['block-tags']` is written at the current
 *    level and raises the level for the following lines;
 *  - a closing block tag lowers the level and is written at the new level;
 *  - anything else (no tag, a non-block tag, or a tag listed in
 *    `options['empty-tags']`) is written at the current level.
 *
 * @param {string} html Markup in which block tags sit on their own lines.
 * @returns {string} The indented markup.
 */
function indent(html) {
    var indentLevel = 0;

    return html.replace(/.*\n/g, function (line) {
        var match = line.match(/<\/?(\w+).*?>/);

        if (!match) {
            return indentLine(line, indentLevel);
        }

        var tag = match[0];
        var tagName = match[1];
        var isBlockTag = options['block-tags'].indexOf(tagName) > -1;
        // An empty tag such as <hr> never gets an end tag, so it must not open
        // a nesting level even when it is also a block tag; otherwise every
        // following line stays one level too deep.
        var isEmptyTag = options['empty-tags'].indexOf(tagName) > -1;

        if (!isBlockTag || isEmptyTag) {
            return indentLine(line, indentLevel);
        }

        if (tag.indexOf('</') === -1) {
            line = indentLine(line, indentLevel);
            indentLevel++;

            return line;
        }

        // Never drop below zero: an unmatched end tag would otherwise leave
        // every later line under-indented.
        indentLevel = Math.max(0, indentLevel - 1);

        return indentLine(line, indentLevel);
    });
}
|
225 |
|
/**
 * Cleans an HTML string according to the given options.
 *
 * Steps, in order: install the options; turn all whitespace into spaces;
 * optionally remove comments; collapse repeated spaces; clean and balance
 * tags; optionally remove empty paragraphs; optionally pretty-print (line
 * breaks, whitespace-run collapsing, indentation); trim the result.
 *
 * @param {string} html The markup to clean.
 * @param {Object} [opt] Option overrides, passed to setup().
 * @returns {string} The cleaned markup.
 */
function clean(html, opt) {
    setup(opt);

    html = replaceWhiteSpace(html);

    // Comments are removed before tags are balanced. Otherwise markup inside a
    // comment is treated as real tags, and an unclosed tag in a comment leaves
    // a stray end tag behind once the comment is gone. Doing it before spaces
    // are collapsed also lets the gap a comment leaves behind be collapsed.
    if (options['remove-comments']) {
        html = removeComments(html);
    }

    html = removeExtraSpaces(html);
    html = cleanTags(html);

    if (options['remove-empty-paras']) {
        html = removeEmptyParagraphs(html);
    }

    if (options['pretty']) {
        html = addLineBreaks(html);
        html = removeBlankLines(html);
        html = indent(html);
    }

    return html.trim();
}
|
249 |
|
// Public interface of the module: clean() is the only exported function.
module.exports = { clean: clean };
|