UNPKG

5.1 kBJavaScriptView Raw
// Character-set decoder; used by clean() when the configured encoding is not utf-8.
var iconv = require('iconv-lite');

// Module-wide settings. The values below are the defaults; setup() overwrites
// them in place with whatever the caller passes to clean().
var options = {
  // Attribute names that cleanAttributes() strips from every tag.
  'attr-to-remove': ['align', 'valign', 'bgcolor', 'color', 'width', 'height', 'border', 'cellpadding', 'cellspacing'],

  // Tags that addLineBreaks() puts on their own line and that drive indent().
  'block-tags': ['div', 'p', 'table', 'tr', 'td', 'blockquote', 'hr'],

  // Tags whose trailing slash ("<br />") is removed by cleanTags().
  'empty-tags': ['br', 'hr', 'img'],

  // Encoding used to turn the input buffer into a string.
  'encoding': 'utf-8',

  // Output formatting switches; 'pretty' enables both line breaks and indentation.
  'indent': false,
  'line-breaks': false,
  'pretty': false,

  // Whether <!-- ... --> comments are dropped.
  'remove-comments': true,

  // Tags that are removed entirely (their content is kept).
  'tags-to-remove': ['font']
};
37
/**
 * Merges caller-supplied settings into the module-level `options` object.
 *
 * List and encoding settings replace the current value when a truthy value is
 * given. Boolean flags ('indent', 'line-breaks', 'pretty', 'remove-comments')
 * replace the current value whenever they are present, so an explicit `false`
 * is honoured. The 'add-*' settings append to the corresponding list after any
 * replacement has been applied.
 *
 * NOTE: `options` is shared module state, so settings applied here remain in
 * effect for later calls.
 *
 * @param {Object} [opt] - Settings to apply; nothing happens when omitted.
 */
function setup(opt) {
  if (!opt) {
    return;
  }

  // Flags must not use `||`: `false` is a meaningful value and would be
  // silently replaced by the fallback.
  function flag(name) {
    var value = opt[name];

    return value === undefined || value === null ? options[name] : value;
  }

  options['attr-to-remove'] = opt['attr-to-remove'] || options['attr-to-remove'];
  options['block-tags'] = opt['block-tags'] || options['block-tags'];
  options['empty-tags'] = opt['empty-tags'] || options['empty-tags'];
  options['encoding'] = opt['encoding'] || options['encoding'];
  options['indent'] = flag('indent');
  options['line-breaks'] = flag('line-breaks');
  options['pretty'] = flag('pretty');
  options['remove-comments'] = flag('remove-comments');
  options['tags-to-remove'] = opt['tags-to-remove'] || options['tags-to-remove'];

  if (opt['add-attr-to-remove']) {
    options['attr-to-remove'] = options['attr-to-remove'].concat(opt['add-attr-to-remove']);
  }

  if (opt['add-block-tags']) {
    options['block-tags'] = options['block-tags'].concat(opt['add-block-tags']);
  }

  if (opt['add-empty-tags']) {
    options['empty-tags'] = options['empty-tags'].concat(opt['add-empty-tags']);
  }

  if (opt['add-tags-to-remove']) {
    options['tags-to-remove'] = options['tags-to-remove'].concat(opt['add-tags-to-remove']);
  }
}
69
/**
 * Turns every single whitespace character (tab, newline, carriage return, ...)
 * into a plain space. Runs are not collapsed here.
 *
 * @param {string} html - Markup to normalise.
 * @returns {string} The markup with each whitespace character replaced by ' '.
 */
function replaceWhiteSpace(html) {
  var anyWhitespaceChar = /\s/g;

  return html.replace(anyWhitespaceChar, ' ');
}
73
/**
 * Collapses each run of two or more space characters into one space.
 * Only literal spaces are affected; other whitespace is left alone.
 *
 * @param {string} html - Markup to compact.
 * @returns {string} The markup without repeated spaces.
 */
function removeExtraSpaces(html) {
  var spaceRun = / {2,}/g;

  return html.replace(spaceRun, ' ');
}
77
/**
 * Rewrites a self-closing tag ending ("/>" or " />") as a plain ">".
 * Only the first occurrence in the given string is rewritten.
 *
 * @param {string} tag - A single tag, e.g. "<br />".
 * @returns {string} The tag without its trailing slash.
 */
function removeTrailingSlash(tag) {
  var selfClosingEnd = / ?\/>/;

  return tag.replace(selfClosingEnd, '>');
}
81
/**
 * Removes every quoted attribute whose name is listed in
 * options['attr-to-remove'] from a single tag.
 *
 * The value must be closed by the same quote character that opened it, and it
 * may be empty. Without that, `width=""` would swallow the following attribute
 * and a value containing the other quote character would be cut short.
 * Unquoted attribute values are not recognised and are left in place.
 *
 * @param {string} tag - A single tag, e.g. '<td width="10" class="x">'.
 * @returns {string} The tag without the unwanted attributes.
 */
function cleanAttributes(tag) {
  // \2 refers back to the opening quote so that "it's" stays one value.
  return tag.replace(/ (\w+)=(['"]).*?\2/g, function (attribute, attributeName) {
    if (options['attr-to-remove'].indexOf(attributeName.toLowerCase()) > -1) {
      return '';
    }

    return attribute;
  });
}
91
/**
 * Normalises every tag in the markup:
 *   - tags listed in options['tags-to-remove'] are dropped (content is kept),
 *   - tag and attribute names are lower-cased,
 *   - tags listed in options['empty-tags'] lose their trailing slash,
 *   - unwanted attributes are stripped via cleanAttributes().
 *
 * Quoted attribute values keep their original case: lower-casing them would
 * break case-sensitive URLs and alter human-readable text such as alt/title.
 * Unquoted values cannot be told apart from names here and are still
 * lower-cased.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with cleaned tags.
 */
function cleanTags(html) {
  return html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
    tagName = tagName.toLowerCase();

    if (options['tags-to-remove'].indexOf(tagName) > -1) {
      return '';
    }

    // Split the tag into quoted values and everything else; only the latter
    // (tag name, attribute names, punctuation) is lower-cased.
    tag = tag.replace(/"[^"]*"|'[^']*'|[^"']+/g, function (chunk) {
      var first = chunk.charAt(0);

      if (first === '"' || first === "'") {
        return chunk;
      }

      return chunk.toLowerCase();
    });

    if (options['empty-tags'].indexOf(tagName) > -1) {
      tag = removeTrailingSlash(tag);
    }

    tag = cleanAttributes(tag);

    return tag;
  });
}
110
/**
 * Deletes HTML comments ("<!-- ... -->") from the markup.
 * The pattern does not cross line terminators, so a comment is only removed
 * when it sits on a single line.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup without single-line comments.
 */
function removeComments(html) {
  var commentPattern = /<!--.*?-->/g;

  return html.replace(commentPattern, '');
}
114
/**
 * Inserts newlines around tags so the markup can be laid out line by line:
 * tags listed in options['block-tags'] get a newline before and after,
 * "br" tags get a newline after, all other tags are left untouched.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with line breaks added.
 */
function addLineBreaks(html) {
  var tagPattern = /<\/?(\w+).*?>/g;

  return html.replace(tagPattern, function (markup, name) {
    var isBlockLevel = options['block-tags'].indexOf(name) !== -1;

    if (isBlockLevel) {
      return '\n' + markup + '\n';
    }

    return name === 'br' ? markup + '\n' : markup;
  });
}
128
/**
 * Replaces every run of two or more whitespace characters with a single
 * newline, which removes empty lines and leading spaces left by addLineBreaks().
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with whitespace runs reduced to one newline.
 */
function removeBlankLines(html) {
  var whitespaceRun = /\s{2,}/g;

  return html.replace(whitespaceRun, '\n');
}
132
/**
 * Prefixes a line with `indentLevel` space characters.
 * A level of zero or less leaves the line unchanged.
 *
 * @param {string} line - The line to indent.
 * @param {number} indentLevel - Number of spaces to put in front of the line.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
  var padding = '';
  var remaining = indentLevel;

  while (remaining > 0) {
    padding += ' ';
    remaining -= 1;
  }

  return padding + line;
}
142
/**
 * Indents the markup line by line, two spaces per nesting level.
 *
 * Only newline-terminated lines are processed, and only the first tag on a
 * line is inspected. An opening tag listed in options['block-tags'] is printed
 * at the current level and increases it for the following lines; a closing
 * block tag decreases the level first and is printed at the new level.
 *
 * Tags that are also listed in options['empty-tags'] (e.g. "hr") never have a
 * closing counterpart, so they are printed at the current level without
 * changing it. The level is never allowed to drop below zero, so a stray
 * closing tag does not shift the indentation of the markup that follows.
 *
 * @param {string} html - Markup with one tag or text fragment per line.
 * @returns {string} The indented markup.
 */
function indent(html) {
  var indentLevel = 0;

  return html.replace(/.*\n/g, function (line) {
    var match = line.match(/<\/?(\w+).*?>/);

    if (!match) {
      return indentLine(line, indentLevel);
    }

    var tag = match[0],
      tagName = match[1],
      isBlock = options['block-tags'].indexOf(tagName) > -1,
      isVoid = options['empty-tags'].indexOf(tagName) > -1;

    // Inline tags and void tags do not open or close a nesting level.
    if (!isBlock || isVoid) {
      return indentLine(line, indentLevel);
    }

    if (tag.indexOf('</') === 0) {
      // Closing tag: step back first, but never past the left margin.
      indentLevel = Math.max(indentLevel - 2, 0);

      return indentLine(line, indentLevel);
    }

    line = indentLine(line, indentLevel);
    indentLevel += 2;

    return line;
  });
}
171
/**
 * Cleans an HTML document.
 *
 * Steps: decode the input, normalise whitespace, clean tags and attributes,
 * optionally remove comments, then optionally add line breaks
 * ('line-breaks' or 'pretty') and indentation ('indent' or 'pretty').
 *
 * NOTE: `opt` is merged into module-level state by setup(), so settings given
 * in one call remain in effect for later calls.
 *
 * @param {Buffer|string} data - The document. A Buffer is decoded with the
 *   configured encoding; strings are accepted because earlier versions never
 *   rejected them.
 * @param {Object} [opt] - Settings, see setup().
 * @returns {string} The cleaned, trimmed markup. For unsupported input an
 *   error message is written to stderr and the result of that write is
 *   returned instead.
 */
function clean(data, opt) {
  // Declared locally; an undeclared assignment would leak a global and throw
  // in strict mode.
  var html;

  // `!data instanceof Buffer` parsed as `(!data) instanceof Buffer` and was
  // therefore never true.
  if (!Buffer.isBuffer(data) && typeof data !== 'string') {
    return process.stderr.write('data must be a buffer\n');
  }

  setup(opt);

  if (options['encoding'] !== 'utf-8') {
    html = iconv.decode(data, options['encoding']);
  } else {
    html = data.toString('utf-8');
  }

  html = replaceWhiteSpace(html);
  html = removeExtraSpaces(html);
  html = cleanTags(html);

  if (options['remove-comments']) {
    html = removeComments(html);
  }

  if (!options['line-breaks'] && !options['pretty']) {
    return html.trim();
  }

  html = addLineBreaks(html);
  html = removeBlankLines(html);

  if (!options['indent'] && !options['pretty']) {
    return html.trim();
  }

  html = indent(html);

  return html.trim();
}
208
// Public interface of the module: only clean() is exposed.
var publicApi = {
  clean: clean
};

module.exports = publicApi;