UNPKG

4.8 kBJavaScriptView Raw
// Decoder used by clean() when the configured encoding is not 'utf-8'.
var iconv = require('iconv-lite');

/**
 * Module-level settings shared by every helper in this file.
 * setup() overwrites or extends these values from the caller's options.
 */
var options = {};

// Attribute names that cleanAttributes() strips from tags.
options['attr-to-remove'] = [
  'align',
  'valign',
  'bgcolor',
  'color',
  'width',
  'height',
  'border',
  'cellpadding',
  'cellspacing'
];

// Tags that addLineBreaks() isolates on their own line and indent() nests.
options['block-tags'] = [
  'div',
  'p',
  'table',
  'tr',
  'td',
  'blockquote',
  'hr'
];

// Tags whose trailing slash cleanTags() removes.
options['empty-tags'] = [
  'br',
  'hr',
  'img'
];

// Encoding clean() uses to turn the input buffer into a string.
options['encoding'] = 'utf-8';

// When truthy, clean() adds line breaks and indentation.
options['pretty'] = true;

// When truthy, clean() strips HTML comments.
options['remove-comments'] = false;

// Tags that cleanTags() drops (the tag itself, not its content).
options['tags-to-remove'] = [
  'font'
];
35
/**
 * Merges caller-supplied settings into the module-level `options` object.
 *
 * List settings and `encoding` replace the current value when a truthy value
 * is given. The boolean settings `pretty` and `remove-comments` are applied
 * whenever they are provided at all, so an explicit `false` is honoured.
 * The `add-*` settings append to the corresponding list.
 *
 * @param {Object} [opt] - Settings to apply; a falsy value leaves `options` untouched.
 * @returns {undefined}
 */
function setup(opt) {
  if (!opt) {
    return;
  }

  // A falsy value for these keys means "keep the current setting".
  var replaceableKeys = [
    'attr-to-remove',
    'block-tags',
    'empty-tags',
    'encoding',
    'tags-to-remove'
  ];

  // For flags, `false` is a meaningful value, so only null/undefined means "keep".
  var flagKeys = ['pretty', 'remove-comments'];

  // Maps each list to the option that extends it.
  var extensionKeys = {
    'attr-to-remove': 'add-attr-to-remove',
    'block-tags': 'add-block-tags',
    'empty-tags': 'add-empty-tags',
    'tags-to-remove': 'add-tags-to-remove'
  };

  replaceableKeys.forEach(function (key) {
    options[key] = opt[key] || options[key];
  });

  flagKeys.forEach(function (key) {
    if (opt[key] !== undefined && opt[key] !== null) {
      options[key] = opt[key];
    }
  });

  Object.keys(extensionKeys).forEach(function (key) {
    var additions = opt[extensionKeys[key]];

    if (!additions) {
      return;
    }

    // Build a new array (as concat did) but skip entries already present:
    // `options` outlives a single call, so repeated calls would otherwise
    // keep growing the lists with duplicates.
    var merged = options[key].slice();

    [].concat(additions).forEach(function (item) {
      if (merged.indexOf(item) === -1) {
        merged.push(item);
      }
    });

    options[key] = merged;
  });
}
65
/**
 * Turns every single whitespace character (tab, newline, ...) into a plain space.
 *
 * @param {string} html - Markup to normalise.
 * @returns {string} The markup with each whitespace character replaced by ' '.
 */
function replaceWhiteSpace(html) {
  // Splitting on one whitespace character at a time and re-joining with a
  // space swaps each of them individually; runs are not collapsed here.
  var pieces = html.split(/\s/);

  return pieces.join(' ');
}
69
/**
 * Collapses each run of two or more space characters into a single space.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup without consecutive spaces.
 */
function removeExtraSpaces(html) {
  var spaceRun = /  +/g;

  return html.replace(spaceRun, ' ');
}
73
/**
 * Rewrites the first "/>" (with an optional space in front of it) as ">".
 *
 * @param {string} tag - A single tag, e.g. '<br />'.
 * @returns {string} The tag with the self-closing slash removed, or the
 *   unchanged tag when it contains no "/>".
 */
function removeTrailingSlash(tag) {
  var found = / ?\/>/.exec(tag);

  if (found === null) {
    return tag;
  }

  var before = tag.slice(0, found.index);
  var after = tag.slice(found.index + found[0].length);

  return before + '>' + after;
}
77
/**
 * Strips from a tag every attribute whose name is listed in
 * `options['attr-to-remove']`.
 *
 * Each attribute is matched as a whole: a double-quoted value, a single-quoted
 * value or an unquoted value. Matching the quote pairs explicitly keeps empty
 * values (`align=""`) and values containing the other quote character
 * (`title="it's"`) from swallowing the attributes that follow them.
 *
 * @param {string} tag - A single tag, e.g. '<td width="5" class="x">'.
 * @returns {string} The tag without the unwanted attributes.
 */
function cleanAttributes(tag) {
  var attributePattern = / ([^\s"'=<>\/]+)=(?:"[^"]*"|'[^']*'|[^\s"'>]+)/g;

  return tag.replace(attributePattern, function (attribute, attributeName) {
    // Attribute names are case-insensitive in HTML.
    if (options['attr-to-remove'].indexOf(attributeName.toLowerCase()) > -1) {
      return '';
    }

    return attribute;
  });
}
87
/**
 * Normalises every tag in the markup:
 *  - tags listed in `options['tags-to-remove']` are dropped (their content stays);
 *  - tag names and attribute names are lower-cased;
 *  - tags listed in `options['empty-tags']` lose their trailing slash;
 *  - unwanted attributes are removed through cleanAttributes().
 *
 * Attribute values are left exactly as written: lower-casing the whole tag
 * would corrupt case-sensitive URLs and visible text such as `alt`/`title`.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with cleaned tags.
 */
function cleanTags(html) {
  // One attribute: leading whitespace, a name and an optional "=value".
  // Quoted values are consumed whole so nothing inside them is mistaken
  // for another attribute name.
  var attributePattern = /(\s+)([^\s"'=<>\/]+)(\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+))?/g;

  return html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
    tagName = tagName.toLowerCase();

    if (options['tags-to-remove'].indexOf(tagName) > -1) {
      return '';
    }

    // Split "<name" / "</name" from the remainder of the tag.
    var head = tag.match(/^<\/?\w+/)[0];
    var rest = tag.slice(head.length);

    tag = head.toLowerCase() + rest.replace(attributePattern, function (attribute, space, attributeName, assignment) {
      return space + attributeName.toLowerCase() + (assignment || '');
    });

    if (options['empty-tags'].indexOf(tagName) > -1) {
      tag = removeTrailingSlash(tag);
    }

    return cleanAttributes(tag);
  });
}
106
/**
 * Removes every HTML comment (`<!-- ... -->`) from the markup.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup without comments.
 */
function removeComments(html) {
  // [\s\S] rather than "." so that comments spanning several lines are
  // matched too; "." stops at line terminators.
  return html.replace(/<!--[\s\S]*?-->/g, '');
}
110
/**
 * Inserts line breaks around tags so the markup can be laid out line by line:
 * tags listed in `options['block-tags']` get a newline before and after,
 * `br` tags get a newline after, every other tag is left alone.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with the added newlines.
 */
function addLineBreaks(html) {
  var tagPattern = /<\/?(\w+).*?>/g;

  return html.replace(tagPattern, function (markup, name) {
    var isBlock = options['block-tags'].indexOf(name) !== -1;

    if (isBlock) {
      return '\n' + markup + '\n';
    }

    return name === 'br' ? markup + '\n' : markup;
  });
}
124
/**
 * Replaces each run of two or more whitespace characters with one newline.
 *
 * @param {string} html - Markup to process.
 * @returns {string} The markup with whitespace runs reduced to '\n'.
 */
function removeBlankLines(html) {
  var whitespaceRun = /\s\s+/g;

  return html.replace(whitespaceRun, '\n');
}
128
/**
 * Prefixes a line with `indentLevel` space characters.
 *
 * @param {string} line - The line to indent.
 * @param {number} indentLevel - Number of spaces to prepend; zero or a
 *   negative value adds nothing.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
  var padding = '';
  var remaining = indentLevel;

  // Count down instead of up; one space is added per step.
  while (remaining > 0) {
    padding += ' ';
    remaining -= 1;
  }

  return padding + line;
}
138
/**
 * Indents the markup line by line, nesting by two spaces inside each tag
 * listed in `options['block-tags']`.
 *
 * The first tag found on a line decides what happens:
 *  - an opening block tag is printed at the current level, then the level grows;
 *  - a closing block tag shrinks the level first, then is printed;
 *  - anything else is printed at the current level.
 *
 * @param {string} html - Markup with one tag or text fragment per line.
 * @returns {string} The indented markup.
 */
function indent(html) {
  var indentLevel = 0;

  // The second alternative picks up a last line that has no trailing newline,
  // which would otherwise be left unindented.
  return html.replace(/.*\n|.+$/g, function (line) {
    var match = line.match(/<\/?(\w+).*?>/);

    if (!match) {
      return indentLine(line, indentLevel);
    }

    var tag = match[0],
      tagName = match[1];

    if (options['block-tags'].indexOf(tagName) === -1) {
      return indentLine(line, indentLevel);
    }

    if (tag.indexOf('</') === 0) {
      // Never go below zero: a stray closing tag must not shift the
      // indentation of everything that follows it.
      indentLevel = Math.max(0, indentLevel - 2);

      return indentLine(line, indentLevel);
    }

    line = indentLine(line, indentLevel);

    // Tags without a closing counterpart (e.g. <hr>, or anything written as
    // <x/>) open no nesting level; increasing here would never be undone.
    var opensLevel = options['empty-tags'].indexOf(tagName) === -1 && !/\/>$/.test(tag);

    if (opensLevel) {
      indentLevel += 2;
    }

    return line;
  });
}
167
/**
 * Cleans an HTML document: decodes it, normalises whitespace, cleans the tags
 * and, depending on the options, strips comments and pretty-prints the result.
 *
 * @param {Buffer|string} data - The document. Buffers are decoded with
 *   `options['encoding']`; strings are accepted as well because callers could
 *   already pass them before the type check was effective.
 * @param {Object} [opt] - Settings forwarded to setup().
 * @returns {string} The cleaned markup, trimmed. For unsupported input a
 *   message is written to stderr and an empty string is returned.
 */
function clean(data, opt) {
  // Parenthesised on purpose: `!data instanceof Buffer` negates `data` first
  // and is therefore always false.
  var isSupported = (data instanceof Buffer) || typeof data === 'string';

  if (!isSupported) {
    process.stderr.write('data must be a buffer\n');

    return '';
  }

  setup(opt);

  // Declared locally; assigning without a declaration would create a global.
  var html;

  if (options['encoding'] !== 'utf-8') {
    html = iconv.decode(data, options['encoding']);
  } else {
    html = data.toString('utf-8');
  }

  html = replaceWhiteSpace(html);
  html = removeExtraSpaces(html);
  html = cleanTags(html);

  if (options['remove-comments']) {
    html = removeComments(html);
  }

  if (options['pretty']) {
    html = addLineBreaks(html);
    html = removeBlankLines(html);
    html = indent(html);
  }

  return html.trim();
}
197
// Public interface of the module: only clean() is exposed.
var publicApi = {};

publicApi.clean = clean;

module.exports = publicApi;