var iconv = require('iconv-lite');

/**
 * Module-level settings read by every helper in this file.
 *
 * `setup()` writes caller-supplied values into this same object, so changes
 * made by one `clean()` call remain in effect for later calls.
 */
var options = {
    // Attribute names that cleanAttributes() strips from tags.
    'attr-to-remove': [
        'align',
        'valign',
        'bgcolor',
        'color',
        'width',
        'height',
        'border',
        'cellpadding',
        'cellspacing'
    ],
    // Tags that addLineBreaks() isolates on their own line and that indent()
    // uses to change the indentation depth.
    'block-tags': [
        'div',
        'p',
        'table',
        'tr',
        'td',
        'blockquote',
        'hr'
    ],
    // Tags whose trailing "/>" cleanTags() rewrites to ">".
    'empty-tags': [
        'br',
        'hr',
        'img'
    ],
    // Input encoding; any value other than 'utf-8' is decoded through iconv-lite.
    'encoding': 'utf-8',
    // Indent the output. Only consulted once line breaks have been added.
    'indent': false,
    // Put block tags on their own lines.
    'line-breaks': false,
    // Enables both line breaks and indentation.
    'pretty': false,
    // Strip <!-- ... --> comments.
    'remove-comments': true,
    // Tags whose markup is deleted by cleanTags(); the text between them stays.
    'tags-to-remove': [
        'font'
    ]
};

/**
 * Returns a copy of `list` followed by every entry of `extra` that the copy
 * does not already contain. `extra` may be an array or a single value.
 *
 * @param {Array} list - Base list; never mutated.
 * @param {*} extra - Entry or entries to append.
 * @returns {Array} New array without repeated additions.
 */
function appendUnique(list, extra) {
    var merged = list.slice();

    [].concat(extra).forEach(function (entry) {
        if (merged.indexOf(entry) === -1) {
            merged.push(entry);
        }
    });

    return merged;
}

/**
 * Merges caller-supplied settings into the module-level `options` object.
 *
 * - List settings and `encoding` are replaced when a truthy value is given.
 * - `indent`, `line-breaks` and `pretty` are replaced whenever a value is
 *   given, so an explicit `false` switches a previously enabled flag off.
 * - `remove-comments` honours an explicit value and otherwise goes back to
 *   `true`.
 * - Each `add-<list>` setting appends to the corresponding list.
 *
 * @param {Object} [opt] - Settings to apply; nothing happens when omitted.
 * @returns {undefined}
 */
function setup(opt) {
    if (!opt) {
        return;
    }

    var listKeys = ['attr-to-remove', 'block-tags', 'empty-tags', 'tags-to-remove'];
    var flagKeys = ['indent', 'line-breaks', 'pretty'];

    listKeys.concat('encoding').forEach(function (key) {
        options[key] = opt[key] || options[key];
    });

    // `opt[key] || options[key]` could never turn a flag off again because
    // `options` persists between calls; test for presence instead.
    flagKeys.forEach(function (key) {
        if (opt[key] != null) {
            options[key] = opt[key];
        }
    });

    // Previously `opt['remove-comments'] || true`, which is always true.
    options['remove-comments'] = opt['remove-comments'] != null
        ? opt['remove-comments']
        : true;

    // `options` outlives a single call, so skip entries that are already
    // present instead of letting the lists grow on every invocation.
    listKeys.forEach(function (key) {
        var additions = opt['add-' + key];

        if (additions) {
            options[key] = appendUnique(options[key], additions);
        }
    });
}

/**
 * Replaces every whitespace character (tabs, newlines, and anything else
 * matched by `\s`) with a single space character.
 *
 * @param {string} html - Text to normalise.
 * @returns {string} Text in which each whitespace character is a space.
 */
function replaceWhiteSpace(html) {
    return html.replace(/\s/g, ' ');
}

/**
 * Collapses each run of two or more space characters into one space.
 * Only the space character is affected; other whitespace is left alone.
 *
 * @param {string} html - Text to compact.
 * @returns {string} Text without consecutive spaces.
 */
function removeExtraSpaces(html) {
    return html.replace(/ {2,}/g, ' ');
}

/**
 * Rewrites the first "/>" in a tag (together with one optional space in
 * front of it) to ">", e.g. `<br />` becomes `<br>`.
 *
 * @param {string} tag - Markup of a single tag.
 * @returns {string} The tag without the self-closing slash.
 */
function removeTrailingSlash(tag) {
    return tag.replace(/ ?\/>/, '>');
}

/**
 * Removes from a tag every attribute whose name is listed in
 * `options['attr-to-remove']`.
 *
 * An attribute is a space, a name, "=", and a value that is either
 * double-quoted, single-quoted (quotes must match and may enclose an empty
 * string), or an unquoted run of characters. Names are compared
 * case-insensitively against the configured list.
 *
 * @param {string} tag - Markup of a single tag.
 * @returns {string} The tag without the unwanted attributes.
 */
function cleanAttributes(tag) {
    // The earlier pattern (['"].+?['"]) required at least one value character
    // and accepted mismatched quotes, so width="" swallowed the following
    // attribute and title="it's" was cut at the apostrophe.
    var attributePattern = / ([\w-]+)=(?:"[^"]*"|'[^']*'|[^\s"'>]+)/g;

    return tag.replace(attributePattern, function (attribute, attributeName) {
        if (options['attr-to-remove'].indexOf(attributeName.toLowerCase()) > -1) {
            return '';
        }

        return attribute;
    });
}

/**
 * Lower-cases the parts of a tag that lie outside quoted strings (tag name,
 * attribute names, unquoted values) and leaves quoted attribute values
 * exactly as written.
 *
 * @param {string} tag - Markup of a single tag.
 * @returns {string} The tag with lower-cased markup.
 */
function lowerCaseOutsideQuotes(tag) {
    return tag.replace(/"[^"]*"|'[^']*'|[^"']+/g, function (chunk) {
        var first = chunk.charAt(0);

        // A chunk that starts with a quote is always a complete quoted value.
        return first === '"' || first === '\'' ? chunk : chunk.toLowerCase();
    });
}

/**
 * Normalises every tag found in `html`:
 *
 * 1. tags listed in `options['tags-to-remove']` are deleted (their content
 *    is untouched because only the tag markup is matched);
 * 2. remaining tags get lower-cased tag and attribute names;
 * 3. tags listed in `options['empty-tags']` lose a trailing "/>";
 * 4. attributes are filtered through `cleanAttributes()`.
 *
 * @param {string} html - Markup to process.
 * @returns {string} Markup with cleaned tags.
 */
function cleanTags(html) {
    return html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
        var name = tagName.toLowerCase();

        if (options['tags-to-remove'].indexOf(name) > -1) {
            return '';
        }

        // Lower-casing the whole tag used to mangle case-sensitive attribute
        // values such as URLs and alt text.
        var cleaned = lowerCaseOutsideQuotes(tag);

        if (options['empty-tags'].indexOf(name) > -1) {
            cleaned = removeTrailingSlash(cleaned);
        }

        return cleanAttributes(cleaned);
    });
}

/**
 * Deletes every `<!-- ... -->` comment from the markup.
 *
 * @param {string} html - Markup to process.
 * @returns {string} Markup without comments.
 */
function removeComments(html) {
    // [\s\S] instead of "." so that a comment spanning several lines is
    // removed as well; "." stops at line terminators.
    return html.replace(/<!--[\s\S]*?-->/g, '');
}

/**
 * Inserts newlines around tags so that the markup can be laid out line by
 * line: tags listed in `options['block-tags']` get a newline before and
 * after them, and `<br>` gets a newline after it. All other tags are
 * returned unchanged.
 *
 * @param {string} html - Markup to process.
 * @returns {string} Markup with added newlines.
 */
function addLineBreaks(html) {
    return html.replace(/<\/?(\w+).*?>/g, function (tag, tagName) {
        // Compare case-insensitively so upper-case markup is treated the same.
        var name = tagName.toLowerCase();

        if (options['block-tags'].indexOf(name) > -1) {
            return '\n' + tag + '\n';
        }

        return name === 'br' ? tag + '\n' : tag;
    });
}

/**
 * Replaces every run of two or more whitespace characters with a single
 * newline, which removes blank lines and whitespace surrounding line breaks.
 * A lone whitespace character is left as it is.
 *
 * @param {string} html - Markup to process.
 * @returns {string} Markup without whitespace runs.
 */
function removeBlankLines(html) {
    return html.replace(/\s{2,}/g, '\n');
}

/**
 * Prefixes `line` with `indentLevel` space characters. A level of zero or
 * less adds nothing.
 *
 * @param {string} line - Line to indent.
 * @param {number} indentLevel - Number of spaces to put in front of the line.
 * @returns {string} The indented line.
 */
function indentLine(line, indentLevel) {
    var indent = '';

    for (var i = 0; i < indentLevel; i++) {
        indent += ' ';
    }

    return indent + line;
}

/**
 * Indents markup line by line. The first tag on a line decides what happens:
 *
 * - an opening tag from `options['block-tags']` is written at the current
 *   depth and then increases the depth by two spaces;
 * - a closing tag from `options['block-tags']` decreases the depth by two
 *   (never below zero) and is written at the new depth;
 * - a tag that is also in `options['empty-tags']` has no closing
 *   counterpart, so it is written at the current depth without changing it;
 * - every other line is written at the current depth.
 *
 * @param {string} html - Markup with one tag or text fragment per line.
 * @returns {string} Indented markup.
 */
function indent(html) {
    var indentLevel = 0;

    // The second alternative picks up a final line that has no trailing
    // newline, which ".*\n" alone would skip.
    return html.replace(/.*\n|.+$/g, function (line) {
        var match = line.match(/<\/?(\w+).*?>/);

        if (!match) {
            return indentLine(line, indentLevel);
        }

        var tag = match[0],
            tagName = match[1].toLowerCase(),
            isBlock = options['block-tags'].indexOf(tagName) > -1,
            isEmpty = options['empty-tags'].indexOf(tagName) > -1;

        // Empty tags such as <hr> are never closed; letting them open a level
        // would shift everything after them to the right for good.
        if (!isBlock || isEmpty) {
            return indentLine(line, indentLevel);
        }

        if (tag.indexOf('</') === 0) {
            // Clamp so a stray closing tag cannot push the depth negative and
            // flatten the nesting that follows.
            indentLevel = Math.max(0, indentLevel - 2);

            return indentLine(line, indentLevel);
        }

        var indented = indentLine(line, indentLevel);

        indentLevel += 2;

        return indented;
    });
}

/**
 * Cleans a piece of HTML according to the module settings.
 *
 * Steps: decode the input, turn all whitespace into single spaces, clean the
 * tags, optionally strip comments, then optionally add line breaks
 * (`line-breaks` or `pretty`) and indentation (`indent` or `pretty`).
 * Indentation is only applied when line breaks were added as well.
 *
 * @param {Buffer|string} data - Markup to clean. A Buffer is decoded with
 *     `options['encoding']`; a string is used as is.
 * @param {Object} [opt] - Settings forwarded to `setup()`.
 * @returns {string|undefined} The cleaned, trimmed markup, or `undefined`
 *     (after a message on stderr) when `data` is neither a Buffer nor a string.
 */
function clean(data, opt) {
    var isText = typeof data === 'string';

    // `!data instanceof Buffer` parsed as `(!data) instanceof Buffer`, which is
    // always false, so the guard never fired. Strings keep being accepted
    // because they worked while the guard was ineffective.
    if (!isText && !Buffer.isBuffer(data)) {
        process.stderr.write('data must be a buffer or a string\n');

        return;
    }

    setup(opt);

    // Declared locally; assigning to an undeclared `html` leaked a global and
    // throws in strict mode.
    var html;

    if (isText) {
        html = data;
    } else if (options['encoding'] !== 'utf-8') {
        html = iconv.decode(data, options['encoding']);
    } else {
        html = data.toString('utf-8');
    }

    html = replaceWhiteSpace(html);
    html = removeExtraSpaces(html);
    html = cleanTags(html);

    if (options['remove-comments']) {
        html = removeComments(html);
    }

    if (!options['line-breaks'] && !options['pretty']) {
        return html.trim();
    }

    html = addLineBreaks(html);
    html = removeBlankLines(html);

    if (!options['indent'] && !options['pretty']) {
        return html.trim();
    }

    html = indent(html);

    return html.trim();
}

209 | module.exports = {
|
210 | clean: clean
|
211 | };
|