UNPKG

10.5 kBJavaScriptView Raw
/**
 * default settings
 *
 * @author Zongmin Lei<leizongmin@gmail.com>
 */
6
7var FilterCSS = require("cssfilter").FilterCSS;
8var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList;
9var _ = require("./util");
10
/**
 * Build the default tag whitelist.
 *
 * Each key is an allowed tag name and each value is the list of attribute
 * names allowed on that tag (an empty list means no attributes).
 * A brand-new object with brand-new arrays is produced on every call, so
 * callers may modify the result without affecting later calls.
 *
 * @return {Object}
 */
function getDefaultWhiteList() {
  // [tag, allowed attributes] pairs, kept in the order the keys are inserted.
  var entries = [
    ["a", ["target", "href", "title"]],
    ["abbr", ["title"]],
    ["address", []],
    ["area", ["shape", "coords", "href", "alt"]],
    ["article", []],
    ["aside", []],
    [
      "audio",
      ["autoplay", "controls", "crossorigin", "loop", "muted", "preload", "src"],
    ],
    ["b", []],
    ["bdi", ["dir"]],
    ["bdo", ["dir"]],
    ["big", []],
    ["blockquote", ["cite"]],
    ["br", []],
    ["caption", []],
    ["center", []],
    ["cite", []],
    ["code", []],
    ["col", ["align", "valign", "span", "width"]],
    ["colgroup", ["align", "valign", "span", "width"]],
    ["dd", []],
    ["del", ["datetime"]],
    ["details", ["open"]],
    ["div", []],
    ["dl", []],
    ["dt", []],
    ["em", []],
    ["figcaption", []],
    ["figure", []],
    ["font", ["color", "size", "face"]],
    ["footer", []],
    ["h1", []],
    ["h2", []],
    ["h3", []],
    ["h4", []],
    ["h5", []],
    ["h6", []],
    ["header", []],
    ["hr", []],
    ["i", []],
    ["img", ["src", "alt", "title", "width", "height"]],
    ["ins", ["datetime"]],
    ["li", []],
    ["mark", []],
    ["nav", []],
    ["ol", []],
    ["p", []],
    ["pre", []],
    ["s", []],
    ["section", []],
    ["small", []],
    ["span", []],
    ["sub", []],
    ["summary", []],
    ["sup", []],
    ["strong", []],
    ["strike", []],
    ["table", ["width", "border", "align", "valign"]],
    ["tbody", ["align", "valign"]],
    ["td", ["width", "rowspan", "colspan", "align", "valign"]],
    ["tfoot", ["align", "valign"]],
    ["th", ["width", "rowspan", "colspan", "align", "valign"]],
    ["thead", ["align", "valign"]],
    ["tr", ["rowspan", "align", "valign"]],
    ["tt", []],
    ["u", []],
    ["ul", []],
    [
      "video",
      [
        "autoplay",
        "controls",
        "crossorigin",
        "loop",
        "muted",
        "playsinline",
        "poster",
        "preload",
        "src",
        "height",
        "width",
      ],
    ],
  ];

  var whiteList = {};
  for (var idx = 0; idx < entries.length; idx += 1) {
    whiteList[entries[idx][0]] = entries[idx][1];
  }
  return whiteList;
}
102
// Shared FilterCSS instance built with no constructor arguments.
// `safeAttrValue` falls back to it for `style` values when the caller passes
// no cssFilter, and it is exported below as `cssFilter`.
var defaultCSSFilter = new FilterCSS();
104
/**
 * Default `onTag` hook: a no-op.
 *
 * It ignores all of its arguments and yields `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling, while a custom hook would return the
 * replacement string - confirm against the code that invokes this hook.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onTag(tag, html, options) {
  return undefined;
}
116
/**
 * Default `onIgnoreTag` hook: a no-op.
 *
 * It ignores all of its arguments and yields `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling of tags that are not whitelisted - confirm
 * against the code that invokes this hook.
 *
 * @param {String} tag
 * @param {String} html
 * @param {Object} options
 * @return {undefined}
 */
function onIgnoreTag(tag, html, options) {
  return undefined;
}
128
/**
 * Default `onTagAttr` hook: a no-op.
 *
 * It ignores all of its arguments and yields `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in attribute handling - confirm against the code that
 * invokes this hook.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onTagAttr(tag, name, value) {
  return undefined;
}
140
/**
 * Default `onIgnoreTagAttr` hook: a no-op.
 *
 * It ignores all of its arguments and yields `undefined`.
 * NOTE(review): presumably an `undefined` result tells the caller to fall
 * back to its built-in handling of attributes that are not whitelisted -
 * confirm against the code that invokes this hook.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @return {undefined}
 */
function onIgnoreTagAttr(tag, name, value) {
  return undefined;
}
152
/**
 * Default `escapeHtml` function.
 *
 * Replaces every `<` with `&lt;` and every `>` with `&gt;`; all other
 * characters are left as they are.
 *
 * @param {String} html
 * @return {String}
 */
function escapeHtml(html) {
  var withoutLt = html.replace(REGEXP_LT, "&lt;");
  var withoutGt = withoutLt.replace(REGEXP_GT, "&gt;");
  return withoutGt;
}
161
/**
 * Default `safeAttrValue` function.
 *
 * The value is first normalised with `friendlyAttrValue`, then checked
 * according to the attribute name:
 *
 * - `href` / `src`: the trimmed value must be exactly `#` or start with one
 *   of `http://`, `https://`, `mailto:`, `tel:`, `data:image/`, `ftp://`,
 *   `./`, `../`, `#`, `/`; anything else yields an empty string.
 * - `background`: rejected (empty string) when it contains a script-like
 *   protocol such as `javascript:`.
 * - `style`: rejected when it contains `expression(`, or `url(` together
 *   with a script-like protocol; otherwise it is passed through the CSS
 *   filter unless `cssFilter` is `false`.
 *
 * Whatever survives is escaped with `escapeAttrValue` before it is returned.
 *
 * @param {String} tag
 * @param {String} name
 * @param {String} value
 * @param {Object} cssFilter object with a `process` method; a falsy value
 *   other than `false` selects the default filter, `false` disables filtering
 * @return {String}
 */
function safeAttrValue(tag, name, value, cssFilter) {
  // Prefixes an `href` / `src` value is allowed to start with.
  var allowedUrlPrefixes = [
    "http://",
    "https://",
    "mailto:",
    "tel:",
    "data:image/",
    "ftp://",
    "./",
    "../",
    "#",
    "/",
  ];

  function hasAllowedUrlPrefix(url) {
    for (var idx = 0; idx < allowedUrlPrefixes.length; idx += 1) {
      var prefix = allowedUrlPrefixes[idx];
      if (url.slice(0, prefix.length) === prefix) {
        return true;
      }
    }
    return false;
  }

  // The shared patterns carry the `g` flag, so `test` resumes from
  // `lastIndex`; rewind before every check.
  function foundIn(pattern, text) {
    pattern.lastIndex = 0;
    return pattern.test(text);
  }

  // unescape attribute value firstly
  value = friendlyAttrValue(value);

  switch (name) {
    case "href":
    case "src":
      value = _.trim(value);
      if (value === "#") {
        return "#";
      }
      if (!hasAllowedUrlPrefix(value)) {
        return "";
      }
      break;

    case "background":
      // filter `background` attribute (maybe no use): `javascript:` etc.
      if (foundIn(REGEXP_DEFAULT_ON_TAG_ATTR_4, value)) {
        return "";
      }
      break;

    case "style":
      // `expression()`
      if (foundIn(REGEXP_DEFAULT_ON_TAG_ATTR_7, value)) {
        return "";
      }
      // `url()` combined with a script-like protocol
      if (
        foundIn(REGEXP_DEFAULT_ON_TAG_ATTR_8, value) &&
        foundIn(REGEXP_DEFAULT_ON_TAG_ATTR_4, value)
      ) {
        return "";
      }
      if (cssFilter !== false) {
        var activeFilter = cssFilter || defaultCSSFilter;
        value = activeFilter.process(value);
      }
      break;

    default:
      break;
  }

  // escape `<>"` before returns
  return escapeAttrValue(value);
}
227
// RegExp list
// All of these patterns carry the `g` flag, so `test()` / `exec()` on them is
// stateful through `lastIndex`; callers that use `test()` reset `lastIndex`
// to 0 first.

// Every `<` and `>` character.
var REGEXP_LT = /</g;
var REGEXP_GT = />/g;
// Every literal double quote, and every `&quot;` entity.
var REGEXP_QUOTE = /"/g;
var REGEXP_QUOTE_2 = /&quot;/g;
// `&#` followed by any run of ASCII letters/digits and an optional `;`
// (numeric character references such as `&#58;` or `&#x3A;`).
var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
// The named entities `&colon;` and `&newline;` (trailing `;` optional,
// case-insensitive).
var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
// var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
// `javascript:`, `vbscript:`, `livescript:` or `mocha:`, case-insensitive,
// with optional whitespace allowed between the letters.
var REGEXP_DEFAULT_ON_TAG_ATTR_4 =
  /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
// var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
// `expression(` with optional whitespace between the letters, plus the rest
// of the line.
var REGEXP_DEFAULT_ON_TAG_ATTR_7 =
  /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
// `url(` with optional whitespace between the letters, plus the rest of the
// line.
var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;
244
/**
 * Escape double quotes: every `"` becomes `&quot;`.
 *
 * @param {String} str
 * @return {String} the string with its double quotes escaped
 */
function escapeQuote(str) {
  var escaped = str.replace(REGEXP_QUOTE, "&quot;");
  return escaped;
}
254
/**
 * Unescape double quotes: every `&quot;` becomes `"`.
 *
 * @param {String} str
 * @return {String} the string with `&quot;` entities turned back into quotes
 */
function unescapeQuote(str) {
  var restored = str.replace(REGEXP_QUOTE_2, '"');
  return restored;
}
264
/**
 * Decode numeric character references.
 *
 * Despite its name, this function replaces each `&#...;` reference (trailing
 * `;` optional) with the character it denotes. A body starting with `x` or
 * `X` is read as hexadecimal, anything else as decimal.
 *
 * NOTE(review): the character is produced with `String.fromCharCode`, which
 * keeps only the low 16 bits of the number, and a body that does not parse
 * (NaN) yields the character with code 0 - confirm this is intended.
 *
 * @param {String} str
 * @return {String}
 */
function escapeHtmlEntities(str) {
  return str.replace(REGEXP_ATTR_VALUE_1, function decodeReference(match, code) {
    var firstChar = code[0];
    var charCode;
    if (firstChar === "x" || firstChar === "X") {
      charCode = parseInt(code.substr(1), 16);
    } else {
      charCode = parseInt(code, 10);
    }
    return String.fromCharCode(charCode);
  });
}
278
/**
 * Decode the HTML5 named entities that are considered dangerous.
 *
 * Every `&colon;` becomes `:` and every `&newline;` becomes a single space
 * (the trailing `;` is optional and matching is case-insensitive).
 *
 * @param {String} str
 * @return {String}
 */
function escapeDangerHtml5Entities(str) {
  var withColons = str.replace(REGEXP_ATTR_VALUE_COLON, ":");
  var withSpaces = withColons.replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
  return withSpaces;
}
290
/**
 * Clear non-printable characters.
 *
 * Every character whose code is below 32 is replaced with a space, then the
 * result is passed through `_.trim` (presumably stripping leading and
 * trailing whitespace - see ./util).
 *
 * @param {String} str
 * @return {String}
 */
function clearNonPrintableCharacter(str) {
  var pieces = [];
  for (var idx = 0; idx < str.length; idx += 1) {
    var isControl = str.charCodeAt(idx) < 32;
    pieces.push(isControl ? " " : str.charAt(idx));
  }
  return _.trim(pieces.join(""));
}
304
/**
 * Get a "friendly" (normalised) attribute value.
 *
 * Applies, in this order: `unescapeQuote`, `escapeHtmlEntities`,
 * `escapeDangerHtml5Entities`, `clearNonPrintableCharacter`.
 *
 * @param {String} str
 * @return {String}
 */
function friendlyAttrValue(str) {
  return clearNonPrintableCharacter(
    escapeDangerHtml5Entities(escapeHtmlEntities(unescapeQuote(str)))
  );
}
318
/**
 * Escape an attribute value.
 *
 * Double quotes are escaped first (`escapeQuote`), then the result is run
 * through `escapeHtml`.
 *
 * @param {String} str
 * @return {String}
 */
function escapeAttrValue(str) {
  return escapeHtml(escapeQuote(str));
}
330
/**
 * `onIgnoreTag` function for removing all the tags that are not in whitelist.
 *
 * It ignores its arguments and always answers with an empty string.
 *
 * @return {String} always `""`
 */
function onIgnoreTagStripAll() {
  var nothing = "";
  return nothing;
}
337
/**
 * Build helpers that remove tags together with their bodies.
 *
 * The returned `onIgnoreTag` replaces every tag selected for removal with a
 * `[removed]` (opening) or `[/removed]` (closing) marker and records the span
 * from the first pending opening marker to the end of the closing marker.
 * The returned `remove` then cuts all recorded spans out of a string.
 * Positions are taken from `options.position`, so `remove` must be given the
 * string those positions refer to.
 *
 * Tags that are not selected for removal are delegated to `next`.
 *
 * @param {array} tags tag names to remove; when this is not an array, every
 *   tag handed to `onIgnoreTag` is removed
 * @param {function} next handler for tags not in `tags` (optional, defaults
 *   to a function that returns `undefined`)
 * @return {Object} `{ onIgnoreTag: function (tag, html, options), remove: function (html) }`
 */
function StripTagBody(tags, next) {
  if (typeof next !== "function") {
    next = function () {};
  }

  var isRemoveAllTag = !Array.isArray(tags);
  function isRemoveTag(tag) {
    if (isRemoveAllTag) return true;
    return tags.indexOf(tag) !== -1;
  }

  // Recorded [start, end) spans to cut, in the order they were closed.
  var removeList = [];
  // Start of the span currently open, or `false` when none is pending.
  var posStart = false;

  return {
    onIgnoreTag: function (tag, html, options) {
      if (!isRemoveTag(tag)) {
        return next(tag, html, options);
      }

      if (options.isClosing) {
        var ret = "[/removed]";
        var end = options.position + ret.length;
        // A closing tag with no pending opening tag removes only its marker.
        removeList.push([
          posStart !== false ? posStart : options.position,
          end,
        ]);
        posStart = false;
        return ret;
      }

      // Compare against `false` explicitly: position 0 is a valid start and
      // must not be overwritten by a later opening tag.
      if (posStart === false) {
        posStart = options.position;
      }
      return "[removed]";
    },
    remove: function (html) {
      var rethtml = "";
      var lastPos = 0;
      for (var i = 0; i < removeList.length; i++) {
        var pos = removeList[i];
        rethtml += html.slice(lastPos, pos[0]);
        lastPos = pos[1];
      }
      rethtml += html.slice(lastPos);
      return rethtml;
    },
  };
}
393
/**
 * Remove html comments.
 *
 * Every `<!-- ... -->` span is dropped. When a `<!--` has no matching `-->`,
 * everything from that `<!--` to the end of the input is dropped as well.
 *
 * @param {String} html
 * @return {String}
 */
function stripCommentTag(html) {
  var OPEN_MARK = "<!--";
  var CLOSE_MARK = "-->";
  var kept = [];
  var cursor = 0;

  while (cursor < html.length) {
    var openAt = html.indexOf(OPEN_MARK, cursor);
    if (openAt === -1) {
      // No further comment: keep the remainder and finish.
      kept.push(html.slice(cursor));
      break;
    }
    kept.push(html.slice(cursor, openAt));

    var closeAt = html.indexOf(CLOSE_MARK, openAt);
    if (closeAt === -1) {
      // Unterminated comment: the rest of the input is discarded.
      break;
    }
    cursor = closeAt + CLOSE_MARK.length;
  }

  return kept.join("");
}
418
/**
 * Remove invisible characters.
 *
 * Drops the character with code 127 and every character with code 31 or
 * lower, except line feed (10) and carriage return (13), which are kept.
 *
 * @param {String} html
 * @return {String}
 */
function stripBlankChar(html) {
  var kept = "";
  for (var idx = 0; idx < html.length; idx += 1) {
    var code = html.charCodeAt(idx);
    var isLineBreak = code === 10 || code === 13;
    var isInvisible = code === 127 || code <= 31;
    if (!isInvisible || isLineBreak) {
      kept += html.charAt(idx);
    }
  }
  return kept;
}
438
// Public API of this module.
// `whiteList` is one whitelist object created once, when this module is
// loaded; `getDefaultWhiteList` builds a new one on every call.
exports.whiteList = getDefaultWhiteList();
exports.getDefaultWhiteList = getDefaultWhiteList;
// Default hook implementations.
exports.onTag = onTag;
exports.onIgnoreTag = onIgnoreTag;
exports.onTagAttr = onTagAttr;
exports.onIgnoreTagAttr = onIgnoreTagAttr;
exports.safeAttrValue = safeAttrValue;
exports.escapeHtml = escapeHtml;
// Escaping / unescaping helpers.
exports.escapeQuote = escapeQuote;
exports.unescapeQuote = unescapeQuote;
exports.escapeHtmlEntities = escapeHtmlEntities;
exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
exports.friendlyAttrValue = friendlyAttrValue;
exports.escapeAttrValue = escapeAttrValue;
// Tag / comment / character stripping helpers.
exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
exports.StripTagBody = StripTagBody;
exports.stripCommentTag = stripCommentTag;
exports.stripBlankChar = stripBlankChar;
// The shared default FilterCSS instance, and cssfilter's own
// `getDefaultWhiteList` re-exported under a CSS-specific name.
exports.cssFilter = defaultCSSFilter;
exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList;