UNPKG

38.3 kBJavaScriptView Raw
1/**
2 * @license
3 * Copyright Google LLC All Rights Reserved.
4 *
5 * Use of this source code is governed by an MIT-style license that can be
6 * found in the LICENSE file at https://angular.io/license
7 */
8import { trustedHTMLFromString } from '../util/security/trusted_types';
9import { getInertBodyHelper } from './inert_body';
10import { _sanitizeUrl, sanitizeSrcset } from './url_sanitizer';
/**
 * Builds a lookup object from a comma-separated list of names.
 *
 * The input is split on `,` only; entries are not trimmed or lower-cased.
 *
 * @param tags Comma-separated list of names.
 * @return An object with one own property per entry, each set to `true`.
 */
function tagSet(tags) {
    const lookup = {};
    tags.split(',').forEach((name) => {
        lookup[name] = true;
    });
    return lookup;
}
/**
 * Builds the union of several lookup objects.
 *
 * Only own enumerable keys of each input are copied; inherited keys are ignored. The ownership
 * check goes through `Object.prototype.hasOwnProperty` rather than the input's own
 * `hasOwnProperty`, so inputs created with `Object.create(null)` and inputs that contain a key
 * literally named `hasOwnProperty` are handled instead of throwing.
 *
 * @param sets Lookup objects to combine. `null`/`undefined` entries contribute nothing.
 * @return A new object with one own property (set to `true`) per key found in any input.
 */
function merge(...sets) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const res = {};
    for (const s of sets) {
        for (const v in s) {
            if (hasOwn.call(s, v)) {
                res[v] = true;
            }
        }
    }
    return res;
}
// Element allowlists used by the sanitizer. Each set is a lookup object keyed by lower-case tag name.
//
// Good source of info about elements and attributes
// https://html.spec.whatwg.org/#semantics
// https://simon.html5.org/html-elements
// Safe Void Elements - HTML5
// https://html.spec.whatwg.org/#void-elements
// `endElement` does not emit a closing tag for elements in this set.
const VOID_ELEMENTS = tagSet('area,br,col,hr,img,wbr');
// Elements that you can, intentionally, leave open (and which close themselves)
// https://html.spec.whatwg.org/#optional-tags
const OPTIONAL_END_TAG_BLOCK_ELEMENTS = tagSet('colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr');
const OPTIONAL_END_TAG_INLINE_ELEMENTS = tagSet('rp,rt');
// Union of the two optional-end-tag sets above.
const OPTIONAL_END_TAG_ELEMENTS = merge(OPTIONAL_END_TAG_INLINE_ELEMENTS, OPTIONAL_END_TAG_BLOCK_ELEMENTS);
// Safe Block Elements - HTML5
const BLOCK_ELEMENTS = merge(OPTIONAL_END_TAG_BLOCK_ELEMENTS, tagSet('address,article,' +
    'aside,blockquote,caption,center,del,details,dialog,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,' +
    'h6,header,hgroup,hr,ins,main,map,menu,nav,ol,pre,section,summary,table,ul'));
// Inline Elements - HTML5
const INLINE_ELEMENTS = merge(OPTIONAL_END_TAG_INLINE_ELEMENTS, tagSet('a,abbr,acronym,audio,b,' +
    'bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,picture,q,ruby,rp,rt,s,' +
    'samp,small,source,span,strike,strong,sub,sup,time,track,tt,u,var,video'));
// Every element the serializer is allowed to emit: the union of all sets above. An element whose
// lower-cased tag name is not a key of this object has its tags stripped.
export const VALID_ELEMENTS = merge(VOID_ELEMENTS, BLOCK_ELEMENTS, INLINE_ELEMENTS, OPTIONAL_END_TAG_ELEMENTS);
// Attribute allowlists used by the sanitizer. Each set is a lookup object keyed by lower-case
// attribute name.
//
// Attributes whose value is a URL. `startElement` passes their values through `_sanitizeUrl`
// before emitting them.
export const URI_ATTRS = tagSet('background,cite,href,itemtype,longdesc,poster,src,xlink:href');
// Attributes whose value is a srcset. `startElement` passes their values through `sanitizeSrcset`
// before emitting them.
export const SRCSET_ATTRS = tagSet('srcset');
// Plain HTML attributes that are emitted as-is (entity-encoded, but not URL-sanitized).
const HTML_ATTRS = tagSet('abbr,accesskey,align,alt,autoplay,axis,bgcolor,border,cellpadding,cellspacing,class,clear,color,cols,colspan,' +
    'compact,controls,coords,datetime,default,dir,download,face,headers,height,hidden,hreflang,hspace,' +
    'ismap,itemscope,itemprop,kind,label,lang,language,loop,media,muted,nohref,nowrap,open,preload,rel,rev,role,rows,rowspan,rules,' +
    'scope,scrolling,shape,size,sizes,span,srclang,start,summary,tabindex,target,title,translate,type,usemap,' +
    'valign,value,vspace,width');
// Accessibility attributes as per WAI-ARIA 1.1 (W3C Working Draft 14 December 2018)
const ARIA_ATTRS = tagSet('aria-activedescendant,aria-atomic,aria-autocomplete,aria-busy,aria-checked,aria-colcount,aria-colindex,' +
    'aria-colspan,aria-controls,aria-current,aria-describedby,aria-details,aria-disabled,aria-dropeffect,' +
    'aria-errormessage,aria-expanded,aria-flowto,aria-grabbed,aria-haspopup,aria-hidden,aria-invalid,' +
    'aria-keyshortcuts,aria-label,aria-labelledby,aria-level,aria-live,aria-modal,aria-multiline,' +
    'aria-multiselectable,aria-orientation,aria-owns,aria-placeholder,aria-posinset,aria-pressed,aria-readonly,' +
    'aria-relevant,aria-required,aria-roledescription,aria-rowcount,aria-rowindex,aria-rowspan,aria-selected,' +
    'aria-setsize,aria-sort,aria-valuemax,aria-valuemin,aria-valuenow,aria-valuetext');
// NB: This currently consciously doesn't support SVG. SVG sanitization has had several security
// issues in the past, so it seems safer to leave it out if possible. If support for binding SVG via
// innerHTML is required, SVG attributes should be added here.
// NB: Sanitization does not allow <form> elements or other active elements (<button> etc). Those
// can be sanitized, but they increase security surface area without a legitimate use case, so they
// are left out here.
// Every attribute the serializer is allowed to emit: the union of all sets above. An attribute
// whose lower-cased name is not a key of this object is dropped.
export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS, ARIA_ATTRS);
// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
//
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
// `Some content`, but strip the `<invalid>` opening/closing tags. For some elements, though, we
// don't want to preserve the content, if the elements themselves are going to be removed.
//
// `startElement` consults this set only for elements that are not in `VALID_ELEMENTS`; for those
// it returns `false`, so the traversal does not descend into their children.
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
/**
 * Walks a DOM subtree and re-serializes it to an HTML string, emitting only elements listed in
 * `VALID_ELEMENTS` and attributes listed in `VALID_ATTRS`. All text and attribute values are
 * entity-encoded on the way out.
 */
class SanitizingHtmlSerializer {
    constructor() {
        // Raised only when content is actually dropped, so that merely re-encoding characters is
        // not reported as sanitization.
        this.sanitizedSomething = false;
        // Output fragments; `sanitizeChildren` joins them into the final string.
        this.buf = [];
    }
    /**
     * Serializes every descendant of `el` (but not `el`'s own opening tag) into the buffer.
     *
     * @param el Node whose children are to be sanitized.
     * @return The accumulated safe HTML string.
     */
    sanitizeChildren(el) {
        // This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
        // However this code never accesses properties off of `document` before deleting its
        // contents again, so it shouldn't be vulnerable to DOM clobbering.
        let node = el.firstChild;
        let descend = true;
        while (node) {
            switch (node.nodeType) {
                case Node.ELEMENT_NODE:
                    descend = this.startElement(node);
                    break;
                case Node.TEXT_NODE:
                    this.chars(node.nodeValue);
                    break;
                default:
                    // Anything that is neither an element nor text is dropped.
                    this.sanitizedSomething = true;
            }
            if (descend && node.firstChild) {
                node = node.firstChild;
                continue;
            }
            // Done with this node: close it, then move to the next sibling, or keep climbing
            // (closing ancestors on the way) until a sibling is found or the tree is exhausted.
            while (node) {
                if (node.nodeType === Node.ELEMENT_NODE) {
                    this.endElement(node);
                }
                const sibling = this.checkClobberedElement(node, node.nextSibling);
                if (sibling) {
                    node = sibling;
                    break;
                }
                node = this.checkClobberedElement(node, node.parentNode);
            }
        }
        return this.buf.join('');
    }
    /**
     * Emits the opening tag of `element` if it is allowlisted, and reports whether the traversal
     * should descend into its children. Children are traversed even when the element itself is
     * stripped, unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
     *
     * @param element The element to sanitize.
     * @return True if the element's contents should be traversed.
     */
    startElement(element) {
        const tagName = element.nodeName.toLowerCase();
        const isAllowedElement = VALID_ELEMENTS.hasOwnProperty(tagName);
        if (!isAllowedElement) {
            this.sanitizedSomething = true;
            const skipContent = SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
            return !skipContent;
        }
        this.buf.push('<', tagName);
        const attributes = element.attributes;
        for (let index = 0; index < attributes.length; index++) {
            const attribute = attributes.item(index);
            const originalName = attribute.name;
            const lookupName = originalName.toLowerCase();
            if (VALID_ATTRS.hasOwnProperty(lookupName)) {
                let value = attribute.value;
                // TODO(martinprobst): Special case image URIs for data:image/...
                if (URI_ATTRS[lookupName]) {
                    value = _sanitizeUrl(value);
                }
                if (SRCSET_ATTRS[lookupName]) {
                    value = sanitizeSrcset(value);
                }
                // The attribute is written with its original (not lower-cased) name.
                this.buf.push(' ', originalName, '="', encodeEntities(value), '"');
            }
            else {
                this.sanitizedSomething = true;
            }
        }
        this.buf.push('>');
        return true;
    }
    /**
     * Emits the closing tag of `current`, unless the element is not allowlisted (its opening tag
     * was never written) or is a void element (which has no closing tag).
     *
     * @param current The element being left.
     */
    endElement(current) {
        const tagName = current.nodeName.toLowerCase();
        if (!VALID_ELEMENTS.hasOwnProperty(tagName) || VOID_ELEMENTS.hasOwnProperty(tagName)) {
            return;
        }
        this.buf.push('</', tagName, '>');
    }
    /**
     * Appends text content to the buffer, entity-encoded.
     *
     * @param chars Raw text to emit.
     */
    chars(chars) {
        const encoded = encodeEntities(chars);
        this.buf.push(encoded);
    }
    /**
     * Guards a traversal step. When `nextNode` (read from `nextSibling` or `parentNode`) is
     * reported by `compareDocumentPosition` as contained by `node`, the property is treated as
     * clobbered and sanitization is aborted.
     *
     * @param node The node the traversal is currently on.
     * @param nextNode The candidate node to move to; may be null.
     * @return `nextNode`, unchanged.
     * @throws Error if `nextNode` is contained by `node`.
     */
    checkClobberedElement(node, nextNode) {
        if (!nextNode) {
            return nextNode;
        }
        const containedBy = Node.DOCUMENT_POSITION_CONTAINED_BY;
        if ((node.compareDocumentPosition(nextNode) & containedBy) === containedBy) {
            throw new Error(`Failed to sanitize html because the element is clobbered: ${node.outerHTML}`);
        }
        return nextNode;
    }
}
// Regular expressions used by `encodeEntities`.
// A high surrogate followed by a low surrogate, i.e. one character outside the BMP.
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
// Any single character that is NOT a space, `!`, or in the ASCII range `#` to `~`. This includes
// `"`, control characters and everything outside ASCII.
const NON_ALPHANUMERIC_REGEXP = /([^\#-~ |!])/g;
/**
 * Escapes all potentially dangerous characters, so that the resulting string can be safely
 * inserted into attribute or element text.
 *
 * Order matters: `&` is escaped first so that the `&#NNN;` references produced by the later steps
 * are not escaped again.
 *
 * @param value Raw text to encode.
 * @return `value` with `&`, `<`, `>` replaced by named entities and every character matched by
 *     the two regular expressions above replaced by a decimal character reference.
 */
function encodeEntities(value) {
    const toCharRef = (code) => '&#' + code + ';';
    const ampersandsEscaped = value.replace(/&/g, '&amp;');
    // A matched surrogate pair decodes to a single code point above U+FFFF.
    const astralEncoded = ampersandsEscaped.replace(SURROGATE_PAIR_REGEXP, (pair) => toCharRef(pair.codePointAt(0)));
    const restEncoded = astralEncoded.replace(NON_ALPHANUMERIC_REGEXP, (ch) => toCharRef(ch.charCodeAt(0)));
    return restEncoded.replace(/</g, '&lt;').replace(/>/g, '&gt;');
}
// Created on first use by `_sanitizeHtml` and reused by later calls.
let inertBodyHelper;
/**
 * Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
 * the DOM in a browser environment.
 *
 * @param defaultDoc Passed to `getInertBodyHelper` the first time this function runs.
 * @param unsafeHtmlInput Untrusted HTML; coerced with `String()`. Falsy input is treated as ''.
 * @return The result of `trustedHTMLFromString` applied to the sanitized HTML.
 * @throws Error if the parsed HTML does not stabilize within 5 re-parse passes.
 */
export function _sanitizeHtml(defaultDoc, unsafeHtmlInput) {
    let inertBodyElement = null;
    try {
        if (!inertBodyHelper) {
            inertBodyHelper = getInertBodyHelper(defaultDoc);
        }
        // Make sure the input is actually a string (TypeScript types are not enforced at runtime).
        const initialHtml = unsafeHtmlInput ? String(unsafeHtmlInput) : '';
        inertBodyElement = inertBodyHelper.getInertBodyElement(initialHtml);
        // mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a
        // browser trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become
        // dangerous. Each pass serializes the current inert element, re-parses the string from the
        // previous pass, and stops once two consecutive strings are identical.
        let previousHtml;
        let latestHtml = initialHtml;
        for (let passesLeft = 5;; passesLeft--) {
            if (passesLeft === 0) {
                throw new Error('Failed to sanitize html because the input is unstable');
            }
            previousHtml = latestHtml;
            latestHtml = inertBodyElement.innerHTML;
            inertBodyElement = inertBodyHelper.getInertBodyElement(previousHtml);
            if (previousHtml === latestHtml) {
                break;
            }
        }
        const sanitizer = new SanitizingHtmlSerializer();
        const root = getTemplateContent(inertBodyElement) || inertBodyElement;
        const safeHtml = sanitizer.sanitizeChildren(root);
        const inDevMode = typeof ngDevMode === 'undefined' || ngDevMode;
        if (inDevMode && sanitizer.sanitizedSomething) {
            console.warn('WARNING: sanitizing HTML stripped some content, see https://g.co/ng/security#xss');
        }
        return trustedHTMLFromString(safeHtml);
    }
    finally {
        // In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
        if (inertBodyElement) {
            const container = getTemplateContent(inertBodyElement) || inertBodyElement;
            for (let child = container.firstChild; child; child = container.firstChild) {
                container.removeChild(child);
            }
        }
    }
}
/**
 * Returns the `content` of a `<template>` element.
 *
 * @param el Node to inspect.
 * @return `el.content` when `el` has a `content` property and is a TEMPLATE element; otherwise
 *     `null`.
 */
export function getTemplateContent(el) {
    // The `in` check comes first (see Microsoft/TypeScript#21517).
    if ('content' in el && isTemplateElement(el)) {
        return el.content;
    }
    return null;
}
/**
 * Tells whether `el` is an element node whose `nodeName` is exactly 'TEMPLATE'.
 *
 * @param el Node to inspect.
 * @return True for TEMPLATE element nodes, false otherwise.
 */
function isTemplateElement(el) {
    const isElementNode = el.nodeType === Node.ELEMENT_NODE;
    return isElementNode && el.nodeName === 'TEMPLATE';
}
253//# sourceMappingURL=data:application/json;base64,
\No newline at end of file