| 1 | // Copyright 2013 The Closure Library Authors. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS-IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | /** |
| 16 | * @fileoverview The SafeUrl type and its builders. |
| 17 | * |
| 18 | * TODO(xtof): Link to document stating type contract. |
| 19 | */ |
| 20 | |
| 21 | goog.provide('goog.html.SafeUrl'); |
| 22 | |
| 23 | goog.require('goog.asserts'); |
| 24 | goog.require('goog.fs.url'); |
| 25 | goog.require('goog.i18n.bidi.Dir'); |
| 26 | goog.require('goog.i18n.bidi.DirectionalString'); |
| 27 | goog.require('goog.string.Const'); |
| 28 | goog.require('goog.string.TypedString'); |
| 29 | |
| 30 | |
| 31 | |
/**
 * A string-like object whose value is safe to use in URL context, both in DOM
 * APIs and in HTML documents.
 *
 * The security type contract carried by a SafeUrl is that its string value
 * will not cause untrusted script execution when a browser evaluates it as a
 * hyperlink URL. Consequently, assigning the value to a URL-valued DOM
 * property, or interpolating it into an HTML template in URL context (e.g.,
 * inside a href attribute), will not result in a Cross-Site-Scripting
 * vulnerability.
 *
 * Limits of the contract:
 * <ul>
 * <li>As documented in {@code goog.html.SafeUrl.unwrap}, instances are not
 *     guaranteed to be safe to interpolate into HTML without appropriate
 *     escaping.
 * <li>Nothing is guaranteed about the resource the URL refers to. In
 *     particular, SafeUrls are <b>not</b> safe to use where the referred-to
 *     resource is interpreted as trusted code, e.g., as the src of a script
 *     tag.
 * </ul>
 *
 * Do not invoke this constructor to create instances; use the factory methods
 * ({@code goog.html.SafeUrl.fromConstant}, {@code goog.html.SafeUrl.sanitize},
 * etc.) instead. The constructor deliberately takes no parameters and the type
 * is immutable, so constructor invocation can only ever yield a default
 * instance corresponding to the empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#from
 * @see goog.html.SafeUrl#sanitize
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The string wrapped by this SafeUrl. The deliberately ugly field name is
   * meant to make (non-compiled) code that reaches into it directly stand out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Per-instance copy of the type marker, checked at run time by
   * {@code goog.html.SafeUrl.unwrap}.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};
| 87 | |
| 88 | |
/**
 * The harmless URL that goog.html.SafeUrl.sanitize substitutes for an unsafe
 * URL.
 *
 * about:invalid is registered in
 * http://www.w3.org/TR/css3-values/#about-invalid.
 * http://tools.ietf.org/html/rfc6694#section-2.2.1 allows about URLs to carry
 * a fragment, and the fragment is not taken into account when deciding
 * whether an about URL is well-known.
 *
 * about:invalid is preferred over a fixed data URL because browsers might
 * choose not to report CSP violations on it: legitimate CSS function calls to
 * attr() can produce this very URL. It is also a standard URL whose semantics
 * are exactly what is needed here:
 * "The about:invalid URI references a non-existent document with a generic
 * error condition. It can be used when a URI is necessary, but the default
 * value shouldn't be resolveable as any type of document".
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';
| 110 | |
| 111 | |
/**
 * Flag marking SafeUrl as a goog.string.TypedString implementation.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;
| 117 | |
| 118 | |
/**
 * Returns this SafeUrl's value as a string.
 *
 * IMPORTANT: Wherever it is security relevant that the object really is a
 * {@code SafeUrl}, call {@code goog.html.SafeUrl.unwrap} rather than this
 * method. When in doubt, assume that it is security relevant. Note in
 * particular that goog.html functions returning a goog.html type do not
 * guarantee that the returned instance is of the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be escaped
 * appropriately before it is embedded in an HTML document, and the required
 * escaping depends on the context (e.g. embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};
| 150 | |
| 151 | |
/**
 * Flag marking SafeUrl as a goog.i18n.bidi.DirectionalString implementation.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;
| 157 | |
| 158 | |
/**
 * Returns this URL's directionality, which is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  var direction = goog.i18n.bidi.Dir.LTR;
  return direction;
};
| 166 | |
| 167 | |
if (goog.DEBUG) {
  /**
   * Produces a string representation of this value intended for debugging
   * only; it is installed only when goog.DEBUG is set.
   *
   * The result is the wrapped value enclosed in 'SafeUrl{' ... '}'. To get
   * the actual string wrapped in a SafeUrl, use
   * {@code goog.html.SafeUrl.unwrap}.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}
| 183 | |
| 184 | |
/**
 * Verifies at run time that the given object really is a SafeUrl and, if so,
 * returns the string it wraps.
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be escaped
 * appropriately before it is embedded in an HTML document, and the required
 * escaping depends on the context (e.g. embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The string contained in the SafeUrl object. If the
 *     run-time type check fails, {@code goog.asserts.fail} is invoked (which
 *     throws {@code goog.asserts.AssertionError} when assertions are
 *     enabled); otherwise the fixed string 'type_error:SafeUrl' is returned.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // Defense in depth against application code that disables compile-time type
  // checks. The object is accepted only if all of the following hold:
  // 1. It is an instance of SafeUrl.
  // 2. It is not an instance of a subclass (its constructor is SafeUrl
  //    itself).
  // 3. It carries the SafeUrl type marker. "Faking" an object therefore
  //    requires a reference to the marker, whose name is intended to stand
  //    out in code reviews.
  var isGenuineSafeUrl = safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  if (!isGenuineSafeUrl) {
    goog.asserts.fail('expected object of type SafeUrl, got \'' +
        safeUrl + '\'');
    // Only reached when the failed assertion did not throw.
    return 'type_error:SafeUrl';
  }

  return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};
| 224 | |
| 225 | |
/**
 * Builds a SafeUrl from a compile-time constant string.
 *
 * A compile-time constant string is inherently controlled by the program and
 * is therefore trusted; its value is wrapped without validation.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};
| 240 | |
| 241 | |
/**
 * Matches the MIME types of Blobs or data: URIs from which a SafeUrl may be
 * created (via URL.createObjectURL(blob) or directly from the data: URI).
 * Currently this is limited to a fixed list of image and video types, matched
 * case-insensitively against the entire type string.
 * @const
 * @private
 */
goog.html.SAFE_MIME_TYPE_PATTERN_ =
    /^(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm))$/i;
| 251 | |
| 252 | |
/**
 * Builds a SafeUrl that wraps a blob URL for the given {@code blob}.
 *
 * When {@code blob.type} is one of the known safe image or video MIME types,
 * a blob URL is obtained through {@code goog.fs.url.createObjectUrl} and
 * wrapped. For any other MIME type no blob URL is created and the returned
 * SafeUrl wraps {@link #INNOCUOUS_STRING} instead.
 *
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *     as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  var wrappedUrl;
  if (goog.html.SAFE_MIME_TYPE_PATTERN_.test(blob.type)) {
    wrappedUrl = goog.fs.url.createObjectUrl(blob);
  } else {
    wrappedUrl = goog.html.SafeUrl.INNOCUOUS_STRING;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      wrappedUrl);
};
| 270 | |
| 271 | |
/**
 * Matches a complete base-64 encoded data: URL, case-insensitively. The first
 * capture group holds the MIME type (everything between 'data:' and
 * ';base64,').
 * @const
 * @private
 */
goog.html.DATA_URL_PATTERN_ = /^data:([^;,]*);base64,[a-z0-9+\/]+=*$/i;
| 278 | |
| 279 | |
/**
 * Builds a SafeUrl that wraps a data: URL, provided the URL is a base64 data
 * URL whose MIME type is one of the known-safe image or video types.
 *
 * @param {string} dataUrl A valid base64 data URL with one of the whitelisted
 *     image or video MIME types.
 * @return {!goog.html.SafeUrl} A SafeUrl wrapping {@code dataUrl} unchanged,
 *     or {@link #INNOCUOUS_STRING} wrapped as a SafeUrl if validation fails.
 */
goog.html.SafeUrl.fromDataUrl = function(dataUrl) {
  // Residual risk: a browser that does not know the MIME type could sniff the
  // content type and execute HTML contained in the data: URL. To cause XSS it
  // would additionally have to execute that HTML in the same origin as the
  // page with the link. It seems unlikely that both of these will happen,
  // particularly in not really old IEs.
  var urlParts = dataUrl.match(goog.html.DATA_URL_PATTERN_);

  var wrappedUrl = goog.html.SafeUrl.INNOCUOUS_STRING;
  // urlParts[1] is the MIME type captured by DATA_URL_PATTERN_.
  if (urlParts && goog.html.SAFE_MIME_TYPE_PATTERN_.test(urlParts[1])) {
    wrappedUrl = dataUrl;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      wrappedUrl);
};
| 300 | |
| 301 | |
/**
 * A pattern that recognizes a commonly useful subset of URLs that satisfy
 * the SafeUrl contract.
 *
 * This regular expression matches a subset of URLs that will not cause script
 * execution if used in URL context within a HTML document. Specifically, this
 * regular expression matches if (comment from here on and regex copied from
 * Soy's EscapingConventions):
 * (1) Either a protocol in a whitelist (http, https, mailto or ftp).
 * (2) or no protocol. A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern disallows &, used in HTML entity declarations before
 * one of the characters in [/?#]. This disallows HTML entities used in the
 * protocol name, which should never happen, e.g. "h&#116;tp" for "http".
 * It also disallows HTML entities in the first path part of a relative path,
 * e.g. "foo&lt;bar/baz". Our existing escaping functions should not produce
 * that. More importantly, it disallows masking of a colon,
 * e.g. "javascript&#58;...".
 *
 * Note that the pattern is anchored only at the start of the string: it
 * constrains the scheme (or the leading scheme-less segment) and nothing
 * after it.
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
    /^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;
| 332 | |
| 333 | |
/**
 * Creates a SafeUrl object from {@code url}. If {@code url} is a
 * goog.html.SafeUrl then it is simply returned. Otherwise its string value is
 * validated to match a pattern of commonly used safe URLs.
 *
 * The string value is obtained as follows: for an object that implements
 * goog.string.TypedString, the result of {@code getTypedStringValue()} is
 * used; any other input is converted with {@code String(url)}.
 *
 * {@code url} may be a URL with the http, https, mailto or ftp scheme,
 * or a relative URL (i.e., a URL without a scheme; specifically, a
 * scheme-relative, absolute-path-relative, or path-relative URL).
 *
 * A URL that passes validation is wrapped unchanged: no character-set
 * conversion, normalization or percent-encoding is applied. Callers remain
 * responsible for the context-appropriate escaping described in
 * {@code goog.html.SafeUrl.unwrap}.
 *
 * If {@code url} fails validation, this function returns a SafeUrl object
 * containing an innocuous string, goog.html.SafeUrl.INNOCUOUS_STRING.
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  // Already carries the SafeUrl contract; hand back the same instance.
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  var urlString;
  if (url.implementsGoogStringTypedString) {
    // Other typed strings get no special trust: their value is validated
    // exactly like a plain string.
    urlString = url.getTypedStringValue();
  } else {
    urlString = String(url);
  }

  if (!goog.html.SAFE_URL_PATTERN_.test(urlString)) {
    urlString = goog.html.SafeUrl.INNOCUOUS_STRING;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      urlString);
};
| 375 | |
| 376 | |
/**
 * Unique object serving as the type marker for SafeUrl. Each instance stores
 * a reference to it, and {@code goog.html.SafeUrl.unwrap} compares that
 * reference by identity as part of its additional run-time type checking.
 * @const {!Object}
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};
| 384 | |
| 385 | |
/**
 * Package-internal factory for SafeUrl instances. The given string is stored
 * as the wrapped value as-is; no validation is performed here, so callers are
 * responsible for upholding the SafeUrl contract.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};