| 1 | // Copyright 2013 The Closure Library Authors. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS-IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | /** |
| 16 | * @fileoverview The SafeUrl type and its builders. |
| 17 | * |
| 18 | * TODO(user): Link to document stating type contract. |
| 19 | */ |
| 20 | |
| 21 | goog.provide('goog.html.SafeUrl'); |
| 22 | |
| 23 | goog.require('goog.asserts'); |
| 24 | goog.require('goog.fs.url'); |
| 25 | goog.require('goog.i18n.bidi.Dir'); |
| 26 | goog.require('goog.i18n.bidi.DirectionalString'); |
| 27 | goog.require('goog.string.Const'); |
| 28 | goog.require('goog.string.TypedString'); |
| 29 | |
| 30 | |
| 31 | |
/**
 * A string-like object that is safe to use in URL context in DOM APIs and
 * HTML documents.
 *
 * Security type contract: the value of a SafeUrl, taken as a string, will not
 * cause untrusted script execution when a browser evaluates it as a hyperlink
 * URL.
 *
 * Consequently, values of this type may be used in URL/hyperlink contexts --
 * for example, assigned to URL-valued DOM properties, or interpolated into an
 * HTML template in URL context (e.g., inside a href attribute) -- without that
 * use resulting in a Cross-Site-Scripting vulnerability.
 *
 * Limits of the contract:
 * <ul>
 * <li>As documented in {@code goog.html.SafeUrl.unwrap}, instances are not
 *     guaranteed to be safe to interpolate into HTML without appropriate
 *     escaping.
 * <li>No guarantee is made about the resource the URL refers to. In
 *     particular, SafeUrls are <b>not</b> safe to use in a context where the
 *     referred-to resource is interpreted as trusted code, e.g., as the src
 *     of a script tag.
 * </ul>
 *
 * Instances must be created via the factory methods (e.g.,
 * {@code goog.html.SafeUrl.fromConstant}, {@code goog.html.SafeUrl.sanitize})
 * and not by invoking the constructor. The constructor intentionally takes no
 * parameters and the type is immutable, so invoking it directly only yields a
 * default instance that wraps the empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#from
 * @see goog.html.SafeUrl#sanitize
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The string wrapped by this SafeUrl; defaults to the empty string. The
   * field name is deliberately ugly so that (non-compiled) code which reaches
   * into it directly stands out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Per-instance copy of the class-level type marker; compared against the
   * class-level marker by {@code unwrap} as an additional run-time type check.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};
| 87 | |
| 88 | |
/**
 * The innocuous string that goog.html.SafeUrl.sanitize substitutes for a URL
 * it considers unsafe.
 *
 * Why about:invalid:
 * <ul>
 * <li>It is registered in http://www.w3.org/TR/css3-values/#about-invalid and
 *     has exactly the semantics needed here: "The about:invalid URI
 *     references a non-existent document with a generic error condition. It
 *     can be used when a URI is necessary, but the default value shouldn't be
 *     resolveable as any type of document".
 * <li>It seems preferable to a fixed data URL, since browsers might choose to
 *     not report CSP violations on it, as legitimate CSS function calls to
 *     attr() can result in this URL being produced.
 * </ul>
 *
 * Why the fragment is allowed: http://tools.ietf.org/html/rfc6694#section-2.2.1
 * permits about URLs to contain a fragment, which is not to be considered when
 * determining if an about URL is well-known.
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';
| 110 | |
| 111 | |
/**
 * Marker property identifying SafeUrl instances as implementations of
 * goog.string.TypedString.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;
| 117 | |
| 118 | |
/**
 * Returns this SafeUrl's value as a string.
 *
 * IMPORTANT: This method performs no run-time type check. Wherever it is
 * security relevant that an object really is a {@code SafeUrl}, call
 * {@code goog.html.SafeUrl.unwrap} instead; if in doubt, assume that it is
 * security relevant. In particular, goog.html functions which return a
 * goog.html type do not guarantee that the returned instance is of the right
 * type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value MUST still be appropriately escaped before it
 * is embedded in an HTML document, and the required escaping is
 * context-sensitive (e.g. embedding a URL in a style property within a style
 * attribute requires a different escaping than embedding it in a href
 * attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};
| 150 | |
| 151 | |
/**
 * Marker property identifying SafeUrl instances as implementations of
 * goog.i18n.bidi.DirectionalString.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;
| 157 | |
| 158 | |
/**
 * Returns this URL's directionality, which is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  var direction = goog.i18n.bidi.Dir.LTR;
  return direction;
};
| 166 | |
| 167 | |
// The debug representation is only installed when goog.DEBUG is set.
if (goog.DEBUG) {
  /**
   * Returns a debug string-representation of this value, of the form
   * {@code SafeUrl{<wrapped value>}}.
   *
   * This is not the wrapped string itself; to obtain the actual string value
   * wrapped in a SafeUrl, use {@code goog.html.SafeUrl.unwrap}.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}
| 183 | |
| 184 | |
/**
 * Performs a runtime check that the provided object is indeed a SafeUrl
 * object, and returns its value.
 *
 * IMPORTANT: The guarantees of the SafeUrl type contract only extend to the
 * behavior of browsers when interpreting URLs. Values of SafeUrl objects MUST
 * be appropriately escaped before embedding in a HTML document. Note that the
 * required escaping is context-sensitive (e.g. a different escaping is
 * required for embedding a URL in a style property within a style
 * attribute, as opposed to embedding in a href attribute).
 *
 * Note that the returned value does not necessarily correspond to the string
 * with which the SafeUrl was constructed, since goog.html.SafeUrl.sanitize
 * will percent-encode many characters.
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The SafeUrl object's contained string, unless the run-time
 *     type check fails. In that case, {@code unwrap} returns an innocuous
 *     string, or, if assertions are enabled, throws
 *     {@code goog.asserts.AssertionError}.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // Perform additional run-time type-checking to ensure that safeUrl is indeed
  // an instance of the expected type. This provides some additional protection
  // against security bugs due to application code that disables type checks.
  // Specifically, the following checks are performed:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries a type marker for the expected type. "Faking" an
  //    object requires a reference to the type marker, which has names
  //    intended to stand out in code reviews.
  if (safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_) {
    return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  }

  // Hand the offending value to goog.asserts.fail as a '%s' substitution
  // argument rather than concatenating it into the message here. Eager
  // concatenation is evaluated even when assertions are disabled, so a value
  // that cannot be converted to a string (e.g. a Symbol or an object without
  // a usable toString) would throw a TypeError before the innocuous string
  // below could be returned.
  goog.asserts.fail('expected object of type SafeUrl, got \'%s\'', safeUrl);
  return 'type_error:SafeUrl';
};
| 228 | |
| 229 | |
/**
 * Creates a SafeUrl object from a compile-time constant string.
 *
 * A compile-time constant string is inherently program-controlled and is
 * therefore trusted; its value is wrapped as-is, without going through
 * goog.html.SafeUrl.sanitize.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};
| 244 | |
| 245 | |
/**
 * Case-insensitive pattern for the Blob MIME types whose blob URLs (as
 * produced by URL.createObjectURL(blob)) may be wrapped in a SafeUrl.
 * Currently this covers image types only: bmp, gif, jpeg, jpg, png, tiff and
 * webp.
 * @const
 * @private
 */
goog.html.SAFE_BLOB_TYPE_PATTERN_ =
    /^image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)$/i;
| 254 | |
| 255 | |
/**
 * Creates a SafeUrl wrapping a blob URL for the given {@code blob}. The blob
 * URL is obtained through {@code goog.fs.url.createObjectUrl}. If the MIME
 * type of {@code blob} does not match the known safe image MIME types, no
 * blob URL is created and the SafeUrl wraps {@link #INNOCUOUS_STRING}
 * instead.
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob The blob to create a URL for.
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *     as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  // Reject anything that is not a whitelisted image type up front.
  if (!goog.html.SAFE_BLOB_TYPE_PATTERN_.test(blob.type)) {
    return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
        goog.html.SafeUrl.INNOCUOUS_STRING);
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      goog.fs.url.createObjectUrl(blob));
};
| 271 | |
| 272 | |
/**
 * A pattern that recognizes a commonly useful subset of URLs that satisfy
 * the SafeUrl contract.
 *
 * The regular expression matches a subset of URLs that will not cause script
 * execution if used in URL context within a HTML document. Specifically, it
 * matches if (comment from here on and regex copied from Soy's
 * EscapingConventions):
 * (1) Either a protocol in a whitelist (http, https, mailto or ftp).
 * (2) or no protocol. A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern disallows &, used in HTML entity declarations, before one of
 * the characters in [/?#]. This disallows HTML entities used in the protocol
 * name, which should never happen, e.g. "h&#116;tp" for "http". It also
 * disallows HTML entities in the first path part of a relative path, e.g.
 * "foo&lt;bar/baz". Our existing escaping functions should not produce that.
 * More importantly, it disallows masking of a colon, e.g.
 * "javascript&#58;...".
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
    /^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;
| 303 | |
| 304 | |
/**
 * Creates a SafeUrl object from {@code url}.
 *
 * If {@code url} is already a goog.html.SafeUrl it is returned unchanged.
 * Otherwise its string value is taken -- via {@code getTypedStringValue} for
 * a goog.string.TypedString, via {@code String()} for anything else -- and
 * validated against a pattern of commonly used safe URLs.
 *
 * Accepted inputs are URLs with the http, https, mailto or ftp scheme, and
 * relative URLs (i.e., URLs without a scheme; specifically, scheme-relative,
 * absolute-path-relative, or path-relative URLs).
 *
 * An accepted URL is converted to UTF-8 and non-whitelisted characters are
 * percent-encoded. Whitelisted characters are '%' and, from RFC 3986,
 * unreserved characters and reserved characters, with the exception of '\'',
 * '(' and ')'. This ensures that the SafeUrl contains only ASCII-printable
 * characters and reduces the chance of security bugs were it to be
 * interpolated into a specific context without the necessary escaping.
 *
 * If {@code url} fails validation or does not UTF-16 decode correctly
 * (JavaScript strings are UTF-16 encoded), this function returns a SafeUrl
 * object containing an innocuous string, goog.html.SafeUrl.INNOCUOUS_STRING.
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  // Already carries the SafeUrl contract; nothing to do.
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  var rawUrl = url.implementsGoogStringTypedString ?
      url.getTypedStringValue() :
      String(url);

  var sanitizedUrl = goog.html.SAFE_URL_PATTERN_.test(rawUrl) ?
      goog.html.SafeUrl.normalize_(rawUrl) :
      goog.html.SafeUrl.INNOCUOUS_STRING;

  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      sanitizedUrl);
};
| 348 | |
| 349 | |
/**
 * Normalizes {@code url} to the UTF-8 encoding of url, using a whitelist of
 * characters. Whitelisted characters are not percent-encoded.
 *
 * The URL is first passed through {@code encodeURI}; the result is then
 * adjusted according to NORMALIZE_MATCHER_ and NORMALIZE_REPLACER_MAP_. If
 * {@code encodeURI} throws, goog.html.SafeUrl.INNOCUOUS_STRING is returned.
 *
 * @param {string} url The URL to normalize.
 * @return {string} The normalized URL.
 * @private
 */
goog.html.SafeUrl.normalize_ = function(url) {
  var encodedUrl;
  try {
    encodedUrl = encodeURI(url);
  } catch (e) {
    // Happens if url contains invalid surrogate sequences.
    return goog.html.SafeUrl.INNOCUOUS_STRING;
  }

  var replacements = goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_;
  return encodedUrl.replace(
      goog.html.SafeUrl.NORMALIZE_MATCHER_,
      function(token) {
        return replacements[token];
      });
};
| 370 | |
| 371 | |
/**
 * Matches the characters and strings which need to be replaced in the string
 * generated by encodeURI. Specifically:
 *
 * - '\'', '(' and ')' are left unencoded by encodeURI. They are part of the
 *   reserved characters group in RFC 3986 but only appear in the obsolete
 *   mark production in Appendix D.2 of RFC 3986, so they can be encoded
 *   without changing semantics.
 * - '[' and ']' are encoded by encodeURI (as %5B and %5D), despite being
 *   reserved characters which can be used to represent IPv6 addresses. So
 *   they need to be decoded.
 * - '%' is encoded by encodeURI (as %25). However, encoding '%' characters
 *   that are already part of a valid percent-encoded sequence changes the
 *   semantics of a URL, and hence they need to be preserved. Note that this
 *   may allow non-encoded '%' characters to remain in the URL (i.e.,
 *   occurrences of '%' that are not part of a valid percent-encoded sequence,
 *   for example, 'ab%xy').
 *
 * @const {!RegExp}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_MATCHER_ = /[()']|%5B|%5D|%25/g;
| 393 | |
| 394 | |
/**
 * Replacement for each token matched by NORMALIZE_MATCHER_ in the string
 * generated by encodeURI, keyed by the matched token.
 * @const {!Object<string, string>}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_ = {
  // Characters that encodeURI leaves alone: percent-encode them.
  '\'': '%27',
  '(': '%28',
  ')': '%29',
  // Sequences that encodeURI produced: restore the original character.
  '%5B': '[',
  '%5D': ']',
  '%25': '%'
};
| 408 | |
| 409 | |
/**
 * Type marker for the SafeUrl type. The constructor stores a reference to
 * this object on every instance, and {@code unwrap} compares that reference
 * against this one as an additional run-time type check.
 * @const
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};
| 417 | |
| 418 | |
/**
 * Package-internal utility method to create SafeUrl instances.
 *
 * The given string is stored as-is; this method performs no validation or
 * normalization of its own.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};