// Copyright 2021 Fastly, Inc.

import { SPECIAL_SCHEMES, throwInvalidUrlError } from "./util";
import { URLProperties } from "./url-properties";

/**
 * Static helpers that split a URL string into its components (protocol,
 * username/password, hostname/port, pathname, search, hash) and write them
 * onto a `URLProperties` instance.
 *
 * The `parse*` methods are designed to be chained: each one consumes the
 * leading component of the string it is given, stores it on `urlProps`, and
 * returns whatever is left for the next parser.
 */
export class URLParser {
  /**
   * Reports whether `url` starts with a scheme (e.g. `https:`).
   *
   * A `:` only counts as a scheme delimiter when it is not the first
   * character and nothing before it is a `/`, `?` or `#`; otherwise the colon
   * belongs to a path, query or fragment (e.g. `/a:b`, `?t=1:2`) and the URL
   * is relative. Strings starting with `//` are protocol relative and are
   * never absolute.
   */
  static isAbsoluteUrl(url: string): boolean {
    if (url.startsWith("//")) {
      // Protocol Relative URL
      return false;
    }

    let colonIndex = url.indexOf(":");
    if (colonIndex <= 0) {
      return false;
    }

    // A scheme can never contain a path, query or fragment delimiter
    let scheme = url.substring(0, colonIndex);
    if (scheme.includes("/") || scheme.includes("?") || scheme.includes("#")) {
      return false;
    }

    return true;
  }

  /**
   * Applies a relative URL on top of the already-parsed base URL in
   * `urlProps`.
   *
   * - `//host/path` (scheme relative): auth, host, path, search and hash are
   *   re-parsed from `relativeUrl`; the protocol is kept.
   * - Anything starting with `.`: resolved against the existing pathname.
   * - Everything else: pathname, search and hash are cleared first and the
   *   relative URL is then applied as the new path.
   *
   * https://url.spec.whatwg.org/#relative-url-string
   */
  static applySchemeOrPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    // We already have our absolute URL when this is called,
    // so we just need to work off the existing properties.

    if (relativeUrl.startsWith("//")) {
      // Protocol (scheme) relative URL.
      // NOTE(review): components missing from relativeUrl (e.g. no query)
      // keep the base URL's values here because nothing is reset first;
      // confirm whether that is intended.
      let afterAuth = URLParser.parseAuth(relativeUrl.substring(2), urlProps);
      let afterHost = URLParser.parseHost(afterAuth, urlProps);
      let afterPath = URLParser.parsePath(afterHost, urlProps);
      let afterSearch = URLParser.parseSearch(afterPath, urlProps);
      URLParser.parseHash(afterSearch, urlProps);
      return;
    }

    // Only dot-relative URLs (./ or ../) build on the original pathname
    if (!relativeUrl.startsWith(".")) {
      urlProps.pathname = "";
      urlProps.search = "";
      urlProps.hash = "";
    }

    // Must be a path relative URL
    URLParser.applyPathRelativeUrl(relativeUrl, urlProps);
  }

  /**
   * Resolves a path-relative (`../x`, `./x`, `x/y`) or path-absolute (`/x`)
   * URL against `urlProps.pathname`, collapses `/./` and `/../` segments, and
   * stores the resulting pathname, search and hash on `urlProps`.
   *
   * Throws an `Error` when a `/../` segment would climb above the root.
   */
  static applyPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    // We already have our absolute URL when this is called,
    // so we just need to work off the existing properties.

    let appliedRelativeUrl = urlProps.pathname;
    if (relativeUrl.startsWith("/")) {
      // Path absolute: replaces the current path entirely
      appliedRelativeUrl = relativeUrl;
    } else if (!appliedRelativeUrl.endsWith("/")) {
      appliedRelativeUrl += "/" + relativeUrl;
    } else {
      appliedRelativeUrl += relativeUrl;
    }

    // Ensure our navigation identifiers have the correct slashes,
    // so that a trailing "." or ".." is handled by the loops below
    if (appliedRelativeUrl.endsWith(".")) {
      appliedRelativeUrl += "/";
    }

    // Remove any filler navigation (e.g ./)
    while (appliedRelativeUrl.includes("/./")) {
      appliedRelativeUrl = appliedRelativeUrl.replace("/./", "/");
    }

    // Do any parent navigation
    while (appliedRelativeUrl.includes("/../")) {
      // Locate the "../" of the same "/../" that the loop condition matched.
      // Searching for a bare "../" would also match inside a segment such as
      // "a../" and remove the wrong part of the path.
      let parentDirectoryIndex = appliedRelativeUrl.indexOf("/../") + 1;

      // > 1 because the leading slash will be there
      if (parentDirectoryIndex > 1) {
        // Remove this directory, and the one before
        let parentIndex = appliedRelativeUrl.lastIndexOf(
          "/",
          parentDirectoryIndex - 2
        );
        let parentReplaceTerm = appliedRelativeUrl.slice(
          parentIndex,
          parentDirectoryIndex + 3
        );
        appliedRelativeUrl = appliedRelativeUrl.replace(parentReplaceTerm, "/");
      } else {
        throw new Error(
          "Relative url " +
            relativeUrl +
            " cannot be applied to the url " +
            urlProps.toString()
        );
      }
    }

    relativeUrl = appliedRelativeUrl;
    if (!relativeUrl.startsWith("/")) {
      relativeUrl = "/" + relativeUrl;
    }

    // We hit a normal path relative URL (e.g hello/goodbye/)
    // Just continue on our current path
    let urlAfterHost = relativeUrl + urlProps.search + urlProps.hash;
    let urlAfterPath = URLParser.parsePath(urlAfterHost, urlProps);
    let urlAfterSearch = URLParser.parseSearch(urlAfterPath, urlProps);
    URLParser.parseHash(urlAfterSearch, urlProps);

    // parsePath strips a single trailing slash; if the resolved URL ended
    // with one, re-add it.
    if (relativeUrl.endsWith("/") && !urlProps.pathname.endsWith("/")) {
      urlProps.pathname += "/";
    }
  }

  /**
   * Parses a full absolute URL into `urlProps`: protocol, auth and host are
   * parsed in order, and whatever follows the host is applied as a path on
   * top of an emptied pathname/search/hash.
   */
  static parseAbsoluteUrl(absoluteUrl: string, urlProps: URLProperties): void {
    // For file URLs, we should replace | with :
    if (absoluteUrl.startsWith("file:")) {
      absoluteUrl = absoluteUrl.replaceAll("|", ":");
    }

    // Chain our independent parsing functions
    let urlAfterProtocol = URLParser.parseProtocol(absoluteUrl, urlProps);
    let urlAfterAuth = URLParser.parseAuth(urlAfterProtocol, urlProps);
    let urlAfterHost = URLParser.parseHost(urlAfterAuth, urlProps);

    // Now we are at the path, let's apply the relative URL on top of our empty path
    if (urlAfterHost.length > 0) {
      urlProps.pathname = "";
      urlProps.search = "";
      urlProps.hash = "";

      // Let's remove all leading parent navigation, as it is ignored on absolute URLs
      while (urlAfterHost.startsWith("/../")) {
        urlAfterHost = urlAfterHost.replace("/../", "/");
      }

      URLParser.applyPathRelativeUrl(urlAfterHost, urlProps);
    }
  }

  /**
   * Takes in an absolute URL, stores its protocol (including the trailing
   * `:`) on `urlProps`, and returns the partial URL with everything after the
   * protocol (auth, host, pathname, search, hash).
   *
   * If `absoluteUrl` contains no `:` it is returned unchanged.
   */
  static parseProtocol(absoluteUrl: string, urlProps: URLProperties): string {
    let protocolIndex = absoluteUrl.indexOf(":");
    if (protocolIndex == -1) {
      // Did not have a protocol
      return absoluteUrl;
    }

    urlProps.protocol = absoluteUrl.substring(0, protocolIndex + 1);

    // Files need to have :// , and the path starts at the third slash.
    // Ignore anything in between.
    if (urlProps.protocol == "file:") {
      let absoluteUrlNoProtocol = absoluteUrl.replace(
        urlProps.protocol + "//",
        ""
      );
      let filePathIndex = absoluteUrlNoProtocol.indexOf("/");
      if (filePathIndex > -1) {
        return absoluteUrlNoProtocol.substring(filePathIndex);
      }
      return "";
    }

    // Skip the slashes that follow the ':' (the final character of the URL
    // is never skipped)
    let protocolEndIndex = protocolIndex + 1;
    while (
      absoluteUrl.charAt(protocolEndIndex) == "/" &&
      protocolEndIndex < absoluteUrl.length - 1
    ) {
      protocolEndIndex++;
    }

    return absoluteUrl.substring(protocolEndIndex);
  }

  /**
   * Takes in a partial URL without the protocol, stores the username and
   * password on `urlProps` (if the URL has any), and returns the partial URL
   * with everything after the auth (host, pathname, search, hash).
   *
   * The `@` only counts when it appears inside the authority, i.e. before the
   * first `/`, `?` or `#`. The password is everything after the first `:` of
   * the credentials, so it may itself contain `:`.
   */
  static parseAuth(urlAfterProtocol: string, urlProps: URLProperties): string {
    let authIndex = urlAfterProtocol.indexOf("@");

    // Find where the authority ends. We cannot look for a '.' or ':' because
    // the host won't necessarily have a domain (localhost) or a port.
    let authorityEndIndex = urlAfterProtocol.indexOf("/");
    let searchIndex = urlAfterProtocol.indexOf("?");
    if (
      searchIndex > -1 &&
      (authorityEndIndex == -1 || searchIndex < authorityEndIndex)
    ) {
      authorityEndIndex = searchIndex;
    }
    let hashIndex = urlAfterProtocol.indexOf("#");
    if (
      hashIndex > -1 &&
      (authorityEndIndex == -1 || hashIndex < authorityEndIndex)
    ) {
      authorityEndIndex = hashIndex;
    }

    // @ must precede the path, query and fragment
    if (
      authIndex > 0 &&
      (authorityEndIndex == -1 || authIndex < authorityEndIndex)
    ) {
      let auth = urlAfterProtocol.substring(0, authIndex);

      // Split on the first ':' only, so passwords may contain ':'
      let passwordIndex = auth.indexOf(":");
      if (passwordIndex > -1) {
        urlProps.username = auth.substring(0, passwordIndex);
        urlProps.password = auth.substring(passwordIndex + 1);
      } else {
        urlProps.username = auth;
      }

      // Return the remaining url
      return urlAfterProtocol.substring(authIndex + 1);
    }

    // If there was no auth, just return the url
    return urlAfterProtocol;
  }

  /**
   * Takes in a partial URL without the protocol or auth, stores the hostname
   * and port on `urlProps`, and returns the partial URL with everything after
   * the host (pathname, search, hash).
   *
   * The host ends at the first `/`, `?` or `#`. A port equal to the
   * protocol's default port is stored as an empty string. Invalid hosts or
   * ports are reported through `throwInvalidUrlError()`.
   */
  static parseHost(urlAfterAuth: string, urlProps: URLProperties): string {
    // The host runs up to whichever delimiter comes first
    let hostEndIndex = urlAfterAuth.indexOf("/");
    let searchIndex = urlAfterAuth.indexOf("?");
    if (searchIndex > -1 && (hostEndIndex == -1 || searchIndex < hostEndIndex)) {
      hostEndIndex = searchIndex;
    }
    let hashIndex = urlAfterAuth.indexOf("#");
    if (hashIndex > -1 && (hostEndIndex == -1 || hashIndex < hostEndIndex)) {
      hostEndIndex = hashIndex;
    }

    let hostnameAndPort = urlAfterAuth;
    let urlAfterHost = "";
    if (hostEndIndex > -1) {
      hostnameAndPort = urlAfterAuth.substring(0, hostEndIndex);
      urlAfterHost = urlAfterAuth.substring(hostEndIndex);
    }

    let hostname = "";
    let port = "";
    if (hostnameAndPort.includes("[")) {
      // This could be an ipv6 address
      // https://url.spec.whatwg.org/#host-writing
      // NOTE(review): only the full, uncompressed, port-less form passes the
      // checks below; "[::1]" and "[...]:8080" are both rejected.
      if (!hostnameAndPort.startsWith("[") || !hostnameAndPort.endsWith("]")) {
        throwInvalidUrlError();
      }

      let splitAddress = hostnameAndPort.split(":");
      if (splitAddress.length != 8) {
        throwInvalidUrlError();
      }

      hostname = hostnameAndPort;
    } else if (hostnameAndPort.includes(":")) {
      let hostnameAndPortSplit = hostnameAndPort.split(":");
      hostname = hostnameAndPortSplit[0];

      // The port must be a number, so try to parse it
      let portOrNaN = F32.parseInt(hostnameAndPortSplit[1], 10);
      if (isNaN(portOrNaN) || portOrNaN <= 0 || portOrNaN >= 65536) {
        throwInvalidUrlError();
      }
      port = I32.parseInt(hostnameAndPortSplit[1], 10).toString();
    } else {
      hostname = hostnameAndPort;
    }

    // Ensure that port is empty if the port is the default port for the protocol
    // https://url.spec.whatwg.org/#default-port
    if (
      port.length > 0 &&
      ((urlProps.protocol == "ftp:" && port == "21") ||
        (urlProps.protocol == "http:" && port == "80") ||
        (urlProps.protocol == "https:" && port == "443") ||
        (urlProps.protocol == "ws:" && port == "80") ||
        (urlProps.protocol == "wss:" && port == "443"))
    ) {
      port = "";
    }

    urlProps.hostname = hostname;
    urlProps.port = port;

    // Return our resulting URL
    return urlAfterHost;
  }

  /**
   * Takes in a partial URL without the protocol, auth, or host, stores the
   * pathname on `urlProps`, and returns the partial URL with everything after
   * the path (search, hash).
   *
   * The path ends at the first `?` or `#`. A single trailing slash is
   * stripped from the stored pathname (a trailing `//` is kept). If the input
   * contains no `/` at all it is returned unchanged and `urlProps` is not
   * touched.
   */
  static parsePath(urlAfterHost: string, urlProps: URLProperties): string {
    if (urlAfterHost.length == 0) {
      return "";
    }

    if (urlAfterHost.indexOf("/") == -1) {
      // Just return the string if there was no path
      return urlAfterHost;
    }

    // The path runs up to whichever of '?' / '#' comes first, so that a '?'
    // inside the fragment is not mistaken for the start of the query
    let pathEndIndex = urlAfterHost.indexOf("?");
    let hashIndex = urlAfterHost.indexOf("#");
    if (hashIndex > -1 && (pathEndIndex == -1 || hashIndex < pathEndIndex)) {
      pathEndIndex = hashIndex;
    }

    // Get the pathname
    if (pathEndIndex > -1) {
      urlProps.pathname = urlAfterHost.substring(0, pathEndIndex);
    } else {
      urlProps.pathname = urlAfterHost;
    }

    // Remove any trailing slash, if the character before is not a slash
    if (
      urlProps.pathname.endsWith("/") &&
      !urlProps.pathname.endsWith("//")
    ) {
      urlProps.pathname = urlProps.pathname.slice(
        0,
        urlProps.pathname.length - 1
      );
    }

    // Return the remaining string without the path
    if (pathEndIndex > -1) {
      return urlAfterHost.substring(pathEndIndex);
    }
    return "";
  }

  /**
   * Takes in a partial URL without the protocol, auth, host, or path, stores
   * the search (query, including the leading `?`) on `urlProps`, and returns
   * the partial URL with everything after the search (hash).
   *
   * A `?` that appears after a `#` is part of the fragment; in that case the
   * input is returned unchanged and `urlProps.search` is not modified.
   */
  static parseSearch(urlAfterPath: string, urlProps: URLProperties): string {
    if (urlAfterPath.length == 0) {
      return "";
    }

    let searchIndex = urlAfterPath.indexOf("?");
    let hashIndex = urlAfterPath.indexOf("#");

    if (searchIndex == -1 || (hashIndex > -1 && hashIndex < searchIndex)) {
      // Just return the string if there was no search
      return urlAfterPath;
    }

    if (hashIndex > -1) {
      urlProps.search = urlAfterPath.substring(0, hashIndex);
      return urlAfterPath.substring(hashIndex);
    }

    urlProps.search = urlAfterPath;
    return "";
  }

  /**
   * Takes in a partial URL without the protocol, auth, host, path, or search
   * (only a hash) and stores the hash, including the leading `#`, on
   * `urlProps`. Does nothing when the input contains no `#`.
   */
  static parseHash(urlAfterSearch: string, urlProps: URLProperties): void {
    let hashIndex = urlAfterSearch.indexOf("#");
    if (urlAfterSearch.length > 0 && hashIndex > -1) {
      urlProps.hash = urlAfterSearch.substring(hashIndex);
    }
  }

  /**
   * Checks the parsed hostname against the protocol and reports invalid
   * combinations through `throwInvalidUrlError()`:
   *
   * - an empty hostname with a special scheme other than `file:`;
   * - a dot-less, non-IPv6 hostname (e.g. `localhost`) with a special scheme
   *   other than `http:` / `https:`.
   *
   * Hostnames containing a `.` are always accepted.
   */
  static validateUrl(urlProps: URLProperties): void {
    // Hostname checks
    if (urlProps.hostname.includes(".")) {
      // The hostname is a domain or IPv4 address
      if (!SPECIAL_SCHEMES.includes(urlProps.protocol)) {
        // This is an invalid URL according to the spec:
        // https://url.spec.whatwg.org/#url-representation
        // However, this is supported by node and chrome:
        // https://nodejs.org/api/url.html#url_special_schemes
        // Do Nothing, instead of throwing the error below:
        // throw new Error("Failed to construct 'URL': Invalid URL");
      }
    } else if (urlProps.hostname == "") {
      // Check for empty host
      if (
        SPECIAL_SCHEMES.includes(urlProps.protocol) &&
        urlProps.protocol != "file:"
      ) {
        throwInvalidUrlError();
      }
    } else {
      // Must be an opaque host (e.g localhost), or ipv6
      if (urlProps.hostname.includes("[")) {
        // This url is ipv6, we are good!
      } else if (
        SPECIAL_SCHEMES.includes(urlProps.protocol) &&
        urlProps.protocol != "http:" &&
        urlProps.protocol != "https:"
      ) {
        // We do not want to allow special schemes for opaque hosts,
        // but for opaque hosts like localhost, http: and https: is valid in v8.
        // Thus, we should allow those, but not other special schemes.
        throwInvalidUrlError();
      }
    }

    // The Url is valid!
  }
}