// Copyright 2021 Fastly, Inc.

import { SPECIAL_SCHEMES, throwInvalidUrlError } from "./util";

import { URLProperties } from "./url-properties";

// Static helpers that split a URL string into its components (protocol, auth,
// host, path, search, hash) and write each component onto a URLProperties
// instance. Every parse* method consumes one component from the front of its
// input and returns whatever text is left for the next stage.
export class URLParser {
  // Returns the part of `input` that precedes the first character that also
  // appears in `delimiters`, or all of `input` when none of them occur.
  // Used so that components are always cut at the EARLIEST delimiter rather
  // than at whichever delimiter happens to be tested first.
  private static prefixBeforeAny(input: string, delimiters: string): string {
    for (let i = 0; i < input.length; i++) {
      if (delimiters.includes(input.charAt(i))) {
        return input.substring(0, i);
      }
    }
    return input;
  }

  // Shared tail of absolute and scheme-relative parsing: given the text that
  // follows the host, resets the path/search/hash and applies that text as a
  // root-anchored path. Does nothing when there is no text after the host.
  private static applyPathAfterHost(
    urlAfterHost: string,
    urlProps: URLProperties
  ): void {
    if (urlAfterHost.length == 0) {
      return;
    }

    urlProps.pathname = "";
    urlProps.search = "";
    urlProps.hash = "";

    // Leading parent navigation is ignored when the path starts at the root
    while (urlAfterHost.startsWith("/../")) {
      urlAfterHost = urlAfterHost.replace("/../", "/");
    }

    URLParser.applyPathRelativeUrl(urlAfterHost, urlProps);
  }

  // Returns true when `url` begins with a scheme followed by ":".
  // A scheme is an ASCII letter followed by letters, digits, "+", "-" or ".",
  // so a colon that only appears later (e.g. "/path?t=12:30", "docs/a:b")
  // does not make the URL absolute.
  static isAbsoluteUrl(url: string): boolean {
    if (url.startsWith("//")) {
      // Protocol Relative URL
      return false;
    }

    let colonIndex = url.indexOf(":");
    if (colonIndex <= 0) {
      return false;
    }

    for (let i = 0; i < colonIndex; i++) {
      let code = url.charCodeAt(i);
      let isAlpha = (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
      // 0-9, "+", "-", "." are allowed anywhere except the first position
      let isSchemeExtra =
        (code >= 48 && code <= 57) || code == 43 || code == 45 || code == 46;
      if (!isAlpha && (i == 0 || !isSchemeExtra)) {
        return false;
      }
    }

    return true;
  }

  // Applies a relative URL on top of the absolute URL already held in
  // `urlProps`. See https://url.spec.whatwg.org/#relative-url-string
  //
  // - "//host/..."  replaces everything after the protocol.
  // - "#frag"       replaces only the hash.
  // - "?query"      replaces the search and clears the hash.
  // - "./x", "../x" are resolved against the existing pathname
  //                 (existing search/hash are kept unless overridden).
  // - anything else is resolved from the root with search/hash cleared.
  static applySchemeOrPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    if (relativeUrl.startsWith("//")) {
      // Protocol (scheme) relative URL: nothing but the protocol is inherited,
      // so drop the base URL's credentials, path, search and hash first.
      // NOTE(review): assumes "" is the "unset" value for username/password,
      // as it is for pathname/search/hash elsewhere in this class - confirm
      // against URLProperties.
      urlProps.username = "";
      urlProps.password = "";
      urlProps.pathname = "";
      urlProps.search = "";
      urlProps.hash = "";

      let urlAfterAuth = URLParser.parseAuth(
        relativeUrl.substring(2),
        urlProps
      );
      let urlAfterHost = URLParser.parseHost(urlAfterAuth, urlProps);
      URLParser.applyPathAfterHost(urlAfterHost, urlProps);
      return;
    }

    if (relativeUrl.startsWith("#")) {
      // Fragment-only reference: keep path and search
      urlProps.hash = "";
    } else if (relativeUrl.startsWith("?")) {
      // Query-only reference: keep path
      urlProps.search = "";
      urlProps.hash = "";
    } else if (!relativeUrl.startsWith(".")) {
      // Not dot-relative: start again from an empty path
      urlProps.pathname = "";
      urlProps.search = "";
      urlProps.hash = "";
    }

    URLParser.applyPathRelativeUrl(relativeUrl, urlProps);
  }

  // Resolves `relativeUrl` (path-absolute "/a/b", path-relative "a/b",
  // "./a", "../a", optionally followed by "?search" and/or "#hash") against
  // urlProps.pathname and stores the resulting pathname, search and hash.
  //
  // Throws an Error when ".." would navigate above the root.
  static applyPathRelativeUrl(
    relativeUrl: string,
    urlProps: URLProperties
  ): void {
    // Only the path part takes part in "." / ".." resolution; the query and
    // fragment are carried through untouched.
    let relativePath = URLParser.prefixBeforeAny(relativeUrl, "?#");
    let suffix = relativeUrl.substring(relativePath.length);

    let appliedPath = urlProps.pathname;
    if (relativePath.startsWith("/")) {
      appliedPath = relativePath;
    } else if (relativePath.length > 0) {
      if (!appliedPath.endsWith("/")) {
        appliedPath += "/";
      }
      appliedPath += relativePath;
    }

    // A trailing "." or ".." segment needs a closing slash so the loops below
    // can see it. Names that merely end in a dot ("report.") are left alone.
    if (appliedPath.endsWith("/.") || appliedPath.endsWith("/..")) {
      appliedPath += "/";
    }

    // Remove any filler navigation (e.g ./)
    while (appliedPath.includes("/./")) {
      appliedPath = appliedPath.replace("/./", "/");
    }

    // Do any parent navigation
    while (appliedPath.includes("/../")) {
      // Index of the "../" that belongs to a whole ".." segment
      let parentDirectoryIndex = appliedPath.indexOf("/../") + 1;

      // > 1 because the leading slash will be there
      if (parentDirectoryIndex > 1) {
        // Remove this segment, and the one before it
        let parentIndex = appliedPath.lastIndexOf(
          "/",
          parentDirectoryIndex - 2
        );
        appliedPath =
          appliedPath.substring(0, parentIndex) +
          "/" +
          appliedPath.substring(parentDirectoryIndex + 3);
      } else {
        throw new Error(
          "Relative url " +
            relativeUrl +
            " cannot be applied to the url " +
            urlProps.toString()
        );
      }
    }

    if (!appliedPath.startsWith("/")) {
      appliedPath = "/" + appliedPath;
    }

    if (suffix.length == 0) {
      // Nothing supplied: carry over whatever search/hash is already set
      suffix = urlProps.search + urlProps.hash;
    } else if (suffix.startsWith("#")) {
      // Only a hash supplied: the existing search survives
      suffix = urlProps.search + suffix;
    } else if (!suffix.includes("#")) {
      // A new search without a hash invalidates the old hash
      urlProps.hash = "";
    }

    let urlAfterPath = URLParser.parsePath(appliedPath + suffix, urlProps);
    let urlAfterSearch = URLParser.parseSearch(urlAfterPath, urlProps);
    URLParser.parseHash(urlAfterSearch, urlProps);

    // parsePath strips a single trailing slash; put it back when the
    // resolved path had one.
    if (appliedPath.endsWith("/") && !urlProps.pathname.endsWith("/")) {
      urlProps.pathname += "/";
    }
  }

  // Parses a full absolute URL into `urlProps` by chaining the component
  // parsers below.
  static parseAbsoluteUrl(absoluteUrl: string, urlProps: URLProperties): void {
    // For file URLs, we should replace | with :
    if (absoluteUrl.startsWith("file:")) {
      absoluteUrl = absoluteUrl.replaceAll("|", ":");
    }

    let urlAfterProtocol = URLParser.parseProtocol(absoluteUrl, urlProps);
    let urlAfterAuth = URLParser.parseAuth(urlAfterProtocol, urlProps);
    let urlAfterHost = URLParser.parseHost(urlAfterAuth, urlProps);

    // Now we are at the path, apply it on top of an empty path
    URLParser.applyPathAfterHost(urlAfterHost, urlProps);
  }

  // Takes in an absolute URL,
  // applies the URL protocol (including the trailing ":") to `urlProps`,
  // and returns everything after the protocol and its slashes
  // (auth, host, pathname, search, hash).
  // Returns the input unchanged when it contains no ":".
  static parseProtocol(absoluteUrl: string, urlProps: URLProperties): string {
    let protocolIndex = absoluteUrl.indexOf(":");
    if (protocolIndex < 0) {
      // Did not have a protocol
      return absoluteUrl;
    }

    urlProps.protocol = absoluteUrl.substring(0, protocolIndex + 1);

    // Files need to have :// , and the path starts at the third slash
    // Ignore anything in between
    if (urlProps.protocol == "file:") {
      let withoutProtocol = absoluteUrl.replace(urlProps.protocol + "//", "");
      let firstSlash = withoutProtocol.indexOf("/");
      if (firstSlash > -1) {
        return withoutProtocol.substring(firstSlash);
      }
      return "";
    }

    // Skip the slashes that follow the ":" (never consuming the last character)
    let protocolEndIndex = protocolIndex + 1;
    while (
      absoluteUrl.charAt(protocolEndIndex) == "/" &&
      protocolEndIndex < absoluteUrl.length - 1
    ) {
      protocolEndIndex++;
    }

    return absoluteUrl.substring(protocolEndIndex);
  }

  // Takes in a partial URL without the protocol.
  // Applies the username/password from the partial URL (if there is one).
  // Returns a partial URL with everything after auth (host, pathname, search, hash).
  static parseAuth(urlAfterProtocol: string, urlProps: URLProperties): string {
    // Credentials can only live in the authority, i.e. before the first
    // "/", "?" or "#". An "@" later on (e.g. "?email=a@b.com") is just data.
    let authority = URLParser.prefixBeforeAny(urlAfterProtocol, "/?#");
    let authIndex = authority.indexOf("@");
    if (authIndex < 0) {
      // If there was no auth, just return the url
      return urlAfterProtocol;
    }

    let auth = authority.substring(0, authIndex);

    // Only the first ":" separates username from password; the password
    // itself may contain further colons.
    let separatorIndex = auth.indexOf(":");
    if (separatorIndex > -1) {
      urlProps.username = auth.substring(0, separatorIndex);
      urlProps.password = auth.substring(separatorIndex + 1);
    } else {
      urlProps.username = auth;
    }

    // Return the remaining url
    return urlAfterProtocol.substring(authIndex + 1);
  }

  // Takes in a partial URL without the protocol or auth.
  // Applies the hostname and port from the partial url.
  // Returns a partial URL with everything after the host (pathname, search, hash).
  //
  // Calls throwInvalidUrlError() for a malformed IPv6 literal or for a port
  // that is not a number in 1..65535.
  static parseHost(urlAfterAuth: string, urlProps: URLProperties): string {
    // The host ends at the first "/", "?" or "#", whichever comes first
    let hostnameAndPort = URLParser.prefixBeforeAny(urlAfterAuth, "/?#");
    let urlAfterHost = urlAfterAuth.substring(hostnameAndPort.length);

    let hostname = "";
    let port = "";
    let portText = "";
    let hasPort = false;

    if (hostnameAndPort.includes("[")) {
      // This could be an ipv6 address
      // https://url.spec.whatwg.org/#host-writing
      let closingIndex = hostnameAndPort.indexOf("]");
      if (!hostnameAndPort.startsWith("[") || closingIndex < 0) {
        throwInvalidUrlError();
      }

      hostname = hostnameAndPort.substring(0, closingIndex + 1);

      // Sanity check on the group count (not a full IPv6 validation):
      // an uncompressed address has 8 groups (7 colons); a "::"-compressed
      // one can have at most 8 colons.
      let colonCount = hostname.split(":").length - 1;
      let isCompressed = hostname.includes("::");
      if (
        (isCompressed && colonCount > 8) ||
        (!isCompressed && colonCount != 7)
      ) {
        throwInvalidUrlError();
      }

      // Anything after the closing bracket must be ":port"
      let afterBracket = hostnameAndPort.substring(closingIndex + 1);
      if (afterBracket.length > 0) {
        if (!afterBracket.startsWith(":")) {
          throwInvalidUrlError();
        }
        portText = afterBracket.substring(1);
        hasPort = true;
      }
    } else if (hostnameAndPort.includes(":")) {
      let hostnameAndPortSplit = hostnameAndPort.split(":");
      hostname = hostnameAndPortSplit[0];
      portText = hostnameAndPortSplit[1];
      hasPort = true;
    } else {
      hostname = hostnameAndPort;
    }

    if (hasPort) {
      // The port must be a number, so try to parse it
      let portOrNaN = F32.parseInt(portText, 10);
      if (isNaN(portOrNaN) || portOrNaN <= 0 || portOrNaN >= 65536) {
        throwInvalidUrlError();
      }

      port = I32.parseInt(portText, 10).toString();
    }

    // Ensure that port is empty if the port is a default port per the protocol
    // https://url.spec.whatwg.org/#default-port
    if (
      port.length > 0 &&
      ((urlProps.protocol == "ftp:" && port == "21") ||
        (urlProps.protocol == "http:" && port == "80") ||
        (urlProps.protocol == "https:" && port == "443") ||
        (urlProps.protocol == "ws:" && port == "80") ||
        (urlProps.protocol == "wss:" && port == "443"))
    ) {
      port = "";
    }

    urlProps.hostname = hostname;
    urlProps.port = port;

    return urlAfterHost;
  }

  // Takes in a partial URL without the protocol, auth, or host.
  // Applies the path from the partial url; a single trailing slash is removed
  // from the stored pathname (a trailing "//" is kept).
  // Returns a partial URL with everything after the path (search, hash).
  // When the text before the first "?" or "#" contains no "/", nothing is
  // stored and the input is returned unchanged.
  static parsePath(urlAfterHost: string, urlProps: URLProperties): string {
    if (urlAfterHost.length == 0) {
      return "";
    }

    // The path ends at the first "?" or "#", whichever comes first
    let path = URLParser.prefixBeforeAny(urlAfterHost, "?#");
    if (!path.includes("/")) {
      // Just return the string if there was no path
      return urlAfterHost;
    }

    // Remove any trailing slash, if the character before is not a slash
    if (path.endsWith("/") && !path.endsWith("//")) {
      urlProps.pathname = path.slice(0, path.length - 1);
    } else {
      urlProps.pathname = path;
    }

    // Return the remaining string without the path
    return urlAfterHost.substring(path.length);
  }

  // Takes in a partial URL without the protocol, auth, host, or path.
  // Applies the search (including its leading text up to the "#") from the
  // partial url. A "?" that only appears after "#" belongs to the hash and is
  // not treated as a search.
  // Returns a partial URL with everything after the search (hash).
  static parseSearch(urlAfterPath: string, urlProps: URLProperties): string {
    if (urlAfterPath.length == 0) {
      return "";
    }

    let beforeHash = URLParser.prefixBeforeAny(urlAfterPath, "#");
    if (beforeHash.includes("?")) {
      urlProps.search = beforeHash;
      return urlAfterPath.substring(beforeHash.length);
    }

    // Just return the string if there was no search
    return urlAfterPath;
  }

  // Takes in a partial URL without the protocol, auth, host, path, or search (Only a hash).
  // Applies the hash (from the first "#" to the end) from the partial url.
  // Leaves urlProps.hash untouched when there is no "#".
  static parseHash(urlAfterSearch: string, urlProps: URLProperties): void {
    let hashIndex = urlAfterSearch.indexOf("#");
    if (hashIndex > -1) {
      urlProps.hash = urlAfterSearch.substring(hashIndex);
    }
  }

  // Checks the protocol/hostname combination held in `urlProps` and calls
  // throwInvalidUrlError() when it is not allowed. Returns normally otherwise.
  static validateUrl(urlProps: URLProperties): void {
    let hostname = urlProps.hostname;
    let protocol = urlProps.protocol;

    if (hostname.includes(".")) {
      // Domain or IPv4 address.
      // For non-special schemes this is invalid according to the spec:
      // https://url.spec.whatwg.org/#url-representation
      // However, this is supported by node and chrome:
      // https://nodejs.org/api/url.html#url_special_schemes
      // so it is accepted here instead of throwing.
      return;
    }

    let isSpecialScheme = SPECIAL_SCHEMES.includes(protocol);

    if (hostname == "") {
      // An empty host is only allowed for file: among the special schemes
      if (isSpecialScheme && protocol != "file:") {
        throwInvalidUrlError();
      }
      return;
    }

    // Must be an opaque host (e.g localhost), or ipv6
    if (hostname.includes("[")) {
      // This url is ipv6, we are good!
      return;
    }

    // We do not want to allow special schemes for opaque hosts,
    // but for opaque hosts like localhost, http: and https: is valid in v8.
    // Thus, we should allow those, but not other special schemes.
    if (isSpecialScheme && protocol != "http:" && protocol != "https:") {
      throwInvalidUrlError();
    }
  }
}