UNPKG

16.6 kBJavaScriptView Raw
1'use strict';
2
3var required = require('requires-port')
4 , qs = require('querystringify')
5 , controlOrWhitespace = /^[\x00-\x20\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/
6 , CRHTLF = /[\n\r\t]/g
7 , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:\/\//
8 , port = /:\d+$/
9 , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i
10 , windowsDriveLetter = /^[a-zA-Z]:/;
11
/**
 * Strip control characters and whitespace from the start of a value.
 *
 * Falsy input is treated as the empty string, anything else is converted with
 * its own `toString()` before it is trimmed.
 *
 * @param {Object|String} str Value to trim.
 * @returns {String} String form of `str` without the leading characters
 * matched by `controlOrWhitespace`.
 * @public
 */
function trimLeft(str) {
  var text = str ? str.toString() : '';

  return text.replace(controlOrWhitespace, '');
}
23
/**
 * Parse instructions for the URL parser. They are applied in order to what
 * is left of the address. An entry is either a function, which receives the
 * remaining address and the URL instance and returns the new remaining
 * address, or an array with the following slots:
 *
 * 0. What to look for. A string is located with `indexOf` (`lastIndexOf` for
 *    the `@`), a RegExp is run with `exec` and `NaN` means: use the remaining
 *    address as value.
 * 1. The property that is set on the URL instance.
 * 2. When it is a number the value is taken from the front of the address and
 *    the number is the amount of extra characters that is split off. When it
 *    is not a number the value is taken from the back.
 * 3. Inherit the value from the location when the URL is relative and no
 *    value was found.
 * 4. `toLowerCase` the resulting value.
 */
var rules = [
  ['#', 'hash'],                        // Taken from the back.
  ['?', 'query'],                       // Taken from the back.
  function sanitize(address, url) {     // Clean up what is left.
    if (!isSpecial(url.protocol)) return address;

    return address.replace(/\\/g, '/');
  },
  ['/', 'pathname'],                    // Taken from the back.
  ['@', 'auth', 1],                     // Taken from the front.
  [NaN, 'host', undefined, 1, 1],       // Whatever is left over.
  [/:(\d*)$/, 'port', undefined, 1],    // RegExp against the back.
  [NaN, 'hostname', undefined, 1, 1]    // Whatever is left over.
];
48
/**
 * Properties that are never copied or inherited from a location: they are
 * skipped when a location object is copied and removed from a location that
 * was parsed from a string.
 *
 * @type {Object}
 * @private
 */
var ignore = {
  hash: 1,
  query: 1
};
58
/**
 * Create the location object that is used as default for relative URL's.
 *
 * When no location is supplied the `location` of the global object is used
 * (`window`, `global` or `self`, whichever is found first) and when that is
 * missing as well an empty object. The location is then handled as follows:
 *
 * - Its `protocol` is `blob:`: the unescaped `pathname` is parsed as URL and
 *   returned, as that is where the blob URL carries the actual origin.
 * - It is a string: it is parsed as URL and the ignored properties are
 *   deleted from the result.
 * - It is an object: all its enumerable properties are copied, except for the
 *   ignored ones. A missing `slashes` is derived from the `href`.
 * - Anything else results in an empty object.
 *
 * @param {Object|String} loc Optional default location object.
 * @returns {Object} lolcation object.
 * @public
 */
function lolcation(loc) {
  var root = typeof window !== 'undefined' ? window
    : typeof global !== 'undefined' ? global
    : typeof self !== 'undefined' ? self
    : {};

  var fallback = root.location || {}
    , source = loc || fallback
    , kind = typeof source
    , result
    , key;

  if ('blob:' === source.protocol) {
    return new Url(unescape(source.pathname), {});
  }

  if ('string' === kind) {
    result = new Url(source, {});
    for (key in ignore) delete result[key];

    return result;
  }

  result = {};

  if ('object' === kind) {
    for (key in source) {
      if (!(key in ignore)) result[key] = source[key];
    }

    if (result.slashes === undefined) {
      result.slashes = slashes.test(source.href);
    }
  }

  return result;
}
104
/**
 * Check whether a protocol scheme is special, which is the case for `file:`,
 * `ftp:`, `http:`, `https:`, `ws:` and `wss:`. The comparison is exact, so
 * the scheme needs to be in lowercase and include the trailing colon.
 *
 * @param {String} scheme The protocol scheme of the URL
 * @return {Boolean} `true` if the protocol scheme is special, else `false`
 * @private
 */
function isSpecial(scheme) {
  switch (scheme) {
    case 'file:':
    case 'ftp:':
    case 'http:':
    case 'https:':
    case 'ws:':
    case 'wss:':
      return true;

    default:
      return false;
  }
}
122
/**
 * @typedef ProtocolExtract
 * @type Object
 * @property {String} protocol Protocol matched in the URL, in lowercase.
 * @property {Boolean} slashes `true` if protocol is followed by "//" or if the
 * protocol is special, else `false`.
 * @property {Number} slashesCount Amount of forward and backward slashes
 * that directly follow the protocol.
 * @property {String} rest Rest of the URL that is not part of the protocol.
 */

/**
 * Extract protocol information from a URL with/without double slash ("//").
 *
 * Leading control characters and whitespace, and every tab, line feed and
 * carriage return, are removed from the address before it is matched.
 *
 * @param {String} address URL we want to extract from.
 * @param {Object} location Location of which the `protocol` is consulted
 * when the address has no protocol of its own.
 * @return {ProtocolExtract} Extracted information.
 * @private
 */
function extractProtocol(address, location) {
  address = trimLeft(address).replace(CRHTLF, '');
  location = location || {};

  var parts = protocolre.exec(address)
    , scheme = parts[1] ? parts[1].toLowerCase() : ''
    , hasForwardSlashes = !!parts[2]
    //
    // All slashes that follow the protocol: the "//" (if any) and any extra
    // forward or backward slashes behind it.
    //
    , leading = (parts[2] || '') + (parts[3] || '')
    , slashesCount = leading.length
    , remainder = parts[4]
    , rest = leading + remainder;

  if (scheme === 'file:') {
    if (slashesCount >= 2) rest = rest.slice(2);
  } else if (isSpecial(scheme)) {
    //
    // Special protocols ignore every slash that follows them.
    //
    rest = remainder;
  } else if (scheme) {
    if (hasForwardSlashes) rest = rest.slice(2);
  } else if (slashesCount >= 2 && isSpecial(location.protocol)) {
    rest = remainder;
  }

  return {
    protocol: scheme,
    slashes: hasForwardSlashes || isSpecial(scheme),
    slashesCount: slashesCount,
    rest: rest
  };
}
189
/**
 * Resolve a relative URL pathname against a base URL pathname.
 *
 * The last segment of the base is replaced by the segments of the relative
 * pathname, after which every `.` is dropped and every `..` drops the closest
 * preceding segment that is still there.
 *
 * @param {String} relative Pathname of the relative URL.
 * @param {String} base Pathname of the base URL, `/` is used when it is empty
 * or missing.
 * @return {String} Resolved pathname.
 * @private
 */
function resolve(relative, base) {
  if (relative === '') return base;

  var segments = (base || '/').split('/');

  segments.pop();
  segments = segments.concat(relative.split('/'));

  var tail = segments[segments.length - 1]
    , kept = []
    , pending = 0
    , rooted = false
    , idx
    , segment;

  //
  // Walk from the back so that a `..` is seen before the segment it removes.
  //
  for (idx = segments.length - 1; idx >= 0; idx--) {
    segment = segments[idx];

    if (segment === '.') continue;

    if (segment === '..') {
      pending++;
      continue;
    }

    if (pending) {
      //
      // The first segment got removed, put an empty one back afterwards so
      // the result keeps its leading slash.
      //
      if (idx === 0) rooted = true;
      pending--;
      continue;
    }

    kept.push(segment);
  }

  kept.reverse();

  if (rooted) kept.unshift('');
  if (tail === '.' || tail === '..') kept.push('');

  return kept.join('/');
}
225
/**
 * The actual URL instance. A real constructor is used instead of returning a
 * plain object, it can also be called without `new`.
 *
 * It is worth noting that we should not use `URL` as class name to prevent
 * clashes with the global URL instance that got introduced in browsers.
 *
 * The instance receives the properties `slashes`, `protocol`, `hash`, `query`,
 * `pathname`, `auth`, `host`, `port`, `hostname`, `username`, `password`,
 * `origin` and `href`.
 *
 * @constructor
 * @param {String} address URL we want to parse.
 * @param {Object|String} [location] Location defaults for relative paths.
 * @param {Boolean|Function} [parser] Parser for the query string.
 * @private
 */
function Url(address, location, parser) {
  address = trimLeft(address);
  address = address.replace(CRHTLF, '');

  if (!(this instanceof Url)) {
    return new Url(address, location, parser);
  }

  var url = this
    , steps = rules.slice()
    , locationType = typeof location
    , extracted
    , relative
    , fileWithoutAuthority
    , noAuthority
    , step
    , matcher
    , field
    , found
    , separator
    , n;

  //
  // Two call signatures are supported:
  //
  // 1. Node.js's `url.parse` api: a URL followed by a boolean that indicates
  //    that the query string should be parsed as well.
  //
  // 2. The `URL` interface of the browser: a URL followed by an object (or
  //    string) that is used as default / fall-back for relative paths.
  //
  // So when the second argument is no object and no string, it is the parser.
  //
  if (locationType !== 'object' && locationType !== 'string') {
    parser = location;
    location = null;
  }

  if (parser && typeof parser !== 'function') parser = qs.parse;

  location = lolcation(location);

  //
  // The protocol is extracted before the instructions are run.
  //
  extracted = extractProtocol(address || '', location);
  relative = !extracted.protocol && !extracted.slashes;
  url.slashes = extracted.slashes || (relative && location.slashes);
  url.protocol = extracted.protocol || location.protocol || '';
  address = extracted.rest;

  //
  // Without an authority component the URL starts with a path component. In
  // that case the pathname instruction takes everything that is left.
  //
  fileWithoutAuthority = extracted.protocol === 'file:' && (
    extracted.slashesCount !== 2 || windowsDriveLetter.test(address)
  );
  noAuthority = !extracted.slashes && (
    extracted.protocol ||
    extracted.slashesCount < 2 ||
    !isSpecial(url.protocol)
  );

  if (fileWithoutAuthority || noAuthority) {
    steps[3] = [/(.*)/, 'pathname'];
  }

  for (n = 0; n < steps.length; n++) {
    step = steps[n];

    if (typeof step === 'function') {
      address = step(address, url);
      continue;
    }

    matcher = step[0];
    field = step[1];

    if (matcher !== matcher) {
      //
      // Only `NaN` differs from itself: use what is left as value.
      //
      url[field] = address;
    } else if (typeof matcher === 'string') {
      found = matcher === '@'
        ? address.lastIndexOf(matcher)
        : address.indexOf(matcher);

      if (found !== -1) {
        if (typeof step[2] === 'number') {
          url[field] = address.slice(0, found);
          address = address.slice(found + step[2]);
        } else {
          url[field] = address.slice(found);
          address = address.slice(0, found);
        }
      }
    } else {
      found = matcher.exec(address);

      if (found) {
        url[field] = found[1];
        address = address.slice(0, found.index);
      }
    }

    //
    // Nothing found: inherit from the location when the URL is relative and
    // the instruction allows it, else default to an empty string.
    //
    if (!url[field]) {
      url[field] = relative && step[3] ? location[field] || '' : '';
    }

    //
    // Hostname and host are lowercased so they can be used to create a
    // proper `origin`.
    //
    if (step[4]) url[field] = url[field].toLowerCase();
  }

  //
  // Also parse the query string in to an object, either with the supplied
  // function or with the default parser.
  //
  if (parser) url.query = parser(url.query);

  //
  // If the URL is relative, resolve the pathname against the base URL.
  //
  if (relative && location.slashes && url.pathname.charAt(0) !== '/') {
    if (url.pathname !== '' || location.pathname !== '') {
      url.pathname = resolve(url.pathname, location.pathname);
    }
  }

  //
  // The pathname of a special protocol always starts with a /
  //
  if (url.pathname.charAt(0) !== '/' && isSpecial(url.protocol)) {
    url.pathname = '/' + url.pathname;
  }

  //
  // A port that is not required for the protocol is dropped. The host
  // contains the port as well, so it is replaced by the hostname.
  //
  if (!required(url.port, url.protocol)) {
    url.host = url.hostname;
    url.port = '';
  }

  //
  // Parse down the `auth` for the username and password.
  //
  url.username = url.password = '';

  if (url.auth) {
    separator = url.auth.indexOf(':');

    if (separator === -1) {
      url.username = encodeURIComponent(decodeURIComponent(url.auth));
    } else {
      url.username = encodeURIComponent(
        decodeURIComponent(url.auth.slice(0, separator))
      );
      url.password = encodeURIComponent(
        decodeURIComponent(url.auth.slice(separator + 1))
      );
    }

    url.auth = url.password ? url.username +':'+ url.password : url.username;
  }

  if (url.protocol !== 'file:' && isSpecial(url.protocol) && url.host) {
    url.origin = url.protocol +'//'+ url.host;
  } else {
    url.origin = 'null';
  }

  //
  // The href is just the compiled result.
  //
  url.href = url.toString();
}
408
/**
 * This is convenience method for changing properties in the URL instance to
 * ensure that they all propagate correctly: after the change `host` and
 * `hostname` are lowercased and `auth`, `origin` and `href` are rebuilt.
 *
 * @param {String} part Property we need to adjust.
 * @param {Mixed} value The newly assigned value.
 * @param {Boolean|Function} fn When setting the query, it will be the function
 *                               used to parse the query.
 *                               When setting the protocol, double slash will be
 *                               removed from the final url if it is true.
 * @returns {URL} URL instance for chaining.
 * @public
 */
function set(part, value, fn) {
  var url = this;

  switch (part) {
    case 'query':
      if ('string' === typeof value && value.length) {
        value = (fn || qs.parse)(value);
      }

      url[part] = value;
      break;

    case 'port':
      url[part] = value;

      //
      // A port that is not required for the protocol is dropped, from the
      // host as well.
      //
      if (!required(value, url.protocol)) {
        url.host = url.hostname;
        url[part] = '';
      } else if (value) {
        url.host = url.hostname +':'+ value;
      }

      break;

    case 'hostname':
      url[part] = value;

      if (url.port) value += ':'+ url.port;
      url.host = value;
      break;

    case 'host':
      url[part] = value;

      //
      // Keep `hostname` and `port` in sync with the new host.
      //
      if (port.test(value)) {
        value = value.split(':');
        url.port = value.pop();
        url.hostname = value.join(':');
      } else {
        url.hostname = value;
        url.port = '';
      }

      break;

    case 'protocol':
      url.protocol = value.toLowerCase();
      url.slashes = !fn;
      break;

    case 'pathname':
    case 'hash':
      if (value) {
        var char = part === 'pathname' ? '/' : '#';
        url[part] = value.charAt(0) !== char ? char + value : value;
      } else {
        url[part] = value;
      }
      break;

    case 'username':
    case 'password':
      url[part] = encodeURIComponent(value);
      break;

    case 'auth':
      var index = value.indexOf(':');

      if (~index) {
        url.username = value.slice(0, index);
        url.username = encodeURIComponent(decodeURIComponent(url.username));

        url.password = value.slice(index + 1);
        url.password = encodeURIComponent(decodeURIComponent(url.password));
      } else {
        url.username = encodeURIComponent(decodeURIComponent(value));

        //
        // No colon means no password. Without this reset the password of the
        // previous auth would end up in the new `auth` and `href`.
        //
        url.password = '';
      }
  }

  for (var i = 0; i < rules.length; i++) {
    var ins = rules[i];

    if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase();
  }

  url.auth = url.password ? url.username +':'+ url.password : url.username;

  url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host
    ? url.protocol +'//'+ url.host
    : 'null';

  url.href = url.toString();

  return url;
}
517
/**
 * Compile the properties of the URL instance back in to a URL string.
 *
 * @param {Function} stringify Optional query stringify function, used when
 * the `query` is an object. Anything that is not a function is ignored in
 * favour of the default stringifier.
 * @returns {String} Compiled version of the URL.
 * @public
 */
function toString(stringify) {
  if (typeof stringify !== 'function') stringify = qs.stringify;

  var url = this
    , host = url.host
    , scheme = url.protocol
    , isSpecialWithoutFile = url.protocol !== 'file:' && isSpecial(url.protocol)
    , needsSlashes = (url.protocol && url.slashes) || isSpecial(url.protocol)
    , output
    , query;

  if (scheme && scheme.charAt(scheme.length - 1) !== ':') scheme += ':';

  output = scheme + (needsSlashes ? '//' : '');

  if (url.username || url.password) {
    output += url.username;
    if (url.password) output += ':'+ url.password;
    output += '@';
  } else if (isSpecialWithoutFile && !host && url.pathname !== '/') {
    //
    // Add back the empty userinfo, otherwise the original invalid URL
    // might be transformed into a valid one with `url.pathname` as host.
    //
    output += '@';
  }

  //
  // Trailing colon is removed from `url.host` when it is parsed. If it still
  // ends with a colon, then add back the trailing colon that was removed. This
  // prevents an invalid URL from being transformed into a valid one.
  //
  if (host.slice(-1) === ':' || (port.test(url.hostname) && !url.port)) {
    host += ':';
  }

  output += host + url.pathname;

  if (typeof url.query === 'object') {
    query = stringify(url.query);
  } else {
    query = url.query;
  }

  if (query) output += query.charAt(0) === '?' ? query : '?'+ query;
  if (url.hash) output += url.hash;

  return output;
}
577
578Url.prototype = { set: set, toString: toString };
579
580//
581// Expose the URL parser and some additional properties that might be useful for
582// others or testing.
583//
584Url.extractProtocol = extractProtocol;
585Url.location = lolcation;
586Url.trimLeft = trimLeft;
587Url.qs = qs;
588
589module.exports = Url;