// Retrieved via UNPKG (4.43 kB, JavaScript).
'use strict';
// Pick the WHATWG URL constructor: prefer the global `URL` when the runtime
// defines one, otherwise fall back to the `URL` export of the core `url` module.
// TODO: Use the `URL` global when targeting Node.js 10
const URLParser = typeof URL === 'undefined' ? require('url').URL : URL;
4
/**
 * Report whether `name` is matched by at least one entry of `filters`.
 *
 * A `RegExp` entry matches when it tests positive against `name`; any other
 * entry matches only when it is strictly equal to `name`.
 *
 * @param {string} name - The value to check.
 * @param {Array<RegExp|string>} filters - Patterns and/or literals to check against.
 * @returns {boolean} `true` if any filter matches, otherwise `false`.
 */
const testParameter = (name, filters) => {
	return filters.some(filter => {
		if (filter instanceof RegExp) {
			// `test()` on a global/sticky regex resumes from `lastIndex`, so a
			// filter reused across several names would otherwise skip matches.
			filter.lastIndex = 0;
			return filter.test(name);
		}

		return filter === name;
	});
};
8
9module.exports = (urlString, options) => {
10 options = {
11 defaultProtocol: 'http:',
12 normalizeProtocol: true,
13 forceHttp: false,
14 forceHttps: false,
15 stripAuthentication: true,
16 stripHash: false,
17 stripWWW: true,
18 removeQueryParameters: [/^utm_\w+/i],
19 removeTrailingSlash: true,
20 removeDirectoryIndex: false,
21 sortQueryParameters: true,
22 ...options
23 };
24
25 // TODO: Remove this at some point in the future
26 if (Reflect.has(options, 'normalizeHttps')) {
27 throw new Error('options.normalizeHttps is renamed to options.forceHttp');
28 }
29
30 if (Reflect.has(options, 'normalizeHttp')) {
31 throw new Error('options.normalizeHttp is renamed to options.forceHttps');
32 }
33
34 if (Reflect.has(options, 'stripFragment')) {
35 throw new Error('options.stripFragment is renamed to options.stripHash');
36 }
37
38 urlString = urlString.trim();
39
40 const hasRelativeProtocol = urlString.startsWith('//');
41 const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);
42
43 // Prepend protocol
44 if (!isRelativeUrl) {
45 urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
46 }
47
48 const urlObj = new URLParser(urlString);
49
50 if (options.forceHttp && options.forceHttps) {
51 throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
52 }
53
54 if (options.forceHttp && urlObj.protocol === 'https:') {
55 urlObj.protocol = 'http:';
56 }
57
58 if (options.forceHttps && urlObj.protocol === 'http:') {
59 urlObj.protocol = 'https:';
60 }
61
62 // Remove auth
63 if (options.stripAuthentication) {
64 urlObj.username = '';
65 urlObj.password = '';
66 }
67
68 // Remove hash
69 if (options.stripHash) {
70 urlObj.hash = '';
71 }
72
73 // Remove duplicate slashes if not preceded by a protocol
74 if (urlObj.pathname) {
75 // TODO: Use the following instead when targeting Node.js 10
76 // `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
77 urlObj.pathname = urlObj.pathname.replace(/((?!:).|^)\/{2,}/g, (_, p1) => {
78 if (/^(?!\/)/g.test(p1)) {
79 return `${p1}/`;
80 }
81 return '/';
82 });
83 }
84
85 // Decode URI octets
86 if (urlObj.pathname) {
87 urlObj.pathname = decodeURI(urlObj.pathname);
88 }
89
90 // Remove directory index
91 if (options.removeDirectoryIndex === true) {
92 options.removeDirectoryIndex = [/^index\.[a-z]+$/];
93 }
94
95 if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
96 let pathComponents = urlObj.pathname.split('/');
97 const lastComponent = pathComponents[pathComponents.length - 1];
98
99 if (testParameter(lastComponent, options.removeDirectoryIndex)) {
100 pathComponents = pathComponents.slice(0, pathComponents.length - 1);
101 urlObj.pathname = pathComponents.slice(1).join('/') + '/';
102 }
103 }
104
105 if (urlObj.hostname) {
106 // Remove trailing dot
107 urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
108
109 // Remove `www.`
110 if (options.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z.]{2,5})$/.test(urlObj.hostname)) {
111 // Each label should be max 63 at length (min: 2).
112 // The extension should be max 5 at length (min: 2).
113 // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
114 urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
115 }
116 }
117
118 // Remove query unwanted parameters
119 if (Array.isArray(options.removeQueryParameters)) {
120 for (const key of [...urlObj.searchParams.keys()]) {
121 if (testParameter(key, options.removeQueryParameters)) {
122 urlObj.searchParams.delete(key);
123 }
124 }
125 }
126
127 // Sort query parameters
128 if (options.sortQueryParameters) {
129 urlObj.searchParams.sort();
130 }
131
132 if (options.removeTrailingSlash) {
133 urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
134 }
135
136 // Take advantage of many of the Node `url` normalizations
137 urlString = urlObj.toString();
138
139 // Remove ending `/`
140 if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '') {
141 urlString = urlString.replace(/\/$/, '');
142 }
143
144 // Restore relative protocol, if applicable
145 if (hasRelativeProtocol && !options.normalizeProtocol) {
146 urlString = urlString.replace(/^http:\/\//, '//');
147 }
148
149 // Remove http/https
150 if (options.stripProtocol) {
151 urlString = urlString.replace(/^(?:https?:)?\/\//, '');
152 }
153
154 return urlString;
155};