UNPKG

3.99 kBJavaScriptView Raw
1'use strict';
2const url = require('url');
3const punycode = require('punycode');
4const queryString = require('query-string');
5const prependHttp = require('prepend-http');
6const sortKeys = require('sort-keys');
7
// Default port per protocol. Keys include the trailing colon so they can be
// looked up directly with the `protocol` value of a parsed URL; an explicit
// port equal to the value listed here is dropped during normalization.
const DEFAULT_PORTS = {
	'ftp:': 21,
	'http:': 80,
	'https:': 443
};
13
// Protocols that always contain a `//` bit.
// Every scheme is registered twice, once bare and once with a trailing colon,
// so a lookup succeeds with either spelling of the protocol.
const slashedProtocol = {};

for (const scheme of ['http', 'https', 'ftp', 'gopher', 'file']) {
	slashedProtocol[scheme] = true;
	slashedProtocol[`${scheme}:`] = true;
}
27
/**
 * Check whether `name` is matched by at least one entry of `filters`.
 *
 * A string filter matches only when it is strictly equal to `name`; a `RegExp`
 * filter matches when the pattern is found in `name`.
 *
 * @param {string} name - Value to check (a query parameter key or a path component).
 * @param {Array<string|RegExp>} filters - Exact strings and/or patterns to test against.
 * @returns {boolean} `true` if any filter matches, otherwise `false`.
 */
function testParameter(name, filters) {
	return filters.some(filter => {
		if (filter instanceof RegExp) {
			// `String#search` always starts at index 0 and restores `lastIndex`
			// afterwards, so a caller-supplied `/g` or `/y` pattern gives the
			// same answer on every call. `RegExp#test` would instead resume
			// from wherever the previous match ended.
			return String(name).search(filter) !== -1;
		}

		return filter === name;
	});
}
31
32module.exports = (str, opts) => {
33 opts = Object.assign({
34 normalizeProtocol: true,
35 normalizeHttps: false,
36 stripFragment: true,
37 stripWWW: true,
38 removeQueryParameters: [/^utm_\w+/i],
39 removeTrailingSlash: true,
40 removeDirectoryIndex: false,
41 sortQueryParameters: true
42 }, opts);
43
44 if (typeof str !== 'string') {
45 throw new TypeError('Expected a string');
46 }
47
48 const hasRelativeProtocol = str.startsWith('//');
49
50 // Prepend protocol
51 str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
52
53 const urlObj = url.parse(str);
54
55 if (opts.normalizeHttps && urlObj.protocol === 'https:') {
56 urlObj.protocol = 'http:';
57 }
58
59 if (!urlObj.hostname && !urlObj.pathname) {
60 throw new Error('Invalid URL');
61 }
62
63 // Prevent these from being used by `url.format`
64 delete urlObj.host;
65 delete urlObj.query;
66
67 // Remove fragment
68 if (opts.stripFragment) {
69 delete urlObj.hash;
70 }
71
72 // Remove default port
73 const port = DEFAULT_PORTS[urlObj.protocol];
74 if (Number(urlObj.port) === port) {
75 delete urlObj.port;
76 }
77
78 // Remove duplicate slashes
79 if (urlObj.pathname) {
80 urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
81 }
82
83 // Decode URI octets
84 if (urlObj.pathname) {
85 urlObj.pathname = decodeURI(urlObj.pathname);
86 }
87
88 // Remove directory index
89 if (opts.removeDirectoryIndex === true) {
90 opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
91 }
92
93 if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) {
94 let pathComponents = urlObj.pathname.split('/');
95 const lastComponent = pathComponents[pathComponents.length - 1];
96
97 if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
98 pathComponents = pathComponents.slice(0, pathComponents.length - 1);
99 urlObj.pathname = pathComponents.slice(1).join('/') + '/';
100 }
101 }
102
103 // Resolve relative paths, but only for slashed protocols
104 if (slashedProtocol[urlObj.protocol]) {
105 const domain = urlObj.protocol + '//' + urlObj.hostname;
106 const relative = url.resolve(domain, urlObj.pathname);
107 urlObj.pathname = relative.replace(domain, '');
108 }
109
110 if (urlObj.hostname) {
111 // IDN to Unicode
112 urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
113
114 // Remove trailing dot
115 urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
116
117 // Remove `www.`
118 if (opts.stripWWW) {
119 urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
120 }
121 }
122
123 // Remove URL with empty query string
124 if (urlObj.search === '?') {
125 delete urlObj.search;
126 }
127
128 const queryParameters = queryString.parse(urlObj.search);
129
130 // Remove query unwanted parameters
131 if (Array.isArray(opts.removeQueryParameters)) {
132 for (const key in queryParameters) {
133 if (testParameter(key, opts.removeQueryParameters)) {
134 delete queryParameters[key];
135 }
136 }
137 }
138
139 // Sort query parameters
140 if (opts.sortQueryParameters) {
141 urlObj.search = queryString.stringify(sortKeys(queryParameters));
142 }
143
144 // Decode query parameters
145 if (urlObj.search !== null) {
146 urlObj.search = decodeURIComponent(urlObj.search);
147 }
148
149 // Take advantage of many of the Node `url` normalizations
150 str = url.format(urlObj);
151
152 // Remove ending `/`
153 if (opts.removeTrailingSlash || urlObj.pathname === '/') {
154 str = str.replace(/\/$/, '');
155 }
156
157 // Restore relative protocol, if applicable
158 if (hasRelativeProtocol && !opts.normalizeProtocol) {
159 str = str.replace(/^http:\/\//, '//');
160 }
161
162 return str;
163};