UNPKG

5.9 kBJavaScriptView Raw
1'use strict';
2
// Default charset and MIME type of a data URL. `normalizeDataURL` leaves them
// out of its output when the input spells them out explicitly.
// Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';
6
/**
 * Check whether `name` is matched by at least one of the given filters.
 *
 * @param {string} name - Value to test (in this file: a query parameter key or the last path component).
 * @param {Array<string|RegExp>} filters - Strings are compared with `===`; regular expressions are tested against `name`.
 * @returns {boolean} `true` if any filter matches, `false` otherwise (including for an empty `filters` array).
 */
const testParameter = (name, filters) => {
	return filters.some(filter => {
		if (filter instanceof RegExp) {
			// A regex with the `g` or `y` flag resumes matching from `lastIndex`, which
			// `test()` advances after every successful match. Without this reset the
			// same caller-supplied filter would alternate between matching and not
			// matching across consecutive calls.
			filter.lastIndex = 0;
			return filter.test(name);
		}

		return filter === name;
	});
};
10
11const normalizeDataURL = (urlString, {stripHash}) => {
12 const match = /^data:(?<type>.*?),(?<data>.*?)(?:#(?<hash>.*))?$/.exec(urlString);
13
14 if (!match) {
15 throw new Error(`Invalid URL: ${urlString}`);
16 }
17
18 let {type, data, hash} = match.groups;
19 const mediaType = type.split(';');
20 hash = stripHash ? '' : hash;
21
22 let isBase64 = false;
23 if (mediaType[mediaType.length - 1] === 'base64') {
24 mediaType.pop();
25 isBase64 = true;
26 }
27
28 // Lowercase MIME type
29 const mimeType = (mediaType.shift() || '').toLowerCase();
30 const attributes = mediaType
31 .map(attribute => {
32 let [key, value = ''] = attribute.split('=').map(string => string.trim());
33
34 // Lowercase `charset`
35 if (key === 'charset') {
36 value = value.toLowerCase();
37
38 if (value === DATA_URL_DEFAULT_CHARSET) {
39 return '';
40 }
41 }
42
43 return `${key}${value ? `=${value}` : ''}`;
44 })
45 .filter(Boolean);
46
47 const normalizedMediaType = [
48 ...attributes
49 ];
50
51 if (isBase64) {
52 normalizedMediaType.push('base64');
53 }
54
55 if (normalizedMediaType.length !== 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) {
56 normalizedMediaType.unshift(mimeType);
57 }
58
59 return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`;
60};
61
/**
 * Normalize a URL string.
 *
 * `data:` URLs are delegated to `normalizeDataURL`. Everything else is parsed
 * with the WHATWG `URL` class and then adjusted according to `options`.
 *
 * @param {string} urlString - URL to normalize. Surrounding whitespace is trimmed. A missing or relative (`//`) protocol is replaced with `options.defaultProtocol`, unless the string starts with `/`, `./`, `../` etc.
 * @param {object} [options]
 * @param {string} [options.defaultProtocol='http:'] - Protocol (including the trailing `:`) prepended when none is given.
 * @param {boolean} [options.normalizeProtocol=true] - When `false`, a protocol-relative input (`//host`) stays protocol-relative in the output.
 * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`. Cannot be combined with `forceHttps`.
 * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`. Cannot be combined with `forceHttp`.
 * @param {boolean} [options.stripAuthentication=true] - Remove the username and password.
 * @param {boolean} [options.stripHash=false] - Remove the hash.
 * @param {boolean} [options.stripTextFragment=true] - Remove a `:~:text` fragment from the hash (only consulted when `stripHash` is falsy).
 * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from the hostname.
 * @param {Array<string|RegExp>} [options.removeQueryParameters=[/^utm_\w+/i]] - Query parameter keys to delete. Ignored unless it is an array.
 * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing `/` from the path.
 * @param {boolean} [options.removeSingleSlash=true] - Remove the lone `/` of a URL whose path is just `/`.
 * @param {boolean|Array<string|RegExp>} [options.removeDirectoryIndex=false] - `true` uses `/^index\.[a-z]+$/`; an array supplies custom filters for the last path component.
 * @param {boolean} [options.sortQueryParameters=true] - Sort the query parameters.
 * @param {boolean} [options.stripProtocol] - Remove a leading `http://`, `https://` or `//` from the result. No default is set, so it is off unless provided.
 * @returns {string} The normalized URL.
 * @throws {Error} For `view-source:` URLs and when both `forceHttp` and `forceHttps` are set.
 * @throws {TypeError} From `new URL()` when the string cannot be parsed.
 */
const normalizeUrl = (urlString, options) => {
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripTextFragment: true,
		stripWWW: true,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeSingleSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options
	};

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	if (/^view-source:/i.test(urlString)) {
		throw new Error('`view-source:` is not supported as it is a non-standard protocol');
	}

	// Reject the contradictory option pair before parsing, so that an
	// unparsable URL cannot mask the configuration error.
	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol (either in front of a protocol-less string or in place of a leading `//`)
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObj = new URL(urlString);

	if (options.forceHttp && urlObj.protocol === 'https:') {
		urlObj.protocol = 'http:';
	}

	if (options.forceHttps && urlObj.protocol === 'http:') {
		urlObj.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObj.username = '';
		urlObj.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObj.hash = '';
	} else if (options.stripTextFragment) {
		urlObj.hash = urlObj.hash.replace(/#?:~:text.*?$/i, '');
	}

	// Remove duplicate slashes if not preceded by a protocol
	if (urlObj.pathname) {
		urlObj.pathname = urlObj.pathname.replace(/(?<!\b(?:[a-z][a-z\d+\-.]{1,50}:))\/{2,}/g, '/');
	}

	// Decode URI octets
	if (urlObj.pathname) {
		try {
			urlObj.pathname = decodeURI(urlObj.pathname);
		} catch (_) {
			// `decodeURI` throws on malformed percent-sequences; in that case the
			// pathname is deliberately left as it is.
		}
	}

	// Remove directory index
	const directoryIndexFilters = options.removeDirectoryIndex === true ?
		[/^index\.[a-z]+$/] :
		options.removeDirectoryIndex;

	if (Array.isArray(directoryIndexFilters) && directoryIndexFilters.length > 0) {
		const pathComponents = urlObj.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, directoryIndexFilters)) {
			// Drop the index file and the empty component in front of the leading `/`
			urlObj.pathname = `${pathComponents.slice(1, -1).join('/')}/`;
		}
	}

	if (urlObj.hostname) {
		// Remove trailing dot
		urlObj.hostname = urlObj.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.(?!www\.)(?:[a-z\-\d]{1,63})\.(?:[a-z.\-\d]{2,63})$/.test(urlObj.hostname)) {
			// Each label should be max 63 at length (min: 1).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			// Each TLD should be up to 63 characters long (min: 2).
			// It is technically possible to have a single character TLD, but none currently exist.
			urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
		}
	}

	// Remove unwanted query parameters (iterate over a copy of the keys because entries are deleted while looping)
	if (Array.isArray(options.removeQueryParameters)) {
		for (const key of [...urlObj.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObj.searchParams.delete(key);
			}
		}
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObj.searchParams.sort();
	}

	if (options.removeTrailingSlash) {
		urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
	}

	const oldUrlString = urlString;

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObj.toString();

	// With `removeSingleSlash: false`, only drop the `/` that serialization added itself
	if (!options.removeSingleSlash && urlObj.pathname === '/' && !oldUrlString.endsWith('/') && urlObj.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Remove ending `/` unless removeSingleSlash is false
	if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '' && options.removeSingleSlash) {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		// The leading `//` was replaced with `defaultProtocol` above, so strip
		// whatever was prepended rather than a hard-coded `http:`. The fallback
		// keeps the previous behaviour for every other case.
		const prependedProtocol = String(options.defaultProtocol).toLowerCase();
		urlString = urlString.startsWith(`${prependedProtocol}//`) ?
			urlString.slice(prependedProtocol.length) :
			urlString.replace(/^http:\/\//, '//');
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
};
211
212module.exports = normalizeUrl;