UNPKG

autolinker/dist/commonjs/parser/uri-utils.js

Version:

9.2 kBJavaScriptView Raw

1"use strict";
2Object.defineProperty(exports, "__esModule", { value: true });
3exports.isValidIpV4Address = exports.isValidTldMatch = exports.isValidSchemeUrl = exports.isKnownTld = exports.isUrlSuffixStartChar = exports.isPathChar = exports.isDomainLabelChar = exports.isDomainLabelStartChar = exports.isSchemeChar = exports.isSchemeStartChar = exports.tldUrlHostRe = exports.schemeUrlRe = exports.invalidSchemeRe = exports.urlSuffixedCharsNotAllowedAtEndRe = exports.httpSchemePrefixRe = exports.httpSchemeRe = exports.urlSuffixNotAllowedAsLastCharRe = exports.urlSuffixAllowedSpecialCharsRe = exports.urlSuffixStartCharsRe = exports.domainNameCharRegex = void 0;
4var regex_lib_1 = require("../regex-lib");
5var tld_regex_1 = require("./tld-regex");
/**
 * Character class matching a single character that may appear in a domain
 * name, not counting the '-' and '.' characters.
 */
exports.domainNameCharRegex = regex_lib_1.alphaNumericAndMarksRe;
/**
 * Matches any one of the characters that begin a URL suffix: '/' (path),
 * '?' (query) or '#' (hash).
 */
exports.urlSuffixStartCharsRe = /[\/?#]/;
/**
 * Special (non-alphanumeric) characters that are permitted inside the URL
 * suffix (the path, query, and hash part of the URL) and that may also be the
 * very last character of the URL.
 *
 * See {@link #urlSuffixNotAllowedAsLastCharRe} for the other permitted suffix
 * characters, which (generally) should not end a URL.
 */
exports.urlSuffixAllowedSpecialCharsRe = /[-+&@#/%=~_()|'$*\[\]{}\u2713]/;
/**
 * Characters that are permitted inside the URL suffix (the path, query, and
 * hash part of the URL) but should not be its *last character*, because they
 * would normally form the end of a sentence.
 *
 * See {@link #urlSuffixAllowedSpecialCharsRe} for the suffix characters that
 * are allowed in the final position.
 */
exports.urlSuffixNotAllowedAsLastCharRe = /[?!:,.;^]/;
/**
 * Matches an 'http://' or 'https://' scheme anywhere in a string
 * (case-insensitive).
 */
exports.httpSchemeRe = /https?:\/\//i;
/**
 * Same pattern as {@link #httpSchemeRe}, but anchored so that it only matches
 * at the start of a string.
 */
exports.httpSchemePrefixRe = new RegExp('^' + exports.httpSchemeRe.source, 'i');
/**
 * Same character class as {@link #urlSuffixNotAllowedAsLastCharRe}, but
 * anchored so that it only matches the final character of a string.
 */
exports.urlSuffixedCharsNotAllowedAtEndRe = new RegExp(exports.urlSuffixNotAllowedAsLastCharRe.source + '$');
/**
 * Matches the schemes that must never be autolinked ('javascript:' and
 * 'vbscript:'), case-insensitively, at the start of a string.
 */
exports.invalidSchemeRe = /^(javascript|vbscript):/i;
// Recognizes a "scheme match" (such as 'http://google.com'), as opposed to a
// "TLD match". Besides recognizing the scheme, it pulls out the host and
// reports whether the URL has an authority component (i.e. '//').
//
// Capturing groups:
//    1. '//' when the URL has an authority component; the group does not
//       participate in the match otherwise
//    2. The host, i.e. every character up to the next ':' or '/' (may be
//       empty). Ex: 'google.com'
//
// Terminology: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
exports.schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
// Recognizes a "TLD match" (such as 'google.com'), as opposed to a "scheme
// match". It is used to isolate the host so that its TLD (top-level domain)
// can be inspected. The host may optionally be prefixed with the
// protocol-relative '//' chars; capturing group 1 is the host without them.
//
// Terminology: https://www.rfc-editor.org/rfc/rfc3986#appendix-A
exports.tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/;
/**
 * Tells whether `char` is allowed as the first character of a scheme (for
 * example, the 'h' of 'http').
 *
 * @param {string} char The character to check.
 * @returns {boolean} The result of testing `char` against `letterRe`.
 */
function isSchemeStartChar(char) {
    var startsScheme = regex_lib_1.letterRe.test(char);
    return startsScheme;
}
71exports.isSchemeStartChar = isSchemeStartChar;
/**
 * Tells whether `char` is allowed inside a scheme (such as 'http' or
 * 'ssh+git') at any position after the first one (the first character is
 * checked by {@link isSchemeStartChar}).
 *
 * @param {string} char The character to check.
 * @returns {boolean} `true` for a letter, a digit, '+', '-' or '.'.
 */
function isSchemeChar(char) {
    if (regex_lib_1.letterRe.test(char)) {
        return true;
    }
    if (regex_lib_1.digitRe.test(char)) {
        return true;
    }
    // The only punctuation permitted inside a scheme
    return char === '+' || char === '-' || char === '.';
}
80exports.isSchemeChar = isSchemeChar;
/**
 * Tells whether `char` is allowed as the first character of a domain label.
 * Such a character must be alphanumeric; an underscore or a dash may not
 * begin a label.
 *
 * A domain label is one segment of a hostname, e.g. each of the dot-separated
 * pieces of subdomain.google.com.
 *
 * @param {string} char The character to check.
 * @returns {boolean} The result of testing `char` against
 *   `alphaNumericAndMarksRe`.
 */
function isDomainLabelStartChar(char) {
    var canStartLabel = regex_lib_1.alphaNumericAndMarksRe.test(char);
    return canStartLabel;
}
90exports.isDomainLabelStartChar = isDomainLabelStartChar;
/**
 * Tells whether `char` is allowed inside a domain label at a position other
 * than the first one.
 *
 * A domain label is one segment of a hostname, e.g. each of the dot-separated
 * pieces of subdomain.google.com.
 *
 * @param {string} char The character to check.
 * @returns {boolean} `true` for an underscore or for any character accepted
 *   by {@link isDomainLabelStartChar}.
 */
function isDomainLabelChar(char) {
    if (char === '_') {
        return true;
    }
    return isDomainLabelStartChar(char);
}
100exports.isDomainLabelChar = isDomainLabelChar;
/**
 * Tells whether `char` is a path character ("pchar"). For reference, the
 * grammar in https://tools.ietf.org/html/rfc3986#appendix-A reads:
 *
 *     pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 *     unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 *     pct-encoded   = "%" HEXDIG HEXDIG
 *     sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                   / "*" / "+" / "," / ";" / "="
 *
 * This implementation deliberately does not follow that grammar to the
 * letter; it accepts the path characters that show up in real-world URLs.
 *
 * @param {string} char The character to check.
 * @returns {boolean} `true` when `char` matches `alphaNumericAndMarksRe`,
 *   `urlSuffixAllowedSpecialCharsRe` or `urlSuffixNotAllowedAsLastCharRe`.
 */
function isPathChar(char) {
    if (regex_lib_1.alphaNumericAndMarksRe.test(char)) {
        return true;
    }
    if (exports.urlSuffixAllowedSpecialCharsRe.test(char)) {
        return true;
    }
    return exports.urlSuffixNotAllowedAsLastCharRe.test(char);
}
120exports.isPathChar = isPathChar;
/**
 * Tells whether `char` can open the "URL suffix" part of a URI, meaning its
 * path, query, or hash section. The characters that do so are '/', '?' and
 * '#'.
 *
 * See https://tools.ietf.org/html/rfc3986#appendix-A
 *
 * @param {string} char The character to check.
 * @returns {boolean} The result of testing `char` against
 *   `urlSuffixStartCharsRe`.
 */
function isUrlSuffixStartChar(char) {
    var startsSuffix = exports.urlSuffixStartCharsRe.test(char);
    return startsSuffix;
}
131exports.isUrlSuffixStartChar = isUrlSuffixStartChar;
/**
 * Tells whether the TLD (Top-Level Domain) read from a host is a known one.
 *
 * For instance, 'com' (from the host 'google.com') is a known TLD, whereas
 * 'local' (from the domain name 'my-computer.local') is not.
 *
 * @param {string} tld The top-level domain to look up, in any letter case.
 * @returns {boolean} The result of testing the lowercased `tld` against
 *   `tldRegex`.
 */
function isKnownTld(tld) {
    // The TLD is lowercased before it is tested against the regex
    var lowerCasedTld = tld.toLowerCase();
    return tld_regex_1.tldRegex.test(lowerCasedTld);
}
141exports.isKnownTld = isKnownTld;
/**
 * Tells whether the given `url` is a valid scheme-prefixed URL.
 *
 * @param {string} url The candidate URL text.
 * @returns {boolean} `true` when the URL should be treated as a valid scheme
 *   match, `false` otherwise.
 */
function isValidSchemeUrl(url) {
    // 'javascript:' and 'vbscript:' links can be dangerous, so they are
    // never linked.
    if (exports.invalidSchemeRe.test(url)) {
        return false;
    }
    var parts = url.match(exports.schemeUrlRe);
    if (parts === null) {
        return false;
    }
    // Group 1 holds the authority marker ('//'). Whenever it is present the
    // match is accepted as-is, e.g. 'http://anything'.
    if (parts[1]) {
        return true;
    }
    // Without an authority, the text after the scheme has to look like a
    // hostname: it needs at least one '.' char and at least one letter.
    //
    // Accept:
    //   - git:domain.com (scheme followed by a host)
    // Do not accept:
    //   - git:something ('something' doesn't look like a host)
    //   - version:1.0   ('1.0' doesn't look like a host)
    var hostText = parts[2];
    return hostText.indexOf('.') !== -1 && regex_lib_1.letterRe.test(hostText);
}
178exports.isValidSchemeUrl = isValidSchemeUrl;
/**
 * Tells whether the given `url` (a "TLD match" such as 'google.com') has a
 * host that ends in a known top-level domain.
 *
 * @param {string} url The candidate URL text.
 * @returns {boolean} `true` when the host has at least two labels and the
 *   last one is a known TLD, `false` otherwise.
 */
function isValidTldMatch(url) {
    var hostMatch = url.match(exports.tldUrlHostRe);
    if (hostMatch === null) {
        // The URL doesn't even have the shape of a TLD match. Highly
        // unlikely to happen, but treated as invalid just in case.
        return false;
    }
    var hostText = hostMatch[0];
    var lastDotIdx = hostText.lastIndexOf('.');
    if (lastDotIdx === -1) {
        // No '.' means a single host label and therefore no TLD.
        // Ex: 'localhost'
        return false;
    }
    // The TLD is whatever follows the final '.'
    var tld = hostText.slice(lastDotIdx + 1);
    if (!isKnownTld(tld)) {
        return false;
    }
    // TODO: Implement these conditions for TLD matcher:
    // (
    //     this.longestDomainLabelLength <= 63 &&
    //     this.domainNameLength <= 255
    // );
    return true;
}
208exports.isValidTldMatch = isValidTldMatch;
// Matches a complete IPv4 address (ex: '192.168.0.1'): four dot-separated
// numbers, each in the range 0-255
var ipV4Re = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
// Matches the first character that ends the IPv4 address itself, i.e. the
// start of any port (':'), path ('/'), query ('?') or hash ('#')
var ipV4PartRe = /[:/?#]/;
/**
 * Tells whether the given URL begins with a valid IPv4 address.
 *
 * @param {string} url The candidate URL text, e.g. '192.168.0.1:8080/path'.
 * @returns {boolean} `true` when the text before any port/path/query/hash is
 *   a valid IPv4 address, `false` otherwise.
 */
function isValidIpV4Address(url) {
    // Only the leading IP address is of interest; drop everything from the
    // first delimiter onwards (if there is one).
    var delimiterIdx = url.search(ipV4PartRe);
    var ipText = delimiterIdx === -1 ? url : url.slice(0, delimiterIdx);
    return ipV4Re.test(ipText);
}
221exports.isValidIpV4Address = isValidIpV4Address;
222//# sourceMappingURL=uri-utils.js.map
\No newline at end of file

1	`"use strict";`
2	`Object.defineProperty(exports, "__esModule", { value: true });`
3	exports.isValidIpV4Address = exports.isValidTldMatch = exports.isValidSchemeUrl = exports.isKnownTld = exports.isUrlSuffixStartChar = exports.isPathChar = exports.isDomainLabelChar = exports.isDomainLabelStartChar = exports.isSchemeChar = exports.isSchemeStartChar = exports.tldUrlHostRe = exports.schemeUrlRe = exports.invalidSchemeRe = exports.urlSuffixedCharsNotAllowedAtEndRe = exports.httpSchemePrefixRe = exports.httpSchemeRe = exports.urlSuffixNotAllowedAsLastCharRe = exports.urlSuffixAllowedSpecialCharsRe = exports.urlSuffixStartCharsRe = exports.domainNameCharRegex = void 0;
4	`var regex_lib_1 = require("../regex-lib");`
5	`var tld_regex_1 = require("./tld-regex");`
6	`/**`
7	`* A regular expression that is simply the character class of the characters`
8	`* that may be used in a domain name, minus the '-' or '.'`
9	`*/`
10	`exports.domainNameCharRegex = regex_lib_1.alphaNumericAndMarksRe;`
11	`/**`
12	`* The set of characters that will start a URL suffix (i.e. the path, query, and`
13	`* hash part of the URL)`
14	`*/`
15	`exports.urlSuffixStartCharsRe = /[\/?#]/;`
16	`/**`
17	`* The set of characters that are allowed in the URL suffix (i.e. the path,`
18	`* query, and hash part of the URL) which may also form the ending character of`
19	`* the URL.`
20	`*`
21	`* The {@link #urlSuffixNotAllowedAsLastCharRe} are additional allowed URL`
22	`* suffix characters, but (generally) should not be the last character of a URL.`
23	`*/`
24	`exports.urlSuffixAllowedSpecialCharsRe = /[-+&@#/%=~_()\|'$*\[\]{}\u2713]/;`
25	`/**`
26	`* URL suffix characters (i.e. path, query, and hash part of the URL) that are`
27	`* not allowed as the last character in the URL suffix as they would normally`
28	`* form the end of a sentence.`
29	`*`
30	`* The {@link #urlSuffixAllowedSpecialCharsRe} contains additional allowed URL`
31	`* suffix characters which are allowed as the last character.`
32	`*/`
33	`exports.urlSuffixNotAllowedAsLastCharRe = /[?!:,.;^]/;`
34	`/**`
35	`* Regular expression to match an http:// or https:// scheme.`
36	`*/`
37	`exports.httpSchemeRe = /https?:\/\//i;`
38	`/**`
39	`* Regular expression to match an http:// or https:// scheme as the prefix of`
40	`* a string.`
41	`*/`
42	`exports.httpSchemePrefixRe = new RegExp('^' + exports.httpSchemeRe.source, 'i');`
43	`exports.urlSuffixedCharsNotAllowedAtEndRe = new RegExp(exports.urlSuffixNotAllowedAsLastCharRe.source + '$');`
44	`/**`
45	`* A regular expression used to determine the schemes we should not autolink`
46	`*/`
47	`exports.invalidSchemeRe = /^(javascript\|vbscript):/i;`
48	`// A regular expression used to determine if the URL is a scheme match (such as`
49	`// 'http://google.com', and as opposed to a "TLD match"). This regular`
50	`// expression is used to parse out the host along with if the URL has an`
51	`// authority component (i.e. '//')`
52	`//`
53	`// Capturing groups:`
54	`// 1. '//' if the URL has an authority component, empty string otherwise`
55	`// 2. The host (if one exists). Ex: 'google.com'`
56	`//`
57	`// See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology`
58	`exports.schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;`
59	`// A regular expression used to determine if the URL is a TLD match (such as`
60	`// 'google.com', and as opposed to a "scheme match"). This regular`
61	`// expression is used to help parse out the TLD (top-level domain) of the host.`
62	`//`
63	`// See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology`
64	`exports.tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars`
65	`/**`
66	`* Determines if the given character may start a scheme (ex: 'http').`
67	`*/`
68	`function isSchemeStartChar(char) {`
69	`return regex_lib_1.letterRe.test(char);`
70	`}`
71	`exports.isSchemeStartChar = isSchemeStartChar;`
72	`/**`
73	`* Determines if the given character is a valid character in a scheme (such as`
74	`* 'http' or 'ssh+git'), but only after the start char (which is handled by`
75	`* {@link isSchemeStartChar}).`
76	`*/`
77	`function isSchemeChar(char) {`
78	`return (regex_lib_1.letterRe.test(char) \|\| regex_lib_1.digitRe.test(char) \|\| char === '+' \|\| char === '-' \|\| char === '.');`
79	`}`
80	`exports.isSchemeChar = isSchemeChar;`
81	`/**`
82	`* Determines if the character can begin a domain label, which must be an`
83	`* alphanumeric character and not an underscore or dash.`
84	`*`
85	`* A domain label is a segment of a hostname such as subdomain.google.com.`
86	`*/`
87	`function isDomainLabelStartChar(char) {`
88	`return regex_lib_1.alphaNumericAndMarksRe.test(char);`
89	`}`
90	`exports.isDomainLabelStartChar = isDomainLabelStartChar;`
91	`/**`
92	`* Determines if the character is part of a domain label (but not a domain label`
93	`* start character).`
94	`*`
95	`* A domain label is a segment of a hostname such as subdomain.google.com.`
96	`*/`
97	`function isDomainLabelChar(char) {`
98	`return char === '_' \|\| isDomainLabelStartChar(char);`
99	`}`
100	`exports.isDomainLabelChar = isDomainLabelChar;`
101	`/**`
102	`* Determines if the character is a path character ("pchar") as defined by`
103	`* https://tools.ietf.org/html/rfc3986#appendix-A`
104	`*`
105	`* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
106	`*`
107	`* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
108	`* pct-encoded = "%" HEXDIG HEXDIG`
109	`* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"`
110	`* / "*" / "+" / "," / ";" / "="`
111	`*`
112	`* Note that this implementation doesn't follow the spec exactly, but rather`
113	`* follows URL path characters found out in the wild (spec might be out of date?)`
114	`*/`
115	`function isPathChar(char) {`
116	`return (regex_lib_1.alphaNumericAndMarksRe.test(char) \|\|`
117	`exports.urlSuffixAllowedSpecialCharsRe.test(char) \|\|`
118	`exports.urlSuffixNotAllowedAsLastCharRe.test(char));`
119	`}`
120	`exports.isPathChar = isPathChar;`
121	`/**`
122	`* Determines if the character given may begin the "URL Suffix" section of a`
123	`* URI (i.e. the path, query, or hash section). These are the '/', '?' and '#'`
124	`* characters.`
125	`*`
126	`* See https://tools.ietf.org/html/rfc3986#appendix-A`
127	`*/`
128	`function isUrlSuffixStartChar(char) {`
129	`return exports.urlSuffixStartCharsRe.test(char);`
130	`}`
131	`exports.isUrlSuffixStartChar = isUrlSuffixStartChar;`
132	`/**`
133	`* Determines if the TLD read in the host is a known TLD (Top-Level Domain).`
134	`*`
135	`* Example: 'com' would be a known TLD (for a host of 'google.com'), but`
136	`* 'local' would not (for a domain name of 'my-computer.local').`
137	`*/`
138	`function isKnownTld(tld) {`
139	`return tld_regex_1.tldRegex.test(tld.toLowerCase()); // make sure the tld is lowercase for the regex`
140	`}`
141	`exports.isKnownTld = isKnownTld;`
142	`/**`
143	* Determines if the given `url` is a valid scheme-prefixed URL.
144	`*/`
145	`function isValidSchemeUrl(url) {`
146	`// If the scheme is 'javascript:' or 'vbscript:', these link`
147	`// types can be dangerous. Don't link them.`
148	`if (exports.invalidSchemeRe.test(url)) {`
149	`return false;`
150	`}`
151	`var schemeMatch = url.match(exports.schemeUrlRe);`
152	`if (!schemeMatch) {`
153	`return false;`
154	`}`
155	`var isAuthorityMatch = !!schemeMatch[1];`
156	`var host = schemeMatch[2];`
157	`if (isAuthorityMatch) {`
158	`// Any match that has an authority ('//' chars) after the scheme is`
159	`// valid, such as 'http://anything'`
160	`return true;`
161	`}`
162	`// If there's no authority ('//' chars), check that we have a hostname`
163	`// that looks valid.`
164	`//`
165	`// The host must contain at least one '.' char and have a domain label`
166	`// with at least one letter to be considered valid.`
167	`//`
168	`// Accept:`
169	`// - git:domain.com (scheme followed by a host)`
170	`// Do not accept:`
171	`// - git:something ('something' doesn't look like a host)`
172	`// - version:1.0 ('1.0' doesn't look like a host)`
173	`if (host.indexOf('.') === -1 \|\| !regex_lib_1.letterRe.test(host)) {`
174	`return false;`
175	`}`
176	`return true;`
177	`}`
178	`exports.isValidSchemeUrl = isValidSchemeUrl;`
179	`/**`
180	* Determines if the given `url` is a match with a valid TLD.
181	`*/`
182	`function isValidTldMatch(url) {`
183	`// TLD URL such as 'google.com', we need to confirm that we have a valid`
184	`// top-level domain`
185	`var tldUrlHostMatch = url.match(exports.tldUrlHostRe);`
186	`if (!tldUrlHostMatch) {`
187	`// At this point, if the URL didn't match our TLD re, it must be invalid`
188	`// (highly unlikely to happen, but just in case)`
189	`return false;`
190	`}`
191	`var host = tldUrlHostMatch[0];`
192	`var hostLabels = host.split('.');`
193	`if (hostLabels.length < 2) {`
194	`// 0 or 1 host label, there's no TLD. Ex: 'localhost'`
195	`return false;`
196	`}`
197	`var tld = hostLabels[hostLabels.length - 1];`
198	`if (!isKnownTld(tld)) {`
199	`return false;`
200	`}`
201	`// TODO: Implement these conditions for TLD matcher:`
202	`// (`
203	`// this.longestDomainLabelLength <= 63 &&`
204	`// this.domainNameLength <= 255`
205	`// );`
206	`return true;`
207	`}`
208	`exports.isValidTldMatch = isValidTldMatch;`
209	`// Regular expression to confirm a valid IPv4 address (ex: '192.168.0.1')`
210	`var ipV4Re = /^(?:(?:25[0-5]\|2[0-4][0-9]\|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]\|2[0-4][0-9]\|[01]?[0-9][0-9]?)$/;`
211	`// Regular expression used to split the IPv4 address itself from any port/path/query/hash`
212	`var ipV4PartRe = /[:/?#]/;`
213	`/**`
214	`* Determines if the given URL is a valid IPv4-prefixed URL.`
215	`*/`
216	`function isValidIpV4Address(url) {`
217	`// Grab just the IP address`
218	`var ipV4Part = url.split(ipV4PartRe, 1)[0]; // only 1 result needed`
219	`return ipV4Re.test(ipV4Part);`
220	`}`
221	`exports.isValidIpV4Address = isValidIpV4Address;`
222	`//# sourceMappingURL=uri-utils.js.map`
\	No newline at end of file