1 | ;
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.isValidIpV4Address = exports.isValidTldMatch = exports.isValidSchemeUrl = exports.isKnownTld = exports.isUrlSuffixStartChar = exports.isPathChar = exports.isDomainLabelChar = exports.isDomainLabelStartChar = exports.isSchemeChar = exports.isSchemeStartChar = exports.tldUrlHostRe = exports.schemeUrlRe = exports.invalidSchemeRe = exports.urlSuffixedCharsNotAllowedAtEndRe = exports.httpSchemePrefixRe = exports.httpSchemeRe = exports.urlSuffixNotAllowedAsLastCharRe = exports.urlSuffixAllowedSpecialCharsRe = exports.urlSuffixStartCharsRe = exports.domainNameCharRegex = void 0;
|
4 | var regex_lib_1 = require("../regex-lib");
|
5 | var tld_regex_1 = require("./tld-regex");
|
6 | /**
|
7 | * A regular expression that is simply the character class of the characters
|
8 | * that may be used in a domain name, minus the '-' or '.'
|
9 | */
|
10 | exports.domainNameCharRegex = regex_lib_1.alphaNumericAndMarksRe;
|
11 | /**
|
12 | * The set of characters that will start a URL suffix (i.e. the path, query, and
|
13 | * hash part of the URL)
|
14 | */
|
15 | exports.urlSuffixStartCharsRe = /[\/?#]/;
|
16 | /**
|
17 | * The set of characters that are allowed in the URL suffix (i.e. the path,
|
18 | * query, and hash part of the URL) which may also form the ending character of
|
19 | * the URL.
|
20 | *
|
21 | * The {@link #urlSuffixNotAllowedAsLastCharRe} are additional allowed URL
|
22 | * suffix characters, but (generally) should not be the last character of a URL.
|
23 | */
|
24 | exports.urlSuffixAllowedSpecialCharsRe = /[-+&@#/%=~_()|'$*\[\]{}\u2713]/;
|
25 | /**
|
26 | * URL suffix characters (i.e. path, query, and has part of the URL) that are
|
27 | * not allowed as the *last character* in the URL suffix as they would normally
|
28 | * form the end of a sentence.
|
29 | *
|
30 | * The {@link #urlSuffixAllowedSpecialCharsRe} contains additional allowed URL
|
31 | * suffix characters which are allowed as the last character.
|
32 | */
|
33 | exports.urlSuffixNotAllowedAsLastCharRe = /[?!:,.;^]/;
|
34 | /**
|
35 | * Regular expression to match an http:// or https:// scheme.
|
36 | */
|
37 | exports.httpSchemeRe = /https?:\/\//i;
|
38 | /**
|
39 | * Regular expression to match an http:// or https:// scheme as the prefix of
|
40 | * a string.
|
41 | */
|
42 | exports.httpSchemePrefixRe = new RegExp('^' + exports.httpSchemeRe.source, 'i');
|
43 | exports.urlSuffixedCharsNotAllowedAtEndRe = new RegExp(exports.urlSuffixNotAllowedAsLastCharRe.source + '$');
|
44 | /**
|
45 | * A regular expression used to determine the schemes we should not autolink
|
46 | */
|
47 | exports.invalidSchemeRe = /^(javascript|vbscript):/i;
|
48 | // A regular expression used to determine if the URL is a scheme match (such as
|
49 | // 'http://google.com', and as opposed to a "TLD match"). This regular
|
50 | // expression is used to parse out the host along with if the URL has an
|
51 | // authority component (i.e. '//')
|
52 | //
|
53 | // Capturing groups:
|
54 | // 1. '//' if the URL has an authority component, empty string otherwise
|
55 | // 2. The host (if one exists). Ex: 'google.com'
|
56 | //
|
57 | // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
|
58 | exports.schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
|
59 | // A regular expression used to determine if the URL is a TLD match (such as
|
60 | // 'google.com', and as opposed to a "scheme match"). This regular
|
61 | // expression is used to help parse out the TLD (top-level domain) of the host.
|
62 | //
|
63 | // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
|
64 | exports.tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars
|
65 | /**
|
66 | * Determines if the given character may start a scheme (ex: 'http').
|
67 | */
|
68 | function isSchemeStartChar(char) {
|
69 | return regex_lib_1.letterRe.test(char);
|
70 | }
|
71 | exports.isSchemeStartChar = isSchemeStartChar;
|
72 | /**
|
73 | * Determines if the given character is a valid character in a scheme (such as
|
74 | * 'http' or 'ssh+git'), but only after the start char (which is handled by
|
75 | * {@link isSchemeStartChar}.
|
76 | */
|
77 | function isSchemeChar(char) {
|
78 | return (regex_lib_1.letterRe.test(char) || regex_lib_1.digitRe.test(char) || char === '+' || char === '-' || char === '.');
|
79 | }
|
80 | exports.isSchemeChar = isSchemeChar;
|
81 | /**
|
82 | * Determines if the character can begin a domain label, which must be an
|
83 | * alphanumeric character and not an underscore or dash.
|
84 | *
|
85 | * A domain label is a segment of a hostname such as subdomain.google.com.
|
86 | */
|
87 | function isDomainLabelStartChar(char) {
|
88 | return regex_lib_1.alphaNumericAndMarksRe.test(char);
|
89 | }
|
90 | exports.isDomainLabelStartChar = isDomainLabelStartChar;
|
91 | /**
|
92 | * Determines if the character is part of a domain label (but not a domain label
|
93 | * start character).
|
94 | *
|
95 | * A domain label is a segment of a hostname such as subdomain.google.com.
|
96 | */
|
97 | function isDomainLabelChar(char) {
|
98 | return char === '_' || isDomainLabelStartChar(char);
|
99 | }
|
100 | exports.isDomainLabelChar = isDomainLabelChar;
|
101 | /**
|
102 | * Determines if the character is a path character ("pchar") as defined by
|
103 | * https://tools.ietf.org/html/rfc3986#appendix-A
|
104 | *
|
105 | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
106 | *
|
107 | * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
108 | * pct-encoded = "%" HEXDIG HEXDIG
|
109 | * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
110 | * / "*" / "+" / "," / ";" / "="
|
111 | *
|
112 | * Note that this implementation doesn't follow the spec exactly, but rather
|
113 | * follows URL path characters found out in the wild (spec might be out of date?)
|
114 | */
|
115 | function isPathChar(char) {
|
116 | return (regex_lib_1.alphaNumericAndMarksRe.test(char) ||
|
117 | exports.urlSuffixAllowedSpecialCharsRe.test(char) ||
|
118 | exports.urlSuffixNotAllowedAsLastCharRe.test(char));
|
119 | }
|
120 | exports.isPathChar = isPathChar;
|
121 | /**
|
122 | * Determines if the character given may begin the "URL Suffix" section of a
|
123 | * URI (i.e. the path, query, or hash section). These are the '/', '?' and '#'
|
124 | * characters.
|
125 | *
|
126 | * See https://tools.ietf.org/html/rfc3986#appendix-A
|
127 | */
|
128 | function isUrlSuffixStartChar(char) {
|
129 | return exports.urlSuffixStartCharsRe.test(char);
|
130 | }
|
131 | exports.isUrlSuffixStartChar = isUrlSuffixStartChar;
|
132 | /**
|
133 | * Determines if the TLD read in the host is a known TLD (Top-Level Domain).
|
134 | *
|
135 | * Example: 'com' would be a known TLD (for a host of 'google.com'), but
|
136 | * 'local' would not (for a domain name of 'my-computer.local').
|
137 | */
|
138 | function isKnownTld(tld) {
|
139 | return tld_regex_1.tldRegex.test(tld.toLowerCase()); // make sure the tld is lowercase for the regex
|
140 | }
|
141 | exports.isKnownTld = isKnownTld;
|
142 | /**
|
143 | * Determines if the given `url` is a valid scheme-prefixed URL.
|
144 | */
|
145 | function isValidSchemeUrl(url) {
|
146 | // If the scheme is 'javascript:' or 'vbscript:', these link
|
147 | // types can be dangerous. Don't link them.
|
148 | if (exports.invalidSchemeRe.test(url)) {
|
149 | return false;
|
150 | }
|
151 | var schemeMatch = url.match(exports.schemeUrlRe);
|
152 | if (!schemeMatch) {
|
153 | return false;
|
154 | }
|
155 | var isAuthorityMatch = !!schemeMatch[1];
|
156 | var host = schemeMatch[2];
|
157 | if (isAuthorityMatch) {
|
158 | // Any match that has an authority ('//' chars) after the scheme is
|
159 | // valid, such as 'http://anything'
|
160 | return true;
|
161 | }
|
162 | // If there's no authority ('//' chars), check that we have a hostname
|
163 | // that looks valid.
|
164 | //
|
165 | // The host must contain at least one '.' char and have a domain label
|
166 | // with at least one letter to be considered valid.
|
167 | //
|
168 | // Accept:
|
169 | // - git:domain.com (scheme followed by a host
|
170 | // Do not accept:
|
171 | // - git:something ('something' doesn't look like a host)
|
172 | // - version:1.0 ('1.0' doesn't look like a host)
|
173 | if (host.indexOf('.') === -1 || !regex_lib_1.letterRe.test(host)) {
|
174 | return false;
|
175 | }
|
176 | return true;
|
177 | }
|
178 | exports.isValidSchemeUrl = isValidSchemeUrl;
|
179 | /**
|
180 | * Determines if the given `url` is a match with a valid TLD.
|
181 | */
|
182 | function isValidTldMatch(url) {
|
183 | // TLD URL such as 'google.com', we need to confirm that we have a valid
|
184 | // top-level domain
|
185 | var tldUrlHostMatch = url.match(exports.tldUrlHostRe);
|
186 | if (!tldUrlHostMatch) {
|
187 | // At this point, if the URL didn't match our TLD re, it must be invalid
|
188 | // (highly unlikely to happen, but just in case)
|
189 | return false;
|
190 | }
|
191 | var host = tldUrlHostMatch[0];
|
192 | var hostLabels = host.split('.');
|
193 | if (hostLabels.length < 2) {
|
194 | // 0 or 1 host label, there's no TLD. Ex: 'localhost'
|
195 | return false;
|
196 | }
|
197 | var tld = hostLabels[hostLabels.length - 1];
|
198 | if (!isKnownTld(tld)) {
|
199 | return false;
|
200 | }
|
201 | // TODO: Implement these conditions for TLD matcher:
|
202 | // (
|
203 | // this.longestDomainLabelLength <= 63 &&
|
204 | // this.domainNameLength <= 255
|
205 | // );
|
206 | return true;
|
207 | }
|
208 | exports.isValidTldMatch = isValidTldMatch;
|
209 | // Regular expression to confirm a valid IPv4 address (ex: '192.168.0.1')
|
210 | var ipV4Re = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
|
211 | // Regular expression used to split the IPv4 address itself from any port/path/query/hash
|
212 | var ipV4PartRe = /[:/?#]/;
|
213 | /**
|
214 | * Determines if the given URL is a valid IPv4-prefixed URL.
|
215 | */
|
216 | function isValidIpV4Address(url) {
|
217 | // Grab just the IP address
|
218 | var ipV4Part = url.split(ipV4PartRe, 1)[0]; // only 1 result needed
|
219 | return ipV4Re.test(ipV4Part);
|
220 | }
|
221 | exports.isValidIpV4Address = isValidIpV4Address;
|
222 | //# sourceMappingURL=uri-utils.js.map |
\ | No newline at end of file |