UNPKG

5.85 kBTypeScriptView Raw
1import { Matcher, MatcherConfig } from "./matcher";
2import { StripPrefixConfigObj } from "../autolinker";
3import { Match } from "../match/match";
/**
 * @class Autolinker.matcher.Url
 * @extends Autolinker.matcher.Matcher
 *
 * Matcher to find URL matches in an input string.
 *
 * See this class's superclass ({@link Autolinker.matcher.Matcher}) for more
 * details.
 */
export declare class UrlMatcher extends Matcher {
    /**
     * @cfg {Object} stripPrefix (required)
     *
     * The Object form of {@link Autolinker#cfg-stripPrefix}. Every property of
     * the object is required here (`Required<StripPrefixConfigObj>`).
     */
    protected stripPrefix: Required<StripPrefixConfigObj>;
    /**
     * @cfg {Boolean} stripTrailingSlash (required)
     * @inheritdoc Autolinker#stripTrailingSlash
     */
    protected stripTrailingSlash: boolean;
    /**
     * @cfg {Boolean} decodePercentEncoding (required)
     * @inheritdoc Autolinker#decodePercentEncoding
     */
    protected decodePercentEncoding: boolean;
    /**
     * @protected
     * @property {RegExp} matcherRegex
     *
     * The regular expression to match URLs with an optional scheme, port
     * number, path, query string, and hash anchor.
     *
     * Example matches:
     *
     *     http://google.com
     *     www.google.com
     *     google.com/path/to/file?q1=1&q2=2#myAnchor
     *
     *
     * This regular expression will have the following capturing groups:
     *
     * 1.  Group that matches a scheme-prefixed URL (i.e. 'http://google.com').
     *     This is used to match scheme URLs with just a single word, such as
     *     'http://localhost', where we won't double check that the domain name
     *     has at least one dot ('.') in it.
     * 2.  Group that matches a 'www.' prefixed URL. This is only matched if the
     *     'www.' text was not prefixed by a scheme (i.e.: not prefixed by
     *     'http://', 'ftp:', etc.)
     * 3.  A protocol-relative ('//') match for the case of a 'www.' prefixed
     *     URL. Will be an empty string if it is not a protocol-relative match.
     *     We need to know the character before the '//' in order to determine
     *     if it is a valid match or the // was in a string we don't want to
     *     auto-link.
     * 4.  Group that matches a known TLD (top level domain), when a scheme
     *     or 'www.'-prefixed domain is not matched.
     * 5.  A protocol-relative ('//') match for the case of a known TLD prefixed
     *     URL. Will be an empty string if it is not a protocol-relative match.
     *     See #3 for more info.
     */
    protected matcherRegex: RegExp;
    /**
     * A regular expression to use to check the character before a
     * protocol-relative URL match. We don't want to match a protocol-relative
     * URL if it is part of another word.
     *
     * For example, we want to match something like "Go to: //google.com",
     * but we don't want to match something like "abc//google.com"
     *
     * This regular expression is used to test the character before the '//'.
     *
     * @protected
     * @property {RegExp} wordCharRegExp
     */
    protected wordCharRegExp: RegExp;
    /**
     * @method constructor
     * @param {Object} cfg The configuration properties for the UrlMatcher
     *   instance, specified in an Object (map).
     */
    constructor(cfg: UrlMatcherConfig);
    /**
     * @inheritdoc
     */
    parseMatches(text: string): Match[];
    /**
     * Determines if a match found has an unmatched closing parenthesis,
     * square bracket or curly bracket. If so, the symbol will be removed
     * from the match itself, and appended after the generated anchor tag.
     *
     * A match may have an extra closing parenthesis at the end of the match
     * because the regular expression must include parentheses for URLs such as
     * "wikipedia.com/something_(disambiguation)", which should be auto-linked.
     *
     * However, an extra parenthesis *will* be included when the URL itself is
     * wrapped in parentheses, such as in the case of:
     *     "(wikipedia.com/something_(disambiguation))"
     * In this case, the last closing parenthesis should *not* be part of the
     * URL itself, and this method will return `true`.
     *
     * For square brackets in URLs such as in PHP arrays, the same behavior as
     * the parentheses discussed above should happen:
     *     "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
     * The closing square bracket should not be part of the URL itself, and this
     * method will return `true`.
     *
     * NOTE(review): the summary above mentions curly brackets, but the return
     * description below only names parentheses and square brackets — confirm
     * against the implementation which set is actually handled.
     *
     * @protected
     * @param {String} matchStr The full match string from the {@link #matcherRegex}.
     * @return {Boolean} `true` if there is an unbalanced closing parenthesis or
     *   square bracket at the end of the `matchStr`, `false` otherwise.
     */
    protected matchHasUnbalancedClosingParen(matchStr: string): boolean;
    /**
     * Determine if there's an invalid character after the TLD in a URL. Valid
     * characters after TLD are ':/?#'. Exclude scheme matched URLs from this
     * check.
     *
     * Note that, despite the predicate-style name, this method reports a
     * position (a number) rather than a boolean.
     *
     * @protected
     * @param {String} urlMatch The matched URL, if there was one. Will be an
     *   empty string if the match is not a URL match.
     * @param {String} schemeUrlMatch The match URL string for a scheme
     *   match. Ex: 'http://yahoo.com'. This is used to match something like
     *   'http://localhost', where we won't double check that the domain name
     *   has at least one '.' in it.
     * @return {Number} The position where the invalid character was found. If
     *   no such character was found, returns -1.
     */
    protected matchHasInvalidCharAfterTld(urlMatch: string, schemeUrlMatch: string): number;
}
/**
 * Configuration object accepted by the {@link UrlMatcher} constructor.
 *
 * Extends the base {@link MatcherConfig}; all three properties declared here
 * are required.
 */
export interface UrlMatcherConfig extends MatcherConfig {
    /** The Object form of {@link Autolinker#cfg-stripPrefix}, with every property present. */
    stripPrefix: Required<StripPrefixConfigObj>;
    /** See {@link Autolinker#stripTrailingSlash}. */
    stripTrailingSlash: boolean;
    /** See {@link Autolinker#decodePercentEncoding}. */
    decodePercentEncoding: boolean;
}