// Retrieved from the UNPKG file viewer (13.5 kB, JavaScript).
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.EmailMatcher = void 0;
var tslib_1 = require("tslib");
var matcher_1 = require("./matcher");
var regex_lib_1 = require("../regex-lib");
var email_match_1 = require("../match/email-match");
var utils_1 = require("../utils");
var tld_regex_1 = require("./tld-regex");
// For debugging: search for other "For debugging" lines
// import CliTable from 'cli-table';

// RegExp objects which are shared by all instances of EmailMatcher. They are
// created once at module load so that repeated `Autolinker.link()` calls, each
// of which instantiates an EmailMatcher, do not re-instantiate them every time
// (which is very expensive - see https://github.com/gregjacobs/Autolinker.js/issues/314).

/**
 * Matches a single character that is allowed in the "local" part of an email
 * address (the "name" part of "name@site.com"): any character from
 * `alphaNumericAndMarksCharsStr` (defined in ../regex-lib) or one of the
 * punctuation characters !#$%&'*+/=?^_`{|}~-
 *
 * The pattern is unanchored and has no flags, so it is intended to be tested
 * against one character at a time.
 */
var localPartCharRegex = new RegExp("[" + regex_lib_1.alphaNumericAndMarksCharsStr + "!#$%&'*+/=?^_`{|}~-]");
/**
 * The TLD pattern from ./tld-regex wrapped in `^...$`, so that it only accepts
 * a string that is a TLD in its entirety rather than one that merely contains
 * a TLD somewhere inside it.
 */
var strictTldRegex = new RegExp("^" + tld_regex_1.tldRegex.source + "$");
/**
 * @class Autolinker.matcher.Email
 * @extends Autolinker.matcher.Matcher
 *
 * Matcher to find email matches in an input string.
 *
 * The input is scanned once, one character at a time, by a small state
 * machine (see {@link #parseMatches}). A leading 'mailto:' prefix is
 * recognized: it is kept in the match's `matchedText` but stripped from the
 * reported `email`.
 *
 * See this class's superclass ({@link Autolinker.matcher.Matcher}) for more details.
 */
var EmailMatcher = /** @class */ (function (_super) {
    tslib_1.__extends(EmailMatcher, _super);
    /**
     * States of the scanner used by `parseMatches`. The numeric values are the
     * same ones the scanner has always used, only named here for readability.
     */
    var State = {
        NonEmailMatch: 0,
        Mailto: 1,
        LocalPart: 2,
        LocalPartDot: 3,
        AtSign: 4,
        DomainChar: 5,
        DomainHyphen: 6,
        DomainDot: 7
    };
    /**
     * For matching a 'mailto:' prefix: maps each character of the prefix to
     * the character that must follow it. Shared by all calls because it is
     * never modified.
     */
    var mailtoTransitions = {
        'm': 'a',
        'a': 'i',
        'i': 'l',
        'l': 't',
        't': 'o',
        'o': ':'
    };
    /**
     * Matches a '-' or '.' at the very end of a candidate match. Both are
     * valid inside a domain name but cannot end an email address.
     */
    var trailingHyphenOrDotRegex = /[-.]$/;
    function EmailMatcher() {
        var _this = _super !== null && _super.apply(this, arguments) || this;
        /**
         * Valid characters that can be used in the "local" part of an email address,
         * i.e. the "name" part of "name@site.com"
         */
        _this.localPartCharRegex = localPartCharRegex;
        /**
         * Stricter TLD regex which adds a beginning and end check to ensure
         * the string is a valid TLD
         */
        _this.strictTldRegex = strictTldRegex;
        return _this;
    }
    /**
     * Scans `text` for email addresses.
     *
     * A candidate is only reported when its domain contains at least one dot
     * that is followed by a domain character, and the text after the last dot
     * (lower-cased) passes `strictTldRegex`.
     *
     * @param {String} text The text to scan.
     * @return {Array} The EmailMatch objects found, in the order in which they
     *   appear in `text`.
     */
    EmailMatcher.prototype.parseMatches = function (text) {
        var tagBuilder = this.tagBuilder;
        var localPartCharRegex = this.localPartCharRegex;
        var strictTldRegex = this.strictTldRegex;
        var matches = [];
        var len = text.length;
        // Sentinel used whenever no email address is being read. Its
        // `hasDomainDot` flag is false, so it can never be captured.
        var noCurrentEmailMatch = new CurrentEmailMatch();
        var charIdx = 0;
        var state = State.NonEmailMatch;
        var currentEmailMatch = noCurrentEmailMatch;
        // For debugging: search for other "For debugging" lines
        // const table = new CliTable( {
        //     head: [ 'charIdx', 'char', 'state', 'charIdx', 'currentEmailMatch.idx', 'hasDomainDot' ]
        // } );
        for (; charIdx < len; charIdx++) {
            var char = text.charAt(charIdx);
            // For debugging: search for other "For debugging" lines
            // table.push(
            //     [ charIdx, char, state, charIdx, currentEmailMatch.idx, currentEmailMatch.hasDomainDot ]
            // );
            switch (state) {
                case State.NonEmailMatch:
                    stateNonEmailAddress(char);
                    break;
                case State.Mailto:
                    stateMailTo(text.charAt(charIdx - 1), char);
                    break;
                case State.LocalPart:
                    stateLocalPart(char);
                    break;
                case State.LocalPartDot:
                    stateLocalPartDot(char);
                    break;
                case State.AtSign:
                    stateAtSign(char);
                    break;
                case State.DomainChar:
                    stateDomainChar(char);
                    break;
                case State.DomainHyphen:
                    stateDomainHyphen(char);
                    break;
                case State.DomainDot:
                    stateDomainDot(char);
                    break;
                default:
                    utils_1.throwUnhandledCaseError(state);
            }
        }
        // Capture any valid match at the end of the string. At this point
        // `charIdx === len`, so the match extends to the end of `text`.
        captureMatchIfValidAndReset();
        // For debugging: search for other "For debugging" lines
        //console.log( '\n' + table.toString() );
        return matches;

        // Handles the state when we're not in an email address. An 'm' is
        // checked first so that it may begin a 'mailto:' prefix.
        function stateNonEmailAddress(char) {
            if (char === 'm') {
                beginEmailMatch(State.Mailto);
            }
            else if (localPartCharRegex.test(char)) {
                beginEmailMatch();
            }
            // Anything else is not an email address character: keep scanning
        }
        // Handles the state when we're reading a 'mailto:' prefix. `prevChar`
        // is the character read just before `char`.
        function stateMailTo(prevChar, char) {
            if (prevChar === ':') {
                // We've reached the end of the 'mailto:' prefix
                if (localPartCharRegex.test(char)) {
                    state = State.LocalPart;
                    updateCurrentEmailMatch({ hasMailtoPrefix: true });
                }
                else {
                    // We've matched 'mailto:' but didn't get anything
                    // meaningful immediately afterwards (for example, a space
                    // character, or an '@' character which formed 'mailto:@')
                    resetToNonEmailMatchState();
                }
            }
            else if (mailtoTransitions[prevChar] === char) {
                // Still reading the 'mailto:' prefix, stay in the Mailto state
            }
            else if (localPartCharRegex.test(char)) {
                // We were reading a prefix of 'mailto:', but encountered a
                // different character that didn't continue the prefix. What
                // has been read so far is treated as ordinary local part text.
                state = State.LocalPart;
            }
            else if (char === '.') {
                // We were reading a prefix of 'mailto:', but encountered a dot
                state = State.LocalPartDot;
            }
            else if (char === '@') {
                // We were reading a prefix of 'mailto:', but encountered an '@'
                state = State.AtSign;
            }
            else {
                // Not an email address character
                resetToNonEmailMatchState();
            }
        }
        // Handles the state when we're currently in the "local part" of an
        // email address (as opposed to the "domain part")
        function stateLocalPart(char) {
            if (char === '.') {
                state = State.LocalPartDot;
            }
            else if (char === '@') {
                state = State.AtSign;
            }
            else if (localPartCharRegex.test(char)) {
                // Stay in the "local part" of the email address
            }
            else {
                // Not an email address character
                resetToNonEmailMatchState();
            }
        }
        // Handles the state where we've just read a '.' inside the local part
        function stateLocalPartDot(char) {
            if (char === '.' || char === '@') {
                // A second '.' in a row, or an '@' immediately after a '.',
                // is not a valid email address local part
                resetToNonEmailMatchState();
            }
            else if (localPartCharRegex.test(char)) {
                state = State.LocalPart;
            }
            else {
                // Anything else, not an email address
                resetToNonEmailMatchState();
            }
        }
        // Handles the state where we've just read the '@' character
        function stateAtSign(char) {
            if (regex_lib_1.domainNameCharRegex.test(char)) {
                state = State.DomainChar;
            }
            else {
                // Anything else, not an email address
                resetToNonEmailMatchState();
            }
        }
        // Handles the state where the previous character was a domain name
        // character
        function stateDomainChar(char) {
            if (char === '.') {
                state = State.DomainDot;
            }
            else if (char === '-') {
                state = State.DomainHyphen;
            }
            else if (regex_lib_1.domainNameCharRegex.test(char)) {
                // Stay in the DomainChar state
            }
            else {
                // Anything else ends the candidate: it is a match if the
                // criteria have been met
                captureMatchIfValidAndReset();
            }
        }
        // Handles the state where we've just read a '-' inside the domain
        function stateDomainHyphen(char) {
            if (char === '-' || char === '.') {
                // Not valid to have two hyphens ("--") or hyphen+dot ("-.")
                captureMatchIfValidAndReset();
            }
            else if (regex_lib_1.domainNameCharRegex.test(char)) {
                state = State.DomainChar;
            }
            else {
                // Anything else
                captureMatchIfValidAndReset();
            }
        }
        // Handles the state where we've just read a '.' inside the domain
        function stateDomainDot(char) {
            if (char === '.' || char === '-') {
                // Not valid to have two dots ("..") or dot+hyphen (".-")
                captureMatchIfValidAndReset();
            }
            else if (regex_lib_1.domainNameCharRegex.test(char)) {
                state = State.DomainChar;
                // After having read a '.' and then a valid domain character,
                // we now know that the domain part of the email is valid, and
                // we have found at least a partial EmailMatch (however, the
                // email address may have additional characters from this point)
                updateCurrentEmailMatch({ hasDomainDot: true });
            }
            else {
                // Anything else
                captureMatchIfValidAndReset();
            }
        }
        // Starts reading a new candidate at the current character. `newState`
        // defaults to the LocalPart state.
        function beginEmailMatch(newState) {
            if (newState === void 0) { newState = State.LocalPart; }
            state = newState;
            currentEmailMatch = new CurrentEmailMatch({ idx: charIdx });
        }
        // Replaces the current candidate with a copy that has `changes`
        // applied. A copy is made rather than mutating in place.
        function updateCurrentEmailMatch(changes) {
            currentEmailMatch = new CurrentEmailMatch(tslib_1.__assign(tslib_1.__assign({}, currentEmailMatch), changes));
        }
        // Abandons the current candidate (if any) without capturing it
        function resetToNonEmailMatchState() {
            state = State.NonEmailMatch;
            currentEmailMatch = noCurrentEmailMatch;
        }
        /*
         * Captures the current email address as an EmailMatch if it's valid,
         * and resets the state to read another email address.
         */
        function captureMatchIfValidAndReset() {
            // We need at least one dot in the domain to be considered a valid
            // email address
            if (currentEmailMatch.hasDomainDot) {
                var matchedText = text.slice(currentEmailMatch.idx, charIdx);
                // If we read a '.' or '-' char that ended the email address
                // (valid domain name characters, but only valid email address
                // characters if they are followed by something else), strip
                // it off now
                if (trailingHyphenOrDotRegex.test(matchedText)) {
                    matchedText = matchedText.slice(0, -1);
                }
                var emailAddress = currentEmailMatch.hasMailtoPrefix
                    ? matchedText.slice('mailto:'.length)
                    : matchedText;
                // If the email address has a valid TLD, add it to the list of matches
                if (doesEmailHaveValidTld(emailAddress)) {
                    matches.push(new email_match_1.EmailMatch({
                        tagBuilder: tagBuilder,
                        matchedText: matchedText,
                        offset: currentEmailMatch.idx,
                        email: emailAddress
                    }));
                }
            }
            resetToNonEmailMatchState();
        }
        /**
         * Determines if the given email address has a valid TLD or not. The
         * TLD is taken to be the text after the last '.', lower-cased.
         * @param {string} emailAddress - email address
         * @return {Boolean} - true if the email has a valid TLD, false otherwise
         */
        function doesEmailHaveValidTld(emailAddress) {
            var emailAddressTld = emailAddress.split('.').pop() || '';
            return strictTldRegex.test(emailAddressTld.toLowerCase());
        }
    };
    return EmailMatcher;
}(matcher_1.Matcher));
exports.EmailMatcher = EmailMatcher;
/**
 * Record describing the email address candidate that is currently being read.
 *
 * Fields:
 * - `idx`: index in the input text where the candidate starts, or -1 when
 *   none was supplied.
 * - `hasMailtoPrefix`: whether the candidate starts with a 'mailto:' prefix.
 * - `hasDomainDot`: whether a '.' followed by a domain character has been
 *   read in the domain part.
 *
 * @param {Object} [cfg] Optional initial values for the fields above. A
 *   missing or `null` config yields the defaults (-1, false, false). The two
 *   flags are coerced to booleans.
 */
var CurrentEmailMatch = /** @class */ (function () {
    function CurrentEmailMatch(cfg) {
        if (cfg === void 0 || cfg === null) { cfg = {}; }
        // Compare against undefined (not truthiness) so that a match starting
        // at index 0 keeps its index
        this.idx = cfg.idx !== undefined ? cfg.idx : -1;
        this.hasMailtoPrefix = !!cfg.hasMailtoPrefix;
        this.hasDomainDot = !!cfg.hasDomainDot;
    }
    return CurrentEmailMatch;
}());

//# sourceMappingURL=email-matcher.js.map