1 | ;
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.excludeUnbalancedTrailingBracesAndPunctuation = exports.parseMatches = void 0;
|
4 | var regex_lib_1 = require("../regex-lib");
|
5 | var url_match_1 = require("../match/url-match");
|
6 | var utils_1 = require("../utils");
|
7 | var uri_utils_1 = require("./uri-utils");
|
8 | var email_utils_1 = require("./email-utils");
|
9 | var email_match_1 = require("../match/email-match");
|
10 | var hashtag_utils_1 = require("./hashtag-utils");
|
11 | var hashtag_match_1 = require("../match/hashtag-match");
|
12 | var mention_utils_1 = require("./mention-utils");
|
13 | var mention_match_1 = require("../match/mention-match");
|
14 | var phone_number_utils_1 = require("./phone-number-utils");
|
15 | var phone_match_1 = require("../match/phone-match");
|
16 | // For debugging: search for and uncomment other "For debugging" lines
|
17 | // import CliTable from 'cli-table';
|
18 | /**
|
19 | * Parses URL, email, twitter, mention, and hashtag matches from the given
|
20 | * `text`.
|
21 | */
|
22 | function parseMatches(text, args) {
|
23 | var tagBuilder = args.tagBuilder;
|
24 | var stripPrefix = args.stripPrefix;
|
25 | var stripTrailingSlash = args.stripTrailingSlash;
|
26 | var decodePercentEncoding = args.decodePercentEncoding;
|
27 | var hashtagServiceName = args.hashtagServiceName;
|
28 | var mentionServiceName = args.mentionServiceName;
|
29 | var matches = [];
|
30 | var textLen = text.length;
|
31 | // An array of all active state machines. Empty array means we're in the
|
32 | // "no url" state
|
33 | var stateMachines = [];
|
34 | // For debugging: search for and uncomment other "For debugging" lines
|
35 | // const table = new CliTable({
|
36 | // head: ['charIdx', 'char', 'states', 'charIdx', 'startIdx', 'reached accept state'],
|
37 | // });
|
38 | var charIdx = 0;
|
39 | for (; charIdx < textLen; charIdx++) {
|
40 | var char = text.charAt(charIdx);
|
41 | if (stateMachines.length === 0) {
|
42 | stateNoMatch(char);
|
43 | }
|
44 | else {
|
45 | // Must loop through the state machines backwards for when one
|
46 | // is removed
|
47 | for (var stateIdx = stateMachines.length - 1; stateIdx >= 0; stateIdx--) {
|
48 | var stateMachine = stateMachines[stateIdx];
|
49 | switch (stateMachine.state) {
|
50 | // Protocol-relative URL states
|
51 | case 11 /* ProtocolRelativeSlash1 */:
|
52 | stateProtocolRelativeSlash1(stateMachine, char);
|
53 | break;
|
54 | case 12 /* ProtocolRelativeSlash2 */:
|
55 | stateProtocolRelativeSlash2(stateMachine, char);
|
56 | break;
|
57 | case 0 /* SchemeChar */:
|
58 | stateSchemeChar(stateMachine, char);
|
59 | break;
|
60 | case 1 /* SchemeHyphen */:
|
61 | stateSchemeHyphen(stateMachine, char);
|
62 | break;
|
63 | case 2 /* SchemeColon */:
|
64 | stateSchemeColon(stateMachine, char);
|
65 | break;
|
66 | case 3 /* SchemeSlash1 */:
|
67 | stateSchemeSlash1(stateMachine, char);
|
68 | break;
|
69 | case 4 /* SchemeSlash2 */:
|
70 | stateSchemeSlash2(stateMachine, char);
|
71 | break;
|
72 | case 5 /* DomainLabelChar */:
|
73 | stateDomainLabelChar(stateMachine, char);
|
74 | break;
|
75 | case 6 /* DomainHyphen */:
|
76 | stateDomainHyphen(stateMachine, char);
|
77 | break;
|
78 | case 7 /* DomainDot */:
|
79 | stateDomainDot(stateMachine, char);
|
80 | break;
|
81 | case 13 /* IpV4Digit */:
|
82 | stateIpV4Digit(stateMachine, char);
|
83 | break;
|
84 | case 14 /* IpV4Dot */:
|
85 | stateIPv4Dot(stateMachine, char);
|
86 | break;
|
87 | case 8 /* PortColon */:
|
88 | statePortColon(stateMachine, char);
|
89 | break;
|
90 | case 9 /* PortNumber */:
|
91 | statePortNumber(stateMachine, char);
|
92 | break;
|
93 | case 10 /* Path */:
|
94 | statePath(stateMachine, char);
|
95 | break;
|
96 | // Email States
|
97 | case 15 /* EmailMailto_M */:
|
98 | stateEmailMailto_M(stateMachine, char);
|
99 | break;
|
100 | case 16 /* EmailMailto_A */:
|
101 | stateEmailMailto_A(stateMachine, char);
|
102 | break;
|
103 | case 17 /* EmailMailto_I */:
|
104 | stateEmailMailto_I(stateMachine, char);
|
105 | break;
|
106 | case 18 /* EmailMailto_L */:
|
107 | stateEmailMailto_L(stateMachine, char);
|
108 | break;
|
109 | case 19 /* EmailMailto_T */:
|
110 | stateEmailMailto_T(stateMachine, char);
|
111 | break;
|
112 | case 20 /* EmailMailto_O */:
|
113 | stateEmailMailto_O(stateMachine, char);
|
114 | break;
|
115 | case 21 /* EmailMailto_Colon */:
|
116 | stateEmailMailtoColon(stateMachine, char);
|
117 | break;
|
118 | case 22 /* EmailLocalPart */:
|
119 | stateEmailLocalPart(stateMachine, char);
|
120 | break;
|
121 | case 23 /* EmailLocalPartDot */:
|
122 | stateEmailLocalPartDot(stateMachine, char);
|
123 | break;
|
124 | case 24 /* EmailAtSign */:
|
125 | stateEmailAtSign(stateMachine, char);
|
126 | break;
|
127 | case 25 /* EmailDomainChar */:
|
128 | stateEmailDomainChar(stateMachine, char);
|
129 | break;
|
130 | case 26 /* EmailDomainHyphen */:
|
131 | stateEmailDomainHyphen(stateMachine, char);
|
132 | break;
|
133 | case 27 /* EmailDomainDot */:
|
134 | stateEmailDomainDot(stateMachine, char);
|
135 | break;
|
136 | // Hashtag states
|
137 | case 28 /* HashtagHashChar */:
|
138 | stateHashtagHashChar(stateMachine, char);
|
139 | break;
|
140 | case 29 /* HashtagTextChar */:
|
141 | stateHashtagTextChar(stateMachine, char);
|
142 | break;
|
143 | // Mention states
|
144 | case 30 /* MentionAtChar */:
|
145 | stateMentionAtChar(stateMachine, char);
|
146 | break;
|
147 | case 31 /* MentionTextChar */:
|
148 | stateMentionTextChar(stateMachine, char);
|
149 | break;
|
150 | // Phone number states
|
151 | case 32 /* PhoneNumberOpenParen */:
|
152 | statePhoneNumberOpenParen(stateMachine, char);
|
153 | break;
|
154 | case 33 /* PhoneNumberAreaCodeDigit1 */:
|
155 | statePhoneNumberAreaCodeDigit1(stateMachine, char);
|
156 | break;
|
157 | case 34 /* PhoneNumberAreaCodeDigit2 */:
|
158 | statePhoneNumberAreaCodeDigit2(stateMachine, char);
|
159 | break;
|
160 | case 35 /* PhoneNumberAreaCodeDigit3 */:
|
161 | statePhoneNumberAreaCodeDigit3(stateMachine, char);
|
162 | break;
|
163 | case 36 /* PhoneNumberCloseParen */:
|
164 | statePhoneNumberCloseParen(stateMachine, char);
|
165 | break;
|
166 | case 37 /* PhoneNumberPlus */:
|
167 | statePhoneNumberPlus(stateMachine, char);
|
168 | break;
|
169 | case 38 /* PhoneNumberDigit */:
|
170 | statePhoneNumberDigit(stateMachine, char);
|
171 | break;
|
172 | case 39 /* PhoneNumberSeparator */:
|
173 | statePhoneNumberSeparator(stateMachine, char);
|
174 | break;
|
175 | case 40 /* PhoneNumberControlChar */:
|
176 | statePhoneNumberControlChar(stateMachine, char);
|
177 | break;
|
178 | case 41 /* PhoneNumberPoundChar */:
|
179 | statePhoneNumberPoundChar(stateMachine, char);
|
180 | break;
|
181 | default:
|
182 | (0, utils_1.assertNever)(stateMachine.state);
|
183 | }
|
184 | }
|
185 | }
|
186 | // For debugging: search for and uncomment other "For debugging" lines
|
187 | // table.push([
|
188 | // charIdx,
|
189 | // char,
|
190 | // stateMachines.map(machine => State[machine.state]).join('\n') || '(none)',
|
191 | // charIdx,
|
192 | // stateMachines.map(m => m.startIdx).join('\n'),
|
193 | // stateMachines.map(m => m.acceptStateReached).join('\n'),
|
194 | // ]);
|
195 | }
|
// Capture any valid match at the end of the string.
// Note: walk the array by index from the last element down to the first.
// captureMatchIfValidAndRemove() removes the given state machine from
// `stateMachines`, so iterating backwards guarantees that a removal never
// shifts an element we have yet to visit, and each machine is handled
// exactly once. This is the same (reverse) order in which the
// per-character loop above visits the state machines.
for (var i = stateMachines.length - 1; i >= 0; i--) {
    captureMatchIfValidAndRemove(stateMachines[i]);
}
|
204 | // For debugging: search for and uncomment other "For debugging" lines
|
205 | // console.log(`\nRead string:\n ${text}`);
|
206 | // console.log(table.toString());
|
207 | return matches;
|
// Handles a character read while we're not in a URL/email/etc. (i.e. when no
// state machines exist). Pushes one new state machine onto `stateMachines`
// for every kind of match that `char` could begin, each starting at the
// current `charIdx`. If the character cannot begin any match, nothing is
// pushed and we remain in the "non-url" state.
function stateNoMatch(char) {
    // Characters which can begin exactly one kind of match
    switch (char) {
        case '#':
            // Could begin a Hashtag match
            stateMachines.push(createHashtagStateMachine(charIdx, 28 /* HashtagHashChar */));
            return;
        case '@':
            // Could begin a Mention match
            stateMachines.push(createMentionStateMachine(charIdx, 30 /* MentionAtChar */));
            return;
        case '/':
            // Could begin a protocol-relative URL
            stateMachines.push(createTldUrlStateMachine(charIdx, 11 /* ProtocolRelativeSlash1 */));
            return;
        case '+':
            // Could begin a Phone number
            stateMachines.push(createPhoneNumberStateMachine(charIdx, 37 /* PhoneNumberPlus */));
            return;
        case '(':
            stateMachines.push(createPhoneNumberStateMachine(charIdx, 32 /* PhoneNumberOpenParen */));
            return;
    }
    // Any other character may begin several kinds of matches at once, so
    // every check below is independent of the others
    if (regex_lib_1.digitRe.test(char)) {
        // A digit could begin a phone number...
        stateMachines.push(createPhoneNumberStateMachine(charIdx, 38 /* PhoneNumberDigit */));
        // ...or an IP address
        stateMachines.push(createIpV4UrlStateMachine(charIdx, 13 /* IpV4Digit */));
    }
    if ((0, email_utils_1.isEmailLocalPartStartChar)(char)) {
        // Could begin the local part of an email address. An 'm' in
        // particular could instead begin a 'mailto:' prefix
        var startState;
        if (char.toLowerCase() === 'm') {
            startState = 15 /* EmailMailto_M */;
        }
        else {
            startState = 22 /* EmailLocalPart */;
        }
        stateMachines.push(createEmailStateMachine(charIdx, startState));
    }
    if ((0, uri_utils_1.isSchemeStartChar)(char)) {
        // Could begin a URL scheme
        stateMachines.push(createSchemeUrlStateMachine(charIdx, 0 /* SchemeChar */));
    }
    if (regex_lib_1.alphaNumericAndMarksRe.test(char)) {
        // Could begin a domain name label for a TLD match
        stateMachines.push(createTldUrlStateMachine(charIdx, 5 /* DomainLabelChar */));
    }
}
|
// Handles the state where the last character read was part of a URL scheme.
// Implements ABNF: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
// A ':' moves to the SchemeColon state, a '-' moves to the SchemeHyphen
// state, any other scheme character keeps the machine in this state, and
// anything else discards the machine.
function stateSchemeChar(stateMachine, char) {
    if (char === ':') {
        stateMachine.state = 2 /* SchemeColon */;
        return;
    }
    if (char === '-') {
        stateMachine.state = 1 /* SchemeHyphen */;
        return;
    }
    if (!(0, uri_utils_1.isSchemeChar)(char)) {
        // Not a scheme character, so this cannot be a scheme
        (0, utils_1.remove)(stateMachines, stateMachine);
    }
    // Otherwise: remain in the SchemeChar state
}
|
// Handles the state where the last character read was a '-' inside a
// potential URL scheme.
function stateSchemeHyphen(stateMachine, char) {
    if (char === '-') {
        // Another hyphen: remain in the SchemeHyphen state
        // TODO: Should a colon following a dash be counted as the end of the scheme?
        return;
    }
    if (char === '/') {
        // Not a valid scheme match, but the slash may begin a
        // protocol-relative match (such as //google.com). Swap this machine
        // for one that starts at the slash
        (0, utils_1.remove)(stateMachines, stateMachine);
        stateMachines.push(createTldUrlStateMachine(charIdx, 11 /* ProtocolRelativeSlash1 */));
        return;
    }
    if ((0, uri_utils_1.isSchemeChar)(char)) {
        // Back to reading ordinary scheme characters
        stateMachine.state = 0 /* SchemeChar */;
        return;
    }
    // Any other character: not a scheme
    (0, utils_1.remove)(stateMachines, stateMachine);
}
|
// Handles the state where the last character read was the ':' that ends a
// potential URL scheme.
function stateSchemeColon(stateMachine, char) {
    if (char === '/') {
        stateMachine.state = 3 /* SchemeSlash1 */;
        return;
    }
    // A '.' means we've read something like 'hello:.' - don't capture. Any
    // other character that cannot start a domain label is rejected as well
    if (char === '.' || !(0, uri_utils_1.isDomainLabelStartChar)(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 5 /* DomainLabelChar */;
    // The text before the colon may only have been an "introduction", with
    // the character after the colon starting the actual scheme, as in:
    //    "The link:http://google.com"
    // Start an additional machine at this character to capture that case
    if ((0, uri_utils_1.isSchemeStartChar)(char)) {
        stateMachines.push(createSchemeUrlStateMachine(charIdx, 0 /* SchemeChar */));
    }
}
|
// Handles the state where the last character read was the first '/' after a
// scheme's ':' (e.g. "scheme:/").
function stateSchemeSlash1(stateMachine, char) {
    if (char === '/') {
        // Second slash (e.g. "scheme://")
        stateMachine.state = 4 /* SchemeSlash2 */;
        return;
    }
    if (!(0, uri_utils_1.isPathChar)(char)) {
        // Neither a slash nor a path character: finish this machine
        captureMatchIfValidAndRemove(stateMachine);
        return;
    }
    // A path character directly after the single slash: we're now reading a
    // path, which is an accept state
    stateMachine.state = 10 /* Path */;
    stateMachine.acceptStateReached = true;
}
|
// Handles the state where the last character read was the second '/' after
// a scheme's ':' (e.g. "scheme://").
//
// - A third '/' means the URL has an empty authority followed by an absolute
//   path (path-absolute in the ABNF), such as in file:///c:/windows/etc. See
//   https://tools.ietf.org/html/rfc3986#appendix-A
//   The machine moves to the Path state and is marked as having reached an
//   accept state. The Path state handler never sets `acceptStateReached`
//   itself, so without marking it here a "scheme:///..." machine could never
//   be captured, no matter how many path characters followed.
// - A domain label start character begins the "authority" section and is
//   also an accept state.
// - Anything else is not a valid URL, and the machine is discarded.
function stateSchemeSlash2(stateMachine, char) {
    if (char === '/') {
        stateMachine.state = 10 /* Path */;
        stateMachine.acceptStateReached = true;
    }
    else if ((0, uri_utils_1.isDomainLabelStartChar)(char)) {
        // start of "authority" section - see https://tools.ietf.org/html/rfc3986#appendix-A
        stateMachine.state = 5 /* DomainLabelChar */;
        stateMachine.acceptStateReached = true;
    }
    else {
        // not valid
        (0, utils_1.remove)(stateMachines, stateMachine);
    }
}
|
// Handles the state where a single '/' has been read which might begin a
// protocol-relative URL (such as //google.com).
function stateProtocolRelativeSlash1(stateMachine, char) {
    if (char !== '/') {
        // Without a second slash, this cannot be the start of a
        // protocol-relative URL
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 12 /* ProtocolRelativeSlash2 */;
}
|
// Handles the state where a second '/' has been read, which could start a
// protocol-relative URL. The next character must be able to start a domain
// label; otherwise the machine is discarded.
function stateProtocolRelativeSlash2(stateMachine, char) {
    if (!(0, uri_utils_1.isDomainLabelStartChar)(char)) {
        // Not a URL
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 5 /* DomainLabelChar */;
}
|
// Handles the state where the last character read was a domain label
// character.
function stateDomainLabelChar(stateMachine, char) {
    switch (char) {
        case '.':
            stateMachine.state = 7 /* DomainDot */;
            return;
        case '-':
            stateMachine.state = 6 /* DomainHyphen */;
            return;
        case ':':
            // Beginning of a port number, which ends the domain name
            stateMachine.state = 8 /* PortColon */;
            return;
    }
    if ((0, uri_utils_1.isUrlSuffixStartChar)(char)) {
        // '/', '?', or '#'
        stateMachine.state = 10 /* Path */;
        return;
    }
    if ((0, uri_utils_1.isDomainLabelChar)(char)) {
        // Remain in the DomainLabelChar state
        return;
    }
    // Anything else ends the domain name
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a '-' inside a domain
// label.
function stateDomainHyphen(stateMachine, char) {
    if (char === '-') {
        // Another hyphen: remain in the DomainHyphen state
        return;
    }
    // A '-.' sequence is not valid in a domain label, so a '.' ends the
    // match just like any character that cannot start a domain label
    if (char !== '.' && (0, uri_utils_1.isDomainLabelStartChar)(char)) {
        stateMachine.state = 5 /* DomainLabelChar */;
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a '.' inside a domain
// name.
function stateDomainDot(stateMachine, char) {
    if (char !== '.' && (0, uri_utils_1.isDomainLabelStartChar)(char)) {
        // After hitting a dot and then the start of another domain label,
        // we've reached an accept state
        stateMachine.state = 5 /* DomainLabelChar */;
        stateMachine.acceptStateReached = true;
        return;
    }
    // Either a second '.' in a row or some other character. Domain names
    // cannot have multiple '.'s next to each other, but we may already have
    // read a valid domain name, with the '..' sequence just forming an
    // ellipsis at the end of a sentence. Either way, the domain name ends
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a digit of a
// potential IPv4 address.
function stateIpV4Digit(stateMachine, char) {
    if (char === '.') {
        stateMachine.state = 14 /* IpV4Dot */;
        return;
    }
    if (char === ':') {
        // Beginning of a port number
        stateMachine.state = 8 /* PortColon */;
        return;
    }
    if (regex_lib_1.digitRe.test(char)) {
        // Remain in the IPv4 digit state
        return;
    }
    if ((0, uri_utils_1.isUrlSuffixStartChar)(char)) {
        stateMachine.state = 10 /* Path */;
        return;
    }
    if (regex_lib_1.alphaNumericAndMarksRe.test(char)) {
        // Hitting an alpha character means this must not be an IPv4
        // address. Example of this: 1.2.3.4abc
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a '.' inside a
// potential IPv4 address.
function stateIPv4Dot(stateMachine, char) {
    if (!regex_lib_1.digitRe.test(char)) {
        // A dot must be followed by a digit; finish this machine
        captureMatchIfValidAndRemove(stateMachine);
        return;
    }
    // A digit after a dot begins a new octet
    stateMachine.octetsEncountered++;
    stateMachine.state = 13 /* IpV4Digit */;
    // Once we have encountered 4 octets, it's *potentially* a valid IPv4
    // address. Our IPv4 regex will confirm the match later though to make
    // sure each octet is in the 0-255 range, and there's exactly 4 octets
    // (not 5 or more)
    if (stateMachine.octetsEncountered === 4) {
        stateMachine.acceptStateReached = true;
    }
}
|
// Handles the state where the last character read was the ':' that may
// begin a port number.
function statePortColon(stateMachine, char) {
    if (!regex_lib_1.digitRe.test(char)) {
        // No port digits follow the colon; finish this machine
        captureMatchIfValidAndRemove(stateMachine);
        return;
    }
    stateMachine.state = 9 /* PortNumber */;
}
|
// Handles the state where the last character read was a digit of a port
// number.
function statePortNumber(stateMachine, char) {
    if (regex_lib_1.digitRe.test(char)) {
        // Remain in the port number state
        return;
    }
    if ((0, uri_utils_1.isUrlSuffixStartChar)(char)) {
        // '/', '?', or '#'
        stateMachine.state = 10 /* Path */;
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where we're reading the path portion of a URL. The
// machine stays in this state for as long as path characters are read; the
// first non-path character finishes the machine.
function statePath(stateMachine, char) {
    if (!(0, uri_utils_1.isPathChar)(char)) {
        captureMatchIfValidAndRemove(stateMachine);
    }
}
|
// Handles the state where we've read an 'm' that may begin a 'mailto:'
// prefix. An 'a' (in either case) continues the prefix; any other character
// is handled as part of an ordinary email local part.
function stateEmailMailto_M(stateMachine, char) {
    if (char.toLowerCase() !== 'a') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 16 /* EmailMailto_A */;
}
|
// Handles the state where we've read the 'a' of a potential 'mailto:'
// prefix. An 'i' (in either case) continues the prefix; any other character
// is handled as part of an ordinary email local part.
function stateEmailMailto_A(stateMachine, char) {
    if (char.toLowerCase() !== 'i') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 17 /* EmailMailto_I */;
}
|
// Handles the state where we've read the 'i' of a potential 'mailto:'
// prefix. An 'l' (in either case) continues the prefix; any other character
// is handled as part of an ordinary email local part.
function stateEmailMailto_I(stateMachine, char) {
    if (char.toLowerCase() !== 'l') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 18 /* EmailMailto_L */;
}
|
// Handles the state where we've read the 'l' of a potential 'mailto:'
// prefix. A 't' (in either case) continues the prefix; any other character
// is handled as part of an ordinary email local part.
function stateEmailMailto_L(stateMachine, char) {
    if (char.toLowerCase() !== 't') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 19 /* EmailMailto_T */;
}
|
// Handles the state where we've read the 't' of a potential 'mailto:'
// prefix. An 'o' (in either case) continues the prefix; any other character
// is handled as part of an ordinary email local part.
function stateEmailMailto_T(stateMachine, char) {
    if (char.toLowerCase() !== 'o') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 20 /* EmailMailto_O */;
}
|
// Handles the state where we've read the 'o' of a potential 'mailto:'
// prefix. A ':' completes the prefix; any other character is handled as
// part of an ordinary email local part.
function stateEmailMailto_O(stateMachine, char) {
    if (char.toLowerCase() !== ':') {
        stateEmailLocalPart(stateMachine, char);
        return;
    }
    stateMachine.state = 21 /* EmailMailto_Colon */;
}
|
// Handles the state where we've read the ':' that completes a 'mailto:'
// prefix. The next character must be an email local part character;
// otherwise the machine is discarded.
function stateEmailMailtoColon(stateMachine, char) {
    if (!(0, email_utils_1.isEmailLocalPartChar)(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 22 /* EmailLocalPart */;
}
|
// Handles the state where we're currently in the "local part" of an email
// address (as opposed to the "domain part"). Also called by the 'mailto'
// state handlers when the 'mailto' prefix itself has been broken.
function stateEmailLocalPart(stateMachine, char) {
    switch (char) {
        case '.':
            stateMachine.state = 23 /* EmailLocalPartDot */;
            return;
        case '@':
            stateMachine.state = 24 /* EmailAtSign */;
            return;
    }
    if (!(0, email_utils_1.isEmailLocalPartChar)(char)) {
        // Not an email address character
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    // Stay in the "local part" of the email address. The state is assigned
    // explicitly because the machine may still be in one of the 'mailto'
    // states when this function is called from their handlers
    stateMachine.state = 22 /* EmailLocalPart */;
}
|
// Handles the state where we've just read a '.' in the local part of an
// email address.
function stateEmailLocalPartDot(stateMachine, char) {
    // A second '.' in a row is not a valid email address local part, and an
    // '@' immediately after a dot is not an email address either. Only
    // another local part character may follow the dot
    var continuesLocalPart = char !== '.' && char !== '@' && (0, email_utils_1.isEmailLocalPartChar)(char);
    if (continuesLocalPart) {
        stateMachine.state = 22 /* EmailLocalPart */;
    }
    else {
        // Not an email address
        (0, utils_1.remove)(stateMachines, stateMachine);
    }
}
|
// Handles the state where we've just read the '@' of an email address. The
// next character must be able to start a domain label; otherwise the
// machine is discarded.
function stateEmailAtSign(stateMachine, char) {
    if (!(0, uri_utils_1.isDomainLabelStartChar)(char)) {
        // Not an email address
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 25 /* EmailDomainChar */;
}
|
// Handles the state where the last character read was a domain label
// character in the domain part of an email address.
function stateEmailDomainChar(stateMachine, char) {
    switch (char) {
        case '.':
            stateMachine.state = 27 /* EmailDomainDot */;
            return;
        case '-':
            stateMachine.state = 26 /* EmailDomainHyphen */;
            return;
    }
    if ((0, uri_utils_1.isDomainLabelChar)(char)) {
        // Remain in the EmailDomainChar state
        return;
    }
    // Anything else: we potentially have a match if the criteria has been
    // met
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a '-' in the domain
// part of an email address.
function stateEmailDomainHyphen(stateMachine, char) {
    // Two hyphens ("--") or hyphen+dot ("-.") are not valid, so only a
    // domain label character other than those two may follow the hyphen
    var continuesLabel = char !== '-' && char !== '.' && (0, uri_utils_1.isDomainLabelChar)(char);
    if (continuesLabel) {
        stateMachine.state = 25 /* EmailDomainChar */;
    }
    else {
        captureMatchIfValidAndRemove(stateMachine);
    }
}
|
// Handles the state where the last character read was a '.' in the domain
// part of an email address.
function stateEmailDomainDot(stateMachine, char) {
    // Two dots ("..") or dot+hyphen (".-") are not valid, so only a domain
    // label start character other than those two may follow the dot
    var startsNextLabel = char !== '.' && char !== '-' && (0, uri_utils_1.isDomainLabelStartChar)(char);
    if (!startsNextLabel) {
        captureMatchIfValidAndRemove(stateMachine);
        return;
    }
    stateMachine.state = 25 /* EmailDomainChar */;
    // After having read a '.' and then a valid domain character, we now
    // know that the domain part of the email is valid, and we have found at
    // least a partial EmailMatch (however, the email address may have
    // additional characters from this point)
    stateMachine.acceptStateReached = true;
}
|
// Handles the state where we've just read a '#' character. A valid hashtag
// text character following it puts the machine into an accept state;
// anything else discards the machine.
function stateHashtagHashChar(stateMachine, char) {
    if (!(0, hashtag_utils_1.isHashtagTextChar)(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    // '#' char with a valid hashtag text char following
    stateMachine.state = 29 /* HashtagTextChar */;
    stateMachine.acceptStateReached = true;
}
|
// Handles the state where we're currently reading the hashtag's text
// characters. The machine stays in this state for as long as hashtag text
// characters are read; the first other character finishes the machine.
function stateHashtagTextChar(stateMachine, char) {
    if (!(0, hashtag_utils_1.isHashtagTextChar)(char)) {
        captureMatchIfValidAndRemove(stateMachine);
    }
}
|
// Handles the state where we've just read a '@' character. A valid mention
// text character following it puts the machine into an accept state;
// anything else discards the machine.
function stateMentionAtChar(stateMachine, char) {
    if (!(0, mention_utils_1.isMentionTextChar)(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    // '@' char with a valid mention text char following
    stateMachine.state = 31 /* MentionTextChar */;
    stateMachine.acceptStateReached = true;
}
|
// Handles the state where we're currently reading the mention's text
// characters.
function stateMentionTextChar(stateMachine, char) {
    if ((0, mention_utils_1.isMentionTextChar)(char)) {
        // Continue reading characters in the MentionTextChar state
        return;
    }
    if (regex_lib_1.alphaNumericAndMarksRe.test(char)) {
        // An alphanumeric/mark character which is nevertheless not a valid
        // mention text character means this is not a valid match
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where we've just read a '+' that may begin a phone
// number.
function statePhoneNumberPlus(stateMachine, char) {
    if (regex_lib_1.digitRe.test(char)) {
        stateMachine.state = 38 /* PhoneNumberDigit */;
        return;
    }
    // Not a phone number. Discard the machine, but the current character
    // may itself start a new match, so add state machines for it
    (0, utils_1.remove)(stateMachines, stateMachine);
    stateNoMatch(char);
}
|
// Handles the state where we've just read a '(' that may begin a phone
// number's area code.
function statePhoneNumberOpenParen(stateMachine, char) {
    var isDigit = regex_lib_1.digitRe.test(char);
    if (isDigit) {
        stateMachine.state = 33 /* PhoneNumberAreaCodeDigit1 */;
    }
    else {
        (0, utils_1.remove)(stateMachines, stateMachine);
    }
    // In either case, the paren may have just been an open brace for a
    // piece of text, so also start any other machines that the current
    // character could begin
    stateNoMatch(char);
}
|
// Handles the state where we've read the first digit of a parenthesized
// area code. Another digit must follow; otherwise the machine is discarded.
function statePhoneNumberAreaCodeDigit1(stateMachine, char) {
    if (!regex_lib_1.digitRe.test(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 34 /* PhoneNumberAreaCodeDigit2 */;
}
|
// Handles the state where we've read the second digit of a parenthesized
// area code. Another digit must follow; otherwise the machine is discarded.
function statePhoneNumberAreaCodeDigit2(stateMachine, char) {
    if (!regex_lib_1.digitRe.test(char)) {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 35 /* PhoneNumberAreaCodeDigit3 */;
}
|
// Handles the state where we've read the third digit of a parenthesized
// area code. A ')' must follow; otherwise the machine is discarded.
function statePhoneNumberAreaCodeDigit3(stateMachine, char) {
    if (char !== ')') {
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    stateMachine.state = 36 /* PhoneNumberCloseParen */;
}
|
// Handles the state where we've just read the ')' that closes a phone
// number's area code. A digit or a phone number separator character may
// follow; anything else discards the machine.
function statePhoneNumberCloseParen(stateMachine, char) {
    if (regex_lib_1.digitRe.test(char)) {
        stateMachine.state = 38 /* PhoneNumberDigit */;
        return;
    }
    if ((0, phone_number_utils_1.isPhoneNumberSeparatorChar)(char)) {
        stateMachine.state = 39 /* PhoneNumberSeparator */;
        return;
    }
    (0, utils_1.remove)(stateMachines, stateMachine);
}
|
// Handles the state where the last character read was a digit of a phone
// number.
function statePhoneNumberDigit(stateMachine, char) {
    // For now, having reached any digit counts as the machine having
    // reached its accept state. The phone regex will confirm the match
    // later. Alternatively, we could count the number of digits to avoid
    // invoking the phone number regex
    stateMachine.acceptStateReached = true;
    if ((0, phone_number_utils_1.isPhoneNumberControlChar)(char)) {
        stateMachine.state = 40 /* PhoneNumberControlChar */;
        return;
    }
    if (char === '#') {
        stateMachine.state = 41 /* PhoneNumberPoundChar */;
        return;
    }
    if (regex_lib_1.digitRe.test(char)) {
        // Remain in the phone number digit state
        return;
    }
    if (char === '(') {
        stateMachine.state = 32 /* PhoneNumberOpenParen */;
        return;
    }
    if ((0, phone_number_utils_1.isPhoneNumberSeparatorChar)(char)) {
        stateMachine.state = 39 /* PhoneNumberSeparator */;
        return;
    }
    // Anything else ends the phone number
    captureMatchIfValidAndRemove(stateMachine);
    // The transition from a digit character to a letter can be the start of
    // a new scheme URL match
    if ((0, uri_utils_1.isSchemeStartChar)(char)) {
        stateMachines.push(createSchemeUrlStateMachine(charIdx, 0 /* SchemeChar */));
    }
}
|
// Handles the state where the last character read was a separator
// character inside a phone number.
function statePhoneNumberSeparator(stateMachine, char) {
    if (regex_lib_1.digitRe.test(char)) {
        stateMachine.state = 38 /* PhoneNumberDigit */;
        return;
    }
    if (char === '(') {
        stateMachine.state = 32 /* PhoneNumberOpenParen */;
        return;
    }
    // The phone number has ended. Finish this machine, and since the
    // current character may itself start a new match, add state machines
    // for it
    captureMatchIfValidAndRemove(stateMachine);
    stateNoMatch(char);
}
|
// Handles the state where the last character read was a phone number
// control character.
// The ";" character is "wait" in a phone number
// The "," character is "pause" in a phone number
function statePhoneNumberControlChar(stateMachine, char) {
    if ((0, phone_number_utils_1.isPhoneNumberControlChar)(char)) {
        // Remain in the "control char" state
        return;
    }
    if (char === '#') {
        stateMachine.state = 41 /* PhoneNumberPoundChar */;
        return;
    }
    if (regex_lib_1.digitRe.test(char)) {
        stateMachine.state = 38 /* PhoneNumberDigit */;
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
// Handles the state where the last character read was a '#' inside a phone
// number.
// The "#" character is "pound" in a phone number
function statePhoneNumberPoundChar(stateMachine, char) {
    if ((0, phone_number_utils_1.isPhoneNumberControlChar)(char)) {
        stateMachine.state = 40 /* PhoneNumberControlChar */;
        return;
    }
    if (regex_lib_1.digitRe.test(char)) {
        // According to some of the older tests, if there's a digit after a
        // '#' sign, the match is invalid. TODO: Revisit if this is true
        (0, utils_1.remove)(stateMachines, stateMachine);
        return;
    }
    captureMatchIfValidAndRemove(stateMachine);
}
|
/**
 * Finalizes a state machine: removes it from `stateMachines` and, if it
 * reached an accept state and its text passes the validator for its type,
 * appends the corresponding match object to `matches`.
 *
 * The candidate text spans from the machine's `startIdx` up to (but not
 * including) the current `charIdx`.
 *
 * @param stateMachine The state machine to finalize.
 */
function captureMatchIfValidAndRemove(stateMachine) {
    // Drop the machine up front, because several paths below return early.
    (0, utils_1.remove)(stateMachines, stateMachine);
    // A machine that never reached an "accept" state cannot produce a match.
    if (!stateMachine.acceptStateReached) {
        return;
    }
    var offset = stateMachine.startIdx;
    // Strip unbalanced closing braces, e.g. the final ')' in
    //    The link (google.com)
    //    Check out this link here (en.wikipedia.org/wiki/IANA_(disambiguation))
    // as well as trailing punctuation such as '?', ',', ':', '.', etc.
    var candidate = excludeUnbalancedTrailingBracesAndPunctuation(text.slice(stateMachine.startIdx, charIdx));
    switch (stateMachine.type) {
        case 'url': {
            // A URL immediately preceded by '@' would be part of an email
            // address, so it must not be reported as a URL.
            if (text.charAt(stateMachine.startIdx - 1) === '@') {
                return;
            }
            // The machine's match type ('scheme', 'tld' or 'ipV4') selects
            // the validator and is passed through to the UrlMatch unchanged.
            var urlMatchType = stateMachine.matchType;
            switch (urlMatchType) {
                case 'scheme': {
                    // Many characters are accepted in a scheme (like
                    // `fake://test.com`), so text missing whitespace before
                    // an obvious link (`nowhitespacehttp://www.test.com`)
                    // gets overmatched. When `httpSchemeRe` finds a match
                    // inside the candidate, start the match there instead.
                    var commonScheme = uri_utils_1.httpSchemeRe.exec(candidate);
                    if (commonScheme) {
                        offset = offset + commonScheme.index;
                        candidate = candidate.slice(commonScheme.index);
                    }
                    if (!(0, uri_utils_1.isValidSchemeUrl)(candidate)) {
                        return; // invalid scheme URL
                    }
                    break;
                }
                case 'tld':
                    if (!(0, uri_utils_1.isValidTldMatch)(candidate)) {
                        return; // invalid TLD match
                    }
                    break;
                case 'ipV4':
                    if (!(0, uri_utils_1.isValidIpV4Address)(candidate)) {
                        return; // invalid IPv4 address
                    }
                    break;
                default:
                    (0, utils_1.assertNever)(urlMatchType);
            }
            matches.push(new url_match_1.UrlMatch({
                tagBuilder: tagBuilder,
                matchedText: candidate,
                offset: offset,
                urlMatchType: urlMatchType,
                url: candidate,
                protocolRelativeMatch: candidate.slice(0, 2) === '//',
                // TODO: Do these settings need to be passed to the match,
                // or should we handle them here in UrlMatcher?
                stripPrefix: stripPrefix,
                stripTrailingSlash: stripTrailingSlash,
                decodePercentEncoding: decodePercentEncoding,
            }));
            break;
        }
        case 'email':
            // Only addresses accepted by isValidEmail() become matches
            if ((0, email_utils_1.isValidEmail)(candidate)) {
                matches.push(new email_match_1.EmailMatch({
                    tagBuilder: tagBuilder,
                    matchedText: candidate,
                    offset: offset,
                    email: candidate.replace(email_utils_1.mailtoSchemePrefixRe, ''),
                }));
            }
            break;
        case 'hashtag':
            if ((0, hashtag_utils_1.isValidHashtag)(candidate)) {
                matches.push(new hashtag_match_1.HashtagMatch({
                    tagBuilder: tagBuilder,
                    matchedText: candidate,
                    offset: offset,
                    serviceName: hashtagServiceName,
                    hashtag: candidate.slice(1), // drop the first character
                }));
            }
            break;
        case 'mention':
            if ((0, mention_utils_1.isValidMention)(candidate, mentionServiceName)) {
                matches.push(new mention_match_1.MentionMatch({
                    tagBuilder: tagBuilder,
                    matchedText: candidate,
                    offset: offset,
                    serviceName: mentionServiceName,
                    mention: candidate.slice(1), // drop the leading '@' character
                }));
            }
            break;
        case 'phone':
            // Trailing spaces were consumed as "separator" chars by the
            // state machine; they are not part of the phone number.
            candidate = candidate.replace(/ +$/g, '');
            if ((0, phone_number_utils_1.isValidPhoneNumber)(candidate)) {
                // Keep only digits plus the ',', ';' and '#' characters
                var digitsAndControls = candidate.replace(/[^0-9,;#]/g, '');
                matches.push(new phone_match_1.PhoneMatch({
                    tagBuilder: tagBuilder,
                    matchedText: candidate,
                    offset: offset,
                    number: digitsAndControls,
                    plusSign: candidate.charAt(0) === '+',
                }));
            }
            break;
        default:
            (0, utils_1.assertNever)(stateMachine);
    }
}
|
955 | }
|
956 | exports.parseMatches = parseMatches;
|
var openBraceRe = /[\(\{\[]/;
var closeBraceRe = /[\)\}\]]/;
var oppositeBrace = {
    ')': '(',
    '}': '{',
    ']': '[',
};
/**
 * Trims unbalanced closing braces (parentheses, square brackets, curly
 * brackets) and disallowed trailing punctuation from the end of a match.
 *
 * Closing braces are valid URL path characters, so a match such as
 * "wikipedia.com/something_(disambiguation)" must keep its final ')'.
 * When the URL itself is wrapped in braces, however, as in
 *
 *     "(wikipedia.com/something_(disambiguation))"
 *
 * the state machine also reads the wrapping ')' as part of the match. That
 * last brace has no opening partner inside the match and is removed here.
 * The same applies to square brackets, e.g. PHP array parameters:
 *
 *     "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
 *
 * where only the very last ']' is dropped.
 *
 * @param matchedText The full matched URL/email/hashtag/etc. from the state
 *   machine parser.
 * @return The matched text with extraneous suffix characters removed.
 */
function excludeUnbalancedTrailingBracesAndPunctuation(matchedText) {
    // Net count per brace kind, keyed by the opening brace: +1 for every
    // opening brace, -1 for every closing brace. A negative value means
    // there are more closers than openers of that kind.
    var balance = {
        '(': 0,
        '{': 0,
        '[': 0,
    };
    var ch;
    for (var pos = 0; pos < matchedText.length; pos++) {
        ch = matchedText.charAt(pos);
        if (openBraceRe.test(ch)) {
            balance[ch] += 1;
        }
        else if (closeBraceRe.test(ch)) {
            balance[oppositeBrace[ch]] -= 1;
        }
    }
    // Shrink the kept length from the right while the last kept character is
    // either an unbalanced closing brace or trailing punctuation.
    var keepLen = matchedText.length;
    for (; keepLen > 0; keepLen--) {
        ch = matchedText.charAt(keepLen - 1);
        if (closeBraceRe.test(ch)) {
            var opener = oppositeBrace[ch];
            if (balance[opener] >= 0) {
                break; // this closer has a partner: it belongs to the match
            }
            balance[opener] += 1;
        }
        else if (!uri_utils_1.urlSuffixedCharsNotAllowedAtEndRe.test(ch)) {
            break; // an ordinary character ends the trimming
        }
        // otherwise: a punctuation char like '?', ',', ':', '.', etc. is dropped
    }
    return matchedText.slice(0, keepLen);
}
|
1032 | exports.excludeUnbalancedTrailingBracesAndPunctuation = excludeUnbalancedTrailingBracesAndPunctuation;
|
/**
 * Builds a new 'url' state machine record whose match type is 'scheme'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createSchemeUrlStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'url';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    machine.matchType = 'scheme';
    return machine;
}
|
/**
 * Builds a new 'url' state machine record whose match type is 'tld'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createTldUrlStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'url';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    machine.matchType = 'tld';
    return machine;
}
|
/**
 * Builds a new 'url' state machine record whose match type is 'ipV4'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false` and
 *   `octetsEncountered` set to 1.
 */
function createIpV4UrlStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'url';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    machine.matchType = 'ipV4';
    // Starts at 1 because this machine is created upon encountering the
    // first octet
    machine.octetsEncountered = 1;
    return machine;
}
|
/**
 * Builds a new state machine record of type 'email'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createEmailStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'email';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    return machine;
}
|
/**
 * Builds a new state machine record of type 'hashtag'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createHashtagStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'hashtag';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    return machine;
}
|
/**
 * Builds a new state machine record of type 'mention'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createMentionStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'mention';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    return machine;
}
|
/**
 * Builds a new state machine record of type 'phone'.
 *
 * @param startIdx Value stored as the machine's `startIdx`.
 * @param state Initial state stored on the machine.
 * @return A fresh record with `acceptStateReached` set to `false`.
 */
function createPhoneNumberStateMachine(startIdx, state) {
    var machine = {};
    machine.type = 'phone';
    machine.startIdx = startIdx;
    machine.state = state;
    machine.acceptStateReached = false;
    return machine;
}
|
1093 | //# sourceMappingURL=parse-matches.js.map |
\ | No newline at end of file |