UNPKG

12.6 kBJavaScriptView Raw
1'use strict';
2Object.defineProperty(exports, "__esModule", { value: true });
3exports.init = exports.EmailOptimizer = exports.EMAILCHAR = exports._EMAILCHAR = void 0;
4const mod_1 = require("../mod");
/**
 * Characters allowed to appear in an email address.
 * Reference: http://www.cs.tut.fi/~jkorpela/rfc/822addr.html
 *
 * `_EMAILCHAR` is the list of single characters; `EMAILCHAR` is a lookup
 * table with one key per character (value 1) for constant-time membership
 * checks.
 */
exports._EMAILCHAR = '!"#$%&\'*+-/0123456789=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~.'.split('');
exports.EMAILCHAR = {};
// Iterate the values with for...of: for...in on an array would also visit any
// enumerable properties added to Array.prototype and pollute the table.
for (const ch of exports._EMAILCHAR) {
    exports.EMAILCHAR[ch] = 1;
}
/**
 * Email address recognition optimizer.
 *
 * Scans a segmented word list and merges each run of words that looks like an
 * email address into a single word tagged `POSTAG.URL`.
 *
 * @author 老雷<leizongmin@gmail.com>
 */
class EmailOptimizer extends mod_1.SubSModuleOptimizer {
    /**
     * Merge runs of words that form an email address into a single word.
     *
     * A candidate starts at an ASCII word (tagged `POSTAG.A_NX`, or tagged
     * `POSTAG.A_M` with an ASCII first character), continues over ASCII words
     * and single characters listed in `exports.EMAILCHAR`, and must contain an
     * `@` after its start. The candidate ends at the first word that is neither,
     * or at the end of the input; it is merged only if at least one word
     * follows the `@`.
     *
     * The array is modified in place and also returned.
     *
     * @param {array} words word list; each item has `w` (text) and `p` (tag)
     * @return {array} the same array, with recognised addresses merged
     */
    doOptimize(words) {
        const POSTAG = this.segment.POSTAG;
        // Sentinel for "no candidate address in progress". A numeric sentinel
        // keeps index 0 distinguishable from "not started".
        const NO_START = -1;
        // Foreign-word token, or numeric token whose first character is ASCII.
        const isAsciiWord = (word) => word.p === POSTAG.A_NX
            || (word.p === POSTAG.A_M && word.w.charCodeAt(0) < 128);
        // Own-property check so inherited keys such as "constructor" do not
        // count as email characters.
        const isEmailChar = (word) => Object.prototype.hasOwnProperty.call(exports.EMAILCHAR, word.w);
        // Replace words[start, end) with one URL-tagged word.
        const mergeRange = (start, end) => {
            const parts = words.slice(start, end);
            words.splice(start, parts.length, {
                w: this.toEmailAddress(parts),
                p: POSTAG.URL,
            });
        };
        let addrStart = NO_START;
        let hasAt = false;
        let i = 0;
        // Every word, including the last one, goes through the same logic so
        // that an address followed by a single trailing word is still merged.
        while (i < words.length) {
            const word = words[i];
            const ascii = isAsciiWord(word);
            if (addrStart === NO_START) {
                // Only an ASCII word may open a candidate; an '@' seen before
                // any local part is ignored.
                if (ascii) {
                    addrStart = i;
                }
                i++;
                continue;
            }
            // First '@' inside the candidate.
            if (!hasAt && word.w === '@') {
                hasAt = true;
                i++;
                continue;
            }
            // Still inside the candidate.
            if (ascii || isEmailChar(word)) {
                i++;
                continue;
            }
            // The current word terminates the candidate. Merge it only when an
            // '@' was seen and something follows that '@'.
            if (hasAt && words[i - 1].w !== '@') {
                mergeRange(addrStart, i);
                // The terminating word now sits right after the merged word.
                i = addrStart + 1;
            }
            else {
                i++;
            }
            addrStart = NO_START;
            hasAt = false;
        }
        // End of input is treated like a terminating word.
        if (addrStart !== NO_START && hasAt && words[words.length - 1].w !== '@') {
            mergeRange(addrStart, words.length);
        }
        return words;
    }
    /**
     * Build the address string by concatenating the text of each word.
     *
     * @param {array} words word list; each item has a `w` text property
     * @return {string} concatenated text ('' for an empty list)
     */
    toEmailAddress(words) {
        return words.map((word) => word.w).join('');
    }
}
// Public API: the class is exposed both as the default export and by name.
// `init` is the class's static `init` (not defined in this file; presumably
// inherited from the base class), bound to the class so it can be called
// detached from it.
exports.default = EmailOptimizer;
exports.EmailOptimizer = EmailOptimizer;
exports.init = EmailOptimizer.init.bind(EmailOptimizer);
110//# sourceMappingURL=data:application/json;base64,
\No newline at end of file