UNPKG

12.6 kBJavaScriptView Raw
1'use strict';
2Object.defineProperty(exports, "__esModule", { value: true });
3exports.init = exports.EmailOptimizer = exports.EMAILCHAR = exports._EMAILCHAR = void 0;
4const mod_1 = require("../mod");
5/**
6 * 邮箱地址中允许出现的字符
7 * 参考:http://www.cs.tut.fi/~jkorpela/rfc/822addr.html
8 */
9exports._EMAILCHAR = '!"#$%&\'*+-/0123456789=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~.'.split('');
10exports.EMAILCHAR = {};
11for (let i in exports._EMAILCHAR)
12 exports.EMAILCHAR[exports._EMAILCHAR[i]] = 1;
/**
 * Optimizer module that recognises e-mail addresses.
 *
 * Scans a segmented word list and merges every run of words that forms an
 * e-mail address into a single word tagged `POSTAG.URL`.
 *
 * @author 老雷<leizongmin@gmail.com>
 */
class EmailOptimizer extends mod_1.SubSModuleOptimizer {
    /**
     * Merge runs of words that look like an e-mail address into one word.
     *
     * The array is modified in place (via `splice`) and is also returned.
     *
     * @param {array} words word array; each word carries its text in `w` and its tag in `p`
     * @return {array} the same `words` array
     */
    doOptimize(words) {
        const POSTAG = this.segment.POSTAG;
        const emailChars = exports.EMAILCHAR;
        // A word tagged A_NX, or tagged A_M whose first character is ASCII.
        const isAsciiWord = (word) => word.p === POSTAG.A_NX
            || (word.p === POSTAG.A_M && word.w.charCodeAt(0) < 128);
        // Replace words[from, to) with one URL-tagged word and report how many
        // words were consumed.
        const mergeRange = (from, to) => {
            const parts = words.slice(from, to);
            words.splice(from, parts.length, {
                w: this.toEmailAddress(parts),
                p: POSTAG.URL
            });
            return parts.length;
        };
        let i = 0;
        // Index of the final word; it is handled after the loop.
        let last = words.length - 1;
        // Index of the first word of the candidate address, or `false` if none.
        // Index 0 is a valid start, so always compare against `false` explicitly.
        let start = false;
        // Whether an '@' has been seen after the candidate start.
        let hasAt = false;
        while (i < last) {
            const word = words[i];
            const ascii = isAsciiWord(word);
            if (start === false) {
                // Only an ASCII word can open an address. An '@' with nothing
                // in front of it is ignored, so "@name" is not treated as an
                // address.
                if (ascii) {
                    start = i;
                }
                i++;
                continue;
            }
            // The first '@' after the start marks the local-part/domain boundary.
            if (hasAt === false && word.w === '@') {
                hasAt = true;
                i++;
                continue;
            }
            const addressPart = ascii || word.w in emailChars;
            // A non-address word after '@' ends the address, unless the '@'
            // was the immediately preceding word (nothing follows it yet).
            if (hasAt && !addressPart && words[i - 1].w !== '@') {
                const consumed = mergeRange(start, i);
                // Resume at the terminating word, which now sits right after
                // the merged address.
                i = start + 1;
                last -= consumed - 1;
                start = false;
                hasAt = false;
                continue;
            }
            if (!addressPart) {
                // The candidate was broken before it became an address.
                start = false;
                hasAt = false;
            }
            i++;
        }
        // Handle the final word, which the loop above never examines.
        // `start !== false` implies the loop ran, so words[last - 1] exists.
        if (start !== false && hasAt) {
            const tail = words[last];
            if (isAsciiWord(tail)) {
                // The address runs to the end of the input.
                mergeRange(start, words.length);
            }
            else if (!(tail.w in emailChars) && words[last - 1].w !== '@') {
                // The final word terminates the address, exactly as a
                // non-address word does inside the loop.
                mergeRange(start, last);
            }
            // A final word that is only address punctuation (e.g. '.') leaves
            // the candidate unmerged, as before.
        }
        return words;
    }
    /**
     * Build the address text from a group of words.
     *
     * @param {array} words word array
     * @return {string} the `w` texts of all words concatenated in order
     */
    toEmailAddress(words) {
        return words.map((word) => word.w).join('');
    }
}
// Public API: the class itself (named and default export) plus a factory
// function, `init`, pre-bound to the class.
exports.EmailOptimizer = EmailOptimizer;
const boundInit = EmailOptimizer.init.bind(EmailOptimizer);
exports.init = boundInit;
exports.default = EmailOptimizer;
110//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiRW1haWxPcHRpbWl6ZXIuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyJFbWFpbE9wdGltaXplci50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxZQUFZLENBQUM7OztBQUViLGdDQUE4RTtBQUk5RTs7O0dBR0c7QUFDVSxRQUFBLFVBQVUsR0FBRyx1RkFBdUYsQ0FBQyxLQUFLLENBQUMsRUFBRSxDQUFDLENBQUM7QUFDL0csUUFBQSxTQUFTLEdBQWtCLEVBQUUsQ0FBQztBQUMzQyxLQUFLLElBQUksQ0FBQyxJQUFJLGtCQUFVO0lBQUUsaUJBQVMsQ0FBQyxrQkFBVSxDQUFDLENBQUMsQ0FBQyxDQUFDLEdBQUcsQ0FBQyxDQUFDO0FBRXZEOzs7O0dBSUc7QUFDSCxNQUFhLGNBQWUsU0FBUSx5QkFBbUI7SUFHdEQ7Ozs7O09BS0c7SUFDSCxVQUFVLENBQUMsS0FBSztRQUVmLE1BQU0sTUFBTSxHQUFHLElBQUksQ0FBQyxPQUFPLENBQUMsTUFBTSxDQUFDO1FBQ25DLGVBQWU7UUFFZixJQUFJLENBQUMsR0FBRyxDQUFDLENBQUM7UUFDVixJQUFJLEVBQUUsR0FBRyxLQUFLLENBQUMsTUFBTSxHQUFHLENBQUMsQ0FBQztRQUMxQixJQUFJLFVBQVUsR0FBcUIsS0FBSyxDQUFDO1FBQ3pDLElBQUksTUFBTSxHQUFHLEtBQUssQ0FBQztRQUVuQixPQUFPLENBQUMsR0FBRyxFQUFFLEVBQ2I7WUFDQyxJQUFJLElBQUksR0FBRyxLQUFLLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFDcEIsSUFBSSxRQUFRLEdBQUcsQ0FBQyxDQUFDLElBQUksQ0FBQyxDQUFDLElBQUksTUFBTSxDQUFDLElBQUksQ0FBQztnQkFDdEMsQ0FBQyxJQUFJLENBQUMsQ0FBQyxJQUFJLE1BQU0sQ0FBQyxHQUFHLElBQUksSUFBSSxDQUFDLENBQUMsQ0FBQyxVQUFVLENBQUMsQ0FBQyxDQUFDLEdBQUcsR0FBRyxDQUFDLENBQUM7Z0JBQ3JELENBQUMsQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLEtBQUssQ0FBQztZQUVoQiw0QkFBNEI7WUFDNUIsYUFBYTtZQUNiLElBQUksVUFBVSxLQUFLLEtBQUssSUFBSSxRQUFRLEVBQ3BDO2dCQUNDLFVBQVUsR0FBRyxDQUFDLENBQUM7Z0JBQ2YsQ0FBQyxFQUFFLENBQUM7Z0JBQ0osU0FBUzthQUNUO2lCQUVEO2dCQUNDLGtCQUFrQjtnQkFDbEIsSUFBSSxNQUFNLEtBQUssS0FBSyxJQUFJLElBQUksQ0FBQyxDQUFDLElBQUksR0FBRyxFQUNyQztvQkFDQyxNQUFNLEdBQUcsSUFBSSxDQUFDO29CQUNkLENBQUMsRUFBRSxDQUFDO29CQUNKLFNBQVM7aUJBQ1Q7Z0JBQ0QsOEJBQThCO2dCQUM5QixJQUFJLE1BQU0sS0FBSyxLQUFLLElBQUksS0FBSyxDQUFDLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxDQUFDLElBQUksR0FBRyxJQUFJLFFBQVEsS0FBSyxLQUFLLElBQUksQ0FBQyxDQUFDLElBQUksQ0FBQyxDQUFDLElBQUksaUJBQVMsQ0FBQyxFQUM3RjtvQkFDQyxJQUFJLE1BQU0sR0FBRyxLQUFLLENBQUMsS0FBSyxDQUFDLFVBQVUsRUFBRSxDQUFDLENBQUMsQ0FBQztvQkFDeEMsZ0NBQWdDO29CQUNoQyxLQUFLLENBQUMsTUFBTSx
DQUFDLFVBQVUsRUFBRSxNQUFNLENBQUMsTUFBTSxFQUFFO3dCQUN2QyxDQUFDLEVBQUUsSUFBSSxDQUFDLGNBQWMsQ0FBQyxNQUFNLENBQUM7d0JBQzlCLENBQUMsRUFBRSxNQUFNLENBQUMsR0FBRztxQkFDYixDQUFDLENBQUM7b0JBQ0gsQ0FBQyxHQUFXLFVBQVUsR0FBRyxDQUFDLENBQUM7b0JBQzNCLEVBQUUsSUFBSSxNQUFNLENBQUMsTUFBTSxHQUFHLENBQUMsQ0FBQztvQkFDeEIsVUFBVSxHQUFHLEtBQUssQ0FBQztvQkFDbkIsTUFBTSxHQUFHLEtBQUssQ0FBQztvQkFDZixTQUFTO2lCQUNUO2dCQUNELFNBQVM7Z0JBQ1QsSUFBSSxVQUFVLEtBQUssS0FBSyxJQUFJLENBQUMsUUFBUSxJQUFJLElBQUksQ0FBQyxDQUFDLElBQUksaUJBQVMsQ0FBQyxFQUM3RDtvQkFDQyxDQUFDLEVBQUUsQ0FBQztvQkFDSixTQUFTO2lCQUNUO2FBQ0Q7WUFFRCxTQUFTO1lBQ1QsVUFBVSxHQUFHLEtBQUssQ0FBQztZQUNuQixNQUFNLEdBQUcsS0FBSyxDQUFDO1lBQ2YsQ0FBQyxFQUFFLENBQUM7U0FDSjtRQUVELFNBQVM7UUFDVCxJQUFJLFVBQVUsSUFBSSxNQUFNLElBQUksS0FBSyxDQUFDLEVBQUUsQ0FBQyxFQUNyQztZQUNDLElBQUksSUFBSSxHQUFHLEtBQUssQ0FBQyxFQUFFLENBQUMsQ0FBQztZQUNyQixJQUFJLFFBQVEsR0FBRyxDQUFDLENBQUMsSUFBSSxDQUFDLENBQUMsSUFBSSxNQUFNLENBQUMsSUFBSSxDQUFDO2dCQUN0QyxDQUFDLElBQUksQ0FBQyxDQUFDLElBQUksTUFBTSxDQUFDLEdBQUcsSUFBSSxJQUFJLENBQUMsQ0FBQyxJQUFJLGlCQUFTLENBQUMsQ0FBQztnQkFDOUMsQ0FBQyxDQUFDLElBQUksQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDO1lBQ2hCLElBQUksUUFBUSxFQUNaO2dCQUNDLElBQUksTUFBTSxHQUFHLEtBQUssQ0FBQyxLQUFLLENBQUMsVUFBVSxFQUFFLEtBQUssQ0FBQyxNQUFNLENBQUMsQ0FBQztnQkFDbkQsZ0NBQWdDO2dCQUNoQyxLQUFLLENBQUMsTUFBTSxDQUFDLFVBQVUsRUFBRSxNQUFNLENBQUMsTUFBTSxFQUFFO29CQUN2QyxDQUFDLEVBQUUsSUFBSSxDQUFDLGNBQWMsQ0FBQyxNQUFNLENBQUM7b0JBQzlCLENBQUMsRUFBRSxNQUFNLENBQUMsR0FBRztpQkFDYixDQUFDLENBQUM7YUFDSDtTQUNEO1FBRUQsT0FBTyxLQUFLLENBQUM7SUFDZCxDQUFDO0lBRUQ7Ozs7O09BS0c7SUFDSCxjQUFjLENBQUMsS0FBYztRQUU1QixJQUFJLEdBQUcsR0FBRyxLQUFLLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBQ3JCLEtBQUssSUFBSSxDQUFDLEdBQUcsQ0FBQyxFQUFFLElBQUksRUFBRSxJQUFJLEdBQUcsS0FBSyxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUMsRUFBRSxFQUMxQztZQUNDLEdBQUcsSUFBSSxJQUFJLENBQUMsQ0FBQyxDQUFDO1NBQ2Q7UUFDRCxPQUFPLEdBQUcsQ0FBQztJQUNaLENBQUM7Q0FFRDtBQTdHRCx3Q0E2R0M7QUFFWSxRQUFBLElBQUksR0FBRyxjQUFjLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxjQUFjLENBQXdDLENBQUM7QUFFcEcsa0JBQWUsY0FBYyxDQUFDIiwic291cmNlc0N
vbnRlbnQiOlsiJ3VzZSBzdHJpY3QnO1xuXG5pbXBvcnQgeyBTdWJTTW9kdWxlLCBTdWJTTW9kdWxlT3B0aW1pemVyLCBJU3ViT3B0aW1pemVyQ3JlYXRlIH0gZnJvbSAnLi4vbW9kJztcbmltcG9ydCB7IFNlZ21lbnQsIElXb3JkLCBJRElDVCB9IGZyb20gJy4uL1NlZ21lbnQnO1xuaW1wb3J0IFVTdHJpbmcgZnJvbSAndW5pLXN0cmluZyc7XG5cbi8qKlxuICog6YKu566x5Zyw5Z2A5Lit5YWB6K645Ye6546w55qE5a2X56ymXG4gKiDlj4LogIPvvJpodHRwOi8vd3d3LmNzLnR1dC5maS9+amtvcnBlbGEvcmZjLzgyMmFkZHIuaHRtbFxuICovXG5leHBvcnQgY29uc3QgX0VNQUlMQ0hBUiA9ICchXCIjJCUmXFwnKistLzAxMjM0NTY3ODk9P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWl5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fi4nLnNwbGl0KCcnKTtcbmV4cG9ydCBjb25zdCBFTUFJTENIQVI6IElESUNUPG51bWJlcj4gPSB7fTtcbmZvciAobGV0IGkgaW4gX0VNQUlMQ0hBUikgRU1BSUxDSEFSW19FTUFJTENIQVJbaV1dID0gMTtcblxuLyoqXG4gKiDpgq7nrrHlnLDlnYDor4bliKvkvJjljJbmqKHlnZdcbiAqXG4gKiBAYXV0aG9yIOiAgembtzxsZWl6b25nbWluQGdtYWlsLmNvbT5cbiAqL1xuZXhwb3J0IGNsYXNzIEVtYWlsT3B0aW1pemVyIGV4dGVuZHMgU3ViU01vZHVsZU9wdGltaXplclxue1xuXG5cdC8qKlxuXHQgKiDlr7nlj6/og73mmK/pgq7nrrHlnLDlnYDnmoTljZXor43ov5vooYzkvJjljJZcblx0ICpcblx0ICogQHBhcmFtIHthcnJheX0gd29yZHMg5Y2V6K+N5pWw57uEXG5cdCAqIEByZXR1cm4ge2FycmF5fVxuXHQgKi9cblx0ZG9PcHRpbWl6ZSh3b3Jkcylcblx0e1xuXHRcdGNvbnN0IFBPU1RBRyA9IHRoaXMuc2VnbWVudC5QT1NUQUc7XG5cdFx0Ly9kZWJ1Zyh3b3Jkcyk7XG5cblx0XHRsZXQgaSA9IDA7XG5cdFx0bGV0IGllID0gd29yZHMubGVuZ3RoIC0gMTtcblx0XHRsZXQgYWRkcl9zdGFydDogYm9vbGVhbiB8IG51bWJlciA9IGZhbHNlO1xuXHRcdGxldCBoYXNfYXQgPSBmYWxzZTtcblxuXHRcdHdoaWxlIChpIDwgaWUpXG5cdFx0e1xuXHRcdFx0bGV0IHdvcmQgPSB3b3Jkc1tpXTtcblx0XHRcdGxldCBpc19hc2NpaSA9ICgod29yZC5wID09IFBPU1RBRy5BX05YKSB8fFxuXHRcdFx0XHQod29yZC5wID09IFBPU1RBRy5BX00gJiYgd29yZC53LmNoYXJDb2RlQXQoMCkgPCAxMjgpKVxuXHRcdFx0XHQ/IHRydWUgOiBmYWxzZTtcblxuXHRcdFx0Ly8g5aaC5p6c5piv5aSW5paH5a2X56ym5oiW6ICF5pWw5a2X77yM56ym5ZCI55S15a2Q6YKu5Lu25Zyw5Z2A5byA5aS055qE5p2h5Lu2XG5cdFx0XHQvLyBAdHMtaWdub3JlXG5cdFx0XHRpZiAoYWRkcl9zdGFydCA9PT0gZmFsc2UgJiYgaXNfYXNjaWkpXG5cdFx0XHR7XG5cdFx0XHRcdGFkZHJfc3RhcnQgPSBpO1xuXHRcdFx0XHRpKys7XG5cdFx0XHRcdGNvbnRpbnVlO1xuXHRcdFx0fVxuXHRcdFx0ZWxzZVxuXHRcdFx0e1xuXHRcdFx0XHQvLyDlpoLmnpz
pgYfliLBA56ym5Y+377yM56ym5ZCI56ys5LqM5Liq5p2h5Lu2XG5cdFx0XHRcdGlmIChoYXNfYXQgPT09IGZhbHNlICYmIHdvcmQudyA9PSAnQCcpXG5cdFx0XHRcdHtcblx0XHRcdFx0XHRoYXNfYXQgPSB0cnVlO1xuXHRcdFx0XHRcdGkrKztcblx0XHRcdFx0XHRjb250aW51ZTtcblx0XHRcdFx0fVxuXHRcdFx0XHQvLyDlpoLmnpzlt7Lnu4/pgYfliLDov4dA56ym5Y+377yM5LiU5Ye6546w5LqG5YW25LuW5a2X56ym77yM5YiZ5oiq5Y+W6YKu566x5Zyw5Z2AXG5cdFx0XHRcdGlmIChoYXNfYXQgIT09IGZhbHNlICYmIHdvcmRzW2kgLSAxXS53ICE9ICdAJyAmJiBpc19hc2NpaSA9PT0gZmFsc2UgJiYgISh3b3JkLncgaW4gRU1BSUxDSEFSKSlcblx0XHRcdFx0e1xuXHRcdFx0XHRcdGxldCBtYWlsd3MgPSB3b3Jkcy5zbGljZShhZGRyX3N0YXJ0LCBpKTtcblx0XHRcdFx0XHQvL2RlYnVnKHRvRW1haWxBZGRyZXNzKG1haWx3cykpO1xuXHRcdFx0XHRcdHdvcmRzLnNwbGljZShhZGRyX3N0YXJ0LCBtYWlsd3MubGVuZ3RoLCB7XG5cdFx0XHRcdFx0XHR3OiB0aGlzLnRvRW1haWxBZGRyZXNzKG1haWx3cyksXG5cdFx0XHRcdFx0XHRwOiBQT1NUQUcuVVJMXG5cdFx0XHRcdFx0fSk7XG5cdFx0XHRcdFx0aSA9IDxudW1iZXI+YWRkcl9zdGFydCArIDE7XG5cdFx0XHRcdFx0aWUgLT0gbWFpbHdzLmxlbmd0aCAtIDE7XG5cdFx0XHRcdFx0YWRkcl9zdGFydCA9IGZhbHNlO1xuXHRcdFx0XHRcdGhhc19hdCA9IGZhbHNlO1xuXHRcdFx0XHRcdGNvbnRpbnVlO1xuXHRcdFx0XHR9XG5cdFx0XHRcdC8vIOWmguaenOW3sue7j+W8gOWktFxuXHRcdFx0XHRpZiAoYWRkcl9zdGFydCAhPT0gZmFsc2UgJiYgKGlzX2FzY2lpIHx8IHdvcmQudyBpbiBFTUFJTENIQVIpKVxuXHRcdFx0XHR7XG5cdFx0XHRcdFx0aSsrO1xuXHRcdFx0XHRcdGNvbnRpbnVlO1xuXHRcdFx0XHR9XG5cdFx0XHR9XG5cblx0XHRcdC8vIOenu+WIsOS4i+S4gOS4quivjVxuXHRcdFx0YWRkcl9zdGFydCA9IGZhbHNlO1xuXHRcdFx0aGFzX2F0ID0gZmFsc2U7XG5cdFx0XHRpKys7XG5cdFx0fVxuXG5cdFx0Ly8g5qOA5p+l5Ymp5L2Z6YOo5YiGXG5cdFx0aWYgKGFkZHJfc3RhcnQgJiYgaGFzX2F0ICYmIHdvcmRzW2llXSlcblx0XHR7XG5cdFx0XHRsZXQgd29yZCA9IHdvcmRzW2llXTtcblx0XHRcdGxldCBpc19hc2NpaSA9ICgod29yZC5wID09IFBPU1RBRy5BX05YKSB8fFxuXHRcdFx0XHQod29yZC5wID09IFBPU1RBRy5BX00gJiYgd29yZC53IGluIEVNQUlMQ0hBUikpXG5cdFx0XHRcdD8gdHJ1ZSA6IGZhbHNlO1xuXHRcdFx0aWYgKGlzX2FzY2lpKVxuXHRcdFx0e1xuXHRcdFx0XHRsZXQgbWFpbHdzID0gd29yZHMuc2xpY2UoYWRkcl9zdGFydCwgd29yZHMubGVuZ3RoKTtcblx0XHRcdFx0Ly9kZWJ1Zyh0b0VtYWlsQWRkcmVzcyhtYWlsd3MpKTtcblx0XHRcdFx0d29yZHMuc3BsaWNlKGFkZHJfc3RhcnQsIG1haWx3cy5sZW5ndGgsIHtcblx0XHRcdFx0XHR3OiB0aGlzLnRvRW1haWx
BZGRyZXNzKG1haWx3cyksXG5cdFx0XHRcdFx0cDogUE9TVEFHLlVSTFxuXHRcdFx0XHR9KTtcblx0XHRcdH1cblx0XHR9XG5cblx0XHRyZXR1cm4gd29yZHM7XG5cdH1cblxuXHQvKipcblx0ICog5qC55o2u5LiA57uE5Y2V6K+N55Sf5oiQ6YKu566x5Zyw5Z2AXG5cdCAqXG5cdCAqIEBwYXJhbSB7YXJyYXl9IHdvcmRzIOWNleivjeaVsOe7hFxuXHQgKiBAcmV0dXJuIHtzdHJpbmd9XG5cdCAqL1xuXHR0b0VtYWlsQWRkcmVzcyh3b3JkczogSVdvcmRbXSlcblx0e1xuXHRcdGxldCByZXQgPSB3b3Jkc1swXS53O1xuXHRcdGZvciAobGV0IGkgPSAxLCB3b3JkOyB3b3JkID0gd29yZHNbaV07IGkrKylcblx0XHR7XG5cdFx0XHRyZXQgKz0gd29yZC53O1xuXHRcdH1cblx0XHRyZXR1cm4gcmV0O1xuXHR9XG5cbn1cblxuZXhwb3J0IGNvbnN0IGluaXQgPSBFbWFpbE9wdGltaXplci5pbml0LmJpbmQoRW1haWxPcHRpbWl6ZXIpIGFzIElTdWJPcHRpbWl6ZXJDcmVhdGU8RW1haWxPcHRpbWl6ZXI+O1xuXG5leHBvcnQgZGVmYXVsdCBFbWFpbE9wdGltaXplcjtcbiJdfQ==
\No newline at end of file