UNPKG

25.2 kBJavaScriptView Raw
/**
 * 人名优化模块
 *
 * @author 老雷<leizongmin@gmail.com>
 * @version 0.1
 */
7'use strict';
8Object.defineProperty(exports, "__esModule", { value: true });
9exports.init = exports.ChsNameOptimizer = void 0;
10const mod_1 = require("../mod");
11const CHS_NAMES_1 = require("../mod/CHS_NAMES");
/**
 * Own-property membership test for the name lookup tables.
 *
 * The `in` operator also matches keys inherited through the prototype chain
 * (`constructor`, `toString`, `__proto__`, ...), which would let such tokens be
 * treated as names; only the table's own keys must count.
 *
 * @param {object} table lookup table keyed by the word text
 * @param {string} key word text to look up
 * @return {boolean}
 */
function hasOwnKey(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key);
}
/**
 * Tells whether `text` is listed in either family-name table of CHS_NAMES.
 *
 * @param {string} text word text
 * @return {boolean}
 */
function isFamilyName(text) {
    const names = CHS_NAMES_1.default;
    return hasOwnKey(names.FAMILY_NAME_1, text) || hasOwnKey(names.FAMILY_NAME_2, text);
}
/**
 * Replaces the two tokens at `index` with a single person-name token built
 * from `members`, via the optimizer's inherited `sliceToken`.
 *
 * NOTE(review): `sliceToken` is defined outside this file; the code it
 * superseded here was `words.splice(index, 2, token)`, so it is presumed to
 * replace the tokens in place - confirm against the base class.
 *
 * @param {ChsNameOptimizer} optimizer optimizer performing the merge
 * @param {array} words token array being optimized
 * @param {number} index position of the first of the two tokens to replace
 * @param {array} members original tokens that make up the new token
 * @param {number} rule id of the rule that fired, recorded under the optimizer name
 * @param {object} [extraDebug] additional debug fields to record
 */
function replaceWithNameToken(optimizer, words, index, members, rule, extraDebug) {
    optimizer.sliceToken(words, index, 2, {
        w: members.reduce((joined, token) => joined + token.w, ''),
        p: optimizer._POSTAG.A_NR,
        m: members,
    }, undefined, Object.assign({ [optimizer.name]: rule }, extraDebug));
}
/**
 * Optimizer that merges adjacent tokens which look like a Chinese person name.
 *
 * @todo support "XX氏"
 */
class ChsNameOptimizer extends mod_1.SubSModuleOptimizer {
    constructor(...args) {
        super(...args);
        this.name = 'ChsNameOptimizer';
    }
    /**
     * Caches the dictionaries used by this optimizer: the `TABLE` dictionary
     * and the optimizer blacklist (an empty object when the segmenter has none).
     */
    _cache() {
        super._cache();
        this._TABLE = this.segment.getDict('TABLE');
        this._BLACKLIST = this.segment.getDict("BLACKLIST_FOR_OPTIMIZER" /* BLACKLIST_FOR_OPTIMIZER */) || {};
    }
    /**
     * Checks that the concatenation of the given strings is not blacklisted.
     *
     * @param {...string} words word texts to concatenate
     * @return {true|null} `true` when the merge is allowed, otherwise `null`
     */
    isMergeable2(...words) {
        const nw = words.join('');
        if (!this._BLACKLIST[nw]) {
            return true;
        }
        return null;
    }
    /**
     * Checks that both tokens exist and that their concatenated text is not
     * blacklisted.
     *
     * @param {object} word current token
     * @param {object} nextword following token
     * @return {true|null} `true` when the merge is allowed, otherwise `null`
     */
    isMergeable(word, nextword) {
        if (word && nextword) {
            const nw = word.w + nextword.w;
            // Never merge words that appear in the BLACKLIST.
            if (!this._BLACKLIST[nw]) {
                return true;
            }
        }
        return null;
    }
    /**
     * Optimizes tokens that may form a person name.
     *
     * Runs two scans over `words`, merging matching neighbours into a single
     * token tagged `POSTAG.A_NR`, and returns the same array.
     *
     * @param {array} words token array
     * @return {array}
     */
    doOptimize(words) {
        const POSTAG = this._POSTAG;
        const names = CHS_NAMES_1.default;
        let i = 0;
        /* First scan */
        while (i < words.length) {
            const word = words[i];
            const nextword = words[i + 1];
            if (this.isMergeable(word, nextword)) {
                // Rule 1: "小" / "老" followed by a family name.
                if ((word.w === '小' || word.w === '老') && isFamilyName(nextword.w)) {
                    replaceWithNameToken(this, words, i, [word, nextword], 1);
                    i++;
                    continue;
                }
                // Rule 2: family name + given name (at most 2 characters) already tagged as a name.
                if (isFamilyName(word.w) &&
                    ((nextword.p & POSTAG.A_NR) > 0 && nextword.w.length <= 2)) {
                    replaceWithNameToken(this, words, i, [word, nextword], 2);
                    i++;
                    continue;
                }
                // Rule 3: at least one of the two neighbours is unrecognized; try to read them as a given name
                // (a repeated single-name character, or a first + second character of a double name).
                if (!word.p || !nextword.p) {
                    if ((hasOwnKey(names.SINGLE_NAME, word.w) && word.w === nextword.w) ||
                        (hasOwnKey(names.DOUBLE_NAME_1, word.w) && hasOwnKey(names.DOUBLE_NAME_2, nextword.w))) {
                        replaceWithNameToken(this, words, i, [word, nextword], 3);
                        // Rule 4: if the preceding token may be a family name, merge it in as well.
                        const preword = words[i - 1];
                        if (preword
                            && isFamilyName(preword.w)
                            && this.isMergeable2(preword.w, word.w, nextword.w)) {
                            // The merged token ends up at i - 1, so `i` is deliberately left unchanged here.
                            replaceWithNameToken(this, words, i - 1, [preword, word, nextword], 4);
                        }
                        else {
                            i++;
                        }
                        continue;
                    }
                }
                // Rule 5: family name + given name where one of the two is an unrecognized word.
                // The D_W test prevents punctuation from being taken as part of a name.
                // NOTE(review): the original comment promises a given name of at most 2 characters,
                // but no length check is performed here - confirm whether one is intended.
                if (isFamilyName(word.w)
                    && (!word.p || !nextword.p)
                    && !(word.p & POSTAG.D_W || nextword.p & POSTAG.D_W)) {
                    replaceWithNameToken(this, words, i, [word, nextword], 5);
                }
            }
            // Move on to the next token.
            i++;
        }
        /* Second scan */
        i = 0;
        while (i < words.length) {
            const word = words[i];
            const nextword = words[i + 1];
            // Rule 6: family name + single-character given name.
            if (this.isMergeable(word, nextword)
                && isFamilyName(word.w)
                && hasOwnKey(names.SINGLE_NAME, nextword.w)) {
                const ew = this._TABLE[word.w + nextword.w];
                // Only merge when the combined word is unknown or is itself a person name.
                if (!ew || !ew.p || ew.p & POSTAG.A_NR) {
                    replaceWithNameToken(this, words, i, [word, nextword], 6, {
                        exists_word: ew,
                    });
                }
            }
            // Move on to the next token.
            i++;
        }
        return words;
    }
}
// Named export of the optimizer class.
exports.ChsNameOptimizer = ChsNameOptimizer;
// `init` is exported pre-bound to the class so it can be called detached from it.
const { init: unboundInit } = ChsNameOptimizer;
exports.init = unboundInit.bind(ChsNameOptimizer);
// The class doubles as the module's default export.
exports.default = ChsNameOptimizer;
224//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"ChsNameOptimizer.js","sourceRoot":"","sources":["ChsNameOptimizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,YAAY,CAAC;;;AAEb,gCAA6F;AAC7F,gDAAsH;AAKtH;;GAEG;AACH,MAAa,gBAAiB,SAAQ,yBAAmB;IAAzD;;QAIC,SAAI,GAAG,kBAAkB,CAAC;IAoQ3B,CAAC;IAlQA,MAAM;QAEL,KAAK,CAAC,MAAM,EAAE,CAAC;QAEf,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAE5C,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,yDAA0C,IAAI,EAAE,CAAC;IACxF,CAAC;IAED,YAAY,CAAC,GAAG,KAAe;QAE9B,IAAI,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAExB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,EACxB;YACC,OAAO,IAAI,CAAC;SACZ;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAED,WAAW,CAAC,IAAW,EAAE,QAAe;QAEvC,IAAI,IAAI,IAAI,QAAQ,EACpB;YACC,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;YAE7B;;eAEG;YACH,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,EACxB;gBACC,OAAO,IAAI,CAAC;gBAEZ;;;;;;;kBAOE;aACF;SACD;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAED;;;;;OAKG;IACH,UAAU,CAAC,KAAc;QAExB,eAAe;QACf,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,GAAG,CAAC,CAAC;QAEV,WAAW;QACX,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EACvB;YACC,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,IAAI,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAE5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC,EACpC;gBACC,kBAAkB;gBAClB,iBAAiB;gBACjB,IAAI,QAAQ,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC;oBAC/C,CAAC,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC,EACjF;oBACC;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CAAC,EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBAEH,CAAC,EAAE,CAAC;oBACJ,SAAS;iBACT;gBAED,kBAAkB;gBAClB,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;oBAC3E,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,QAAQ
,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,EAC3D;oBACC;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CAAC,EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBAEH,CAAC,EAAE,CAAC;oBACJ,SAAS;iBACT;gBAED,qCAAqC;gBACrC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAC1B;oBACC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC;wBAC5D,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC,EAC7E;wBACC;;;;;;0BAME;wBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;4BACtB,CAAC,EAAE,MAAM,CAAC,IAAI;4BACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;yBACnB,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;yBACd,CAAC,CAAC;wBAEH,oBAAoB;wBACpB,IAAI,OAAO,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;wBAC3B,IAAI,OAAO;+BACP,CAAC,OAAO,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,OAAO,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;+BAC9E,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAG,QAAQ,CAAC,CAAC,CAAC,EAErD;4BAEC;;;;;;8BAME;4BAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;gCAChC,CAAC,EAAE,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;gCAClC,CAAC,EAAE,MAAM,CAAC,IAAI;gCACd,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC;6BAC5B,EAAE,SAAS,EAAE;gCACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;6BACd,CAAC,CAAC;yBAEH;6BAED;4BACC,CAAC,EAAE,CAAC;yBACJ;wBACD,SAAS;qBACT;iBACD;gBAED,iCAAiC;gBACjC,IACC,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;uBACrE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;oBAE3B;;uBAEG;uBACA,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,IAAI,QAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAErD;oBACC,wBAAwB;oBACxB;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CAAC,
EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;iBACH;aACD;YAED,UAAU;YACV,CAAC,EAAE,CAAC;SACJ;QAED,WAAW;QACX,CAAC,GAAG,CAAC,CAAC;QACN,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EACvB;YACC,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,IAAI,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC,EACpC;gBACC,cAAc;gBACd,IACC,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;;wBAExE,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,WAAW,EAEpC;oBACC;;;;;;sBAME;oBAEF,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;oBAC7B,IAAI,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBAEzB;;uBAEG;oBACH,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,EACtC;wBACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,EAAE;4BACL,CAAC,EAAE,MAAM,CAAC,IAAI;4BACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;yBACnB,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;4BACd,WAAW,EAAE,EAAE;yBACf,CAAC,CAAC;wBAEH,CAAC,EAAE,CAAC;wBACJ,SAAS;qBACT;iBACD;aACD;YAED,UAAU;YACV,CAAC,EAAE,CAAC;SACJ;QAED,OAAO,KAAK,CAAC;IACd,CAAC;CACD;AAxQD,4CAwQC;AAEY,QAAA,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAiC,CAAC;AAEjG,kBAAe,gBAAgB,CAAC","sourcesContent":["/**\n * 人名优化模块\n *\n * @author 老雷<leizongmin@gmail.com>\n * @version 0.1\n */\n\n'use strict';\n\nimport { SubSModule, SubSModuleOptimizer, ISubOptimizer, SubSModuleTokenizer } from '../mod';\nimport CHS_NAMES, { FAMILY_NAME_1, FAMILY_NAME_2, SINGLE_NAME, DOUBLE_NAME_1, DOUBLE_NAME_2 } from '../mod/CHS_NAMES';\nimport Segment, { IDICT, IWord } from '../Segment';\nimport { debug } from '../util';\nimport { EnumDictDatabase } from '../const';\n\n/**\n * @todo 支援 XX氏\n */\nexport class ChsNameOptimizer extends SubSModuleOptimizer\n{\n\tprotected _TABLE: IDICT<IWord>;\n\n\tname = 'ChsNameOptimizer';\n\n\t_cache()\n\t{\n\t\tsuper._cache();\n\n\t\tthis._TABLE = 
this.segment.getDict('TABLE');\n\n\t\tthis._BLACKLIST = this.segment.getDict(EnumDictDatabase.BLACKLIST_FOR_OPTIMIZER) || {};\n\t}\n\n\tisMergeable2(...words: string[])\n\t{\n\t\tlet nw = words.join('');\n\n\t\tif (!this._BLACKLIST[nw])\n\t\t{\n\t\t\treturn true;\n\t\t}\n\n\t\treturn null;\n\t}\n\n\tisMergeable(word: IWord, nextword: IWord)\n\t{\n\t\tif (word && nextword)\n\t\t{\n\t\t\tlet nw = word.w + nextword.w;\n\n\t\t\t/**\n\t\t\t * 不合併存在於 BLACKLIST 內的字詞\n\t\t\t */\n\t\t\tif (!this._BLACKLIST[nw])\n\t\t\t{\n\t\t\t\treturn true;\n\n\t\t\t\t/*\n\t\t\t\treturn {\n\t\t\t\t\tword,\n\t\t\t\t\tnextword,\n\t\t\t\t\tnw,\n\t\t\t\t\tbool: true,\n\t\t\t\t}\n\t\t\t\t*/\n\t\t\t}\n\t\t}\n\n\t\treturn null;\n\t}\n\n\t/**\n\t * 对可能是人名的单词进行优化\n\t *\n\t * @param {array} words 单词数组\n\t * @return {array}\n\t */\n\tdoOptimize(words: IWord[]): IWord[]\n\t{\n\t\t//debug(words);\n\t\tconst POSTAG = this._POSTAG;\n\t\tlet i = 0;\n\n\t\t/* 第一遍扫描 */\n\t\twhile (i < words.length)\n\t\t{\n\t\t\tlet word = words[i];\n\t\t\tlet nextword = words[i + 1];\n\n\t\t\tif (this.isMergeable(word, nextword))\n\t\t\t{\n\t\t\t\t//debug(nextword);\n\t\t\t\t// 如果为  \"小|老\" + 姓\n\t\t\t\tif (nextword && (word.w == '小' || word.w == '老') &&\n\t\t\t\t\t(nextword.w in CHS_NAMES.FAMILY_NAME_1 || nextword.w in CHS_NAMES.FAMILY_NAME_2))\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 1,\n\t\t\t\t\t});\n\n\t\t\t\t\ti++;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\t// 如果是 姓 + 名（2字以内）\n\t\t\t\tif ((word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2) &&\n\t\t\t\t\t((nextword.p & POSTAG.A_NR) > 0 && nextword.w.length <= 
2))\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 2,\n\t\t\t\t\t});\n\n\t\t\t\t\ti++;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\t// 如果相邻两个均为单字且至少有一个字是未识别的，则尝试判断其是否为人名\n\t\t\t\tif (!word.p || !nextword.p)\n\t\t\t\t{\n\t\t\t\t\tif ((word.w in CHS_NAMES.SINGLE_NAME && word.w == nextword.w) ||\n\t\t\t\t\t\t(word.w in CHS_NAMES.DOUBLE_NAME_1 && nextword.w in CHS_NAMES.DOUBLE_NAME_2))\n\t\t\t\t\t{\n\t\t\t\t\t\t/*\n\t\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t});\n\t\t\t\t\t\t*/\n\n\t\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 3,\n\t\t\t\t\t\t});\n\n\t\t\t\t\t\t// 如果上一个单词可能是一个姓，则合并\n\t\t\t\t\t\tlet preword = words[i - 1];\n\t\t\t\t\t\tif (preword\n\t\t\t\t\t\t\t&& (preword.w in CHS_NAMES.FAMILY_NAME_1 || preword.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t\t\t&& this.isMergeable2(preword.w, word.w,  nextword.w)\n\t\t\t\t\t\t)\n\t\t\t\t\t\t{\n\n\t\t\t\t\t\t\t/*\n\t\t\t\t\t\t\twords.splice(i - 1, 2, {\n\t\t\t\t\t\t\t\tw: preword.w + word.w + nextword.w,\n\t\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\t\tm: [preword, word, nextword],\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t*/\n\n\t\t\t\t\t\t\tthis.sliceToken(words, i - 1, 2, {\n\t\t\t\t\t\t\t\tw: preword.w + word.w + nextword.w,\n\t\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\t\tm: [preword, word, nextword],\n\t\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t\t[this.name]: 
4,\n\t\t\t\t\t\t\t});\n\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ti++;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// 如果为 无歧义的姓 + 名（2字以内） 且其中一个未未识别词\n\t\t\t\tif (\n\t\t\t\t\t(word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t&& (!word.p || !nextword.p)\n\n\t\t\t\t\t/**\n\t\t\t\t\t * 防止將標點符號當作名字的BUG\n\t\t\t\t\t */\n\t\t\t\t\t&& !(word.p & POSTAG.D_W || nextword.p & POSTAG.D_W)\n\t\t\t\t)\n\t\t\t\t{\n\t\t\t\t\t//debug(word, nextword);\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 5,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 移到下一个单词\n\t\t\ti++;\n\t\t}\n\n\t\t/* 第二遍扫描 */\n\t\ti = 0;\n\t\twhile (i < words.length)\n\t\t{\n\t\t\tlet word = words[i];\n\t\t\tlet nextword = words[i + 1];\n\t\t\tif (this.isMergeable(word, nextword))\n\t\t\t{\n\t\t\t\t// 如果为 姓 + 单字名\n\t\t\t\tif (\n\t\t\t\t\t(word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t&&\n\t\t\t\t\tnextword.w in CHS_NAMES.SINGLE_NAME\n\t\t\t\t)\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tlet nw = word.w + nextword.w;\n\t\t\t\t\tlet ew = this._TABLE[nw];\n\n\t\t\t\t\t/**\n\t\t\t\t\t * 更改為只有新詞屬於人名或未知詞時才會合併\n\t\t\t\t\t */\n\t\t\t\t\tif (!ew || !ew.p || ew.p & POSTAG.A_NR)\n\t\t\t\t\t{\n\t\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 6,\n\t\t\t\t\t\t\texists_word: 
ew,\n\t\t\t\t\t\t});\n\n\t\t\t\t\t\ti++;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 移到下一个单词\n\t\t\ti++;\n\t\t}\n\n\t\treturn words;\n\t}\n}\n\nexport const init = ChsNameOptimizer.init.bind(ChsNameOptimizer) as typeof ChsNameOptimizer.init;\n\nexport default ChsNameOptimizer;\n\n"]}
\No newline at end of file