1 | /**
|
2 | * 人名优化模块
|
3 | *
|
4 | * @author 老雷<leizongmin@gmail.com>
|
5 | * @version 0.1
|
6 | */
|
7 | ;
|
8 | Object.defineProperty(exports, "__esModule", { value: true });
|
9 | exports.init = exports.ChsNameOptimizer = void 0;
|
10 | const mod_1 = require("../mod");
|
11 | const CHS_NAMES_1 = require("../mod/CHS_NAMES");
|
12 | /**
|
13 | * @todo 支援 XX氏
|
14 | */
|
/**
 * Own-property membership test for dictionary-style objects.
 *
 * The `in` operator also matches properties inherited from `Object.prototype`
 * (`constructor`, `toString`, `valueOf`, ...), so a token with one of those
 * spellings would be reported as a dictionary entry. Only own keys count here.
 *
 * @param {object} dict dictionary object
 * @param {string} key key to look up
 * @return {boolean}
 */
function hasOwnKey(dict, key) {
    return Object.prototype.hasOwnProperty.call(dict, key);
}

/**
 * Whether `w` is listed in either family-name dictionary of CHS_NAMES
 * (`FAMILY_NAME_1` or `FAMILY_NAME_2`).
 *
 * @param {string} w token text
 * @return {boolean}
 */
function isFamilyName(w) {
    const names = CHS_NAMES_1.default;
    return hasOwnKey(names.FAMILY_NAME_1, w) || hasOwnKey(names.FAMILY_NAME_2, w);
}

/**
 * Whether `text` has an own, truthy entry in the optimizer blacklist.
 *
 * @param {object} blacklist blacklist dictionary
 * @param {string} text candidate merged word
 * @return {boolean}
 */
function isBlacklisted(blacklist, text) {
    return hasOwnKey(blacklist, text) && Boolean(blacklist[text]);
}

/**
 * Chinese person-name optimizer.
 *
 * Scans a token list twice and merges adjacent tokens that look like a
 * person name into a single token tagged `POSTAG.A_NR`.
 */
class ChsNameOptimizer extends mod_1.SubSModuleOptimizer {
    constructor(...args) {
        super(...args);
        this.name = 'ChsNameOptimizer';
    }

    /**
     * Caches the dictionaries used by this optimizer: `TABLE` and the
     * optimizer blacklist (an empty object when the blacklist is missing).
     */
    _cache() {
        super._cache();
        this._TABLE = this.segment.getDict('TABLE');
        this._BLACKLIST = this.segment.getDict("BLACKLIST_FOR_OPTIMIZER" /* BLACKLIST_FOR_OPTIMIZER */) || {};
    }

    /**
     * Checks whether the concatenation of the given strings may be merged,
     * i.e. it is not present in the blacklist.
     *
     * @param {...string} words pieces of the candidate word
     * @return {true|null} `true` when mergeable, otherwise `null`
     */
    isMergeable2(...words) {
        const nw = words.join('');
        return isBlacklisted(this._BLACKLIST, nw) ? null : true;
    }

    /**
     * Checks whether two adjacent tokens may be merged: both must exist and
     * their concatenated text must not be present in the blacklist.
     *
     * @param {object} word current token (its text is read from `.w`)
     * @param {object} nextword following token
     * @return {true|null} `true` when mergeable, otherwise `null`
     */
    isMergeable(word, nextword) {
        if (!word || !nextword) {
            return null;
        }
        // Words listed in BLACKLIST must never be produced by a merge.
        return isBlacklisted(this._BLACKLIST, word.w + nextword.w) ? null : true;
    }

    /**
     * Optimizes tokens that are likely to be person names.
     *
     * @param {array} words token array
     * @return {array} the same `words` array that was passed in
     */
    doOptimize(words) {
        const POSTAG = this._POSTAG;
        const names = CHS_NAMES_1.default;

        // Replaces the two tokens starting at `index` with one person-name token
        // built from `parts`. `rule` records which rule fired in the attribute
        // object handed to the inherited `sliceToken`.
        // NOTE(review): `sliceToken` is assumed to edit `words` in place, like the
        // `words.splice(index, 2, token)` call it superseded - confirm in ../mod.
        const mergeAsName = (index, parts, rule, extraAttr) => {
            this.sliceToken(words, index, 2, {
                w: parts.map((part) => part.w).join(''),
                p: POSTAG.A_NR,
                m: parts,
            }, undefined, Object.assign({ [this.name]: rule }, extraAttr));
        };

        /* First pass */
        let i = 0;
        while (i < words.length) {
            const word = words[i];
            const nextword = words[i + 1];

            if (!this.isMergeable(word, nextword)) {
                i++;
                continue;
            }

            // Rule 1: "小" / "老" prefix + family name
            if ((word.w === '小' || word.w === '老') && isFamilyName(nextword.w)) {
                mergeAsName(i, [word, nextword], 1);
                i++;
                continue;
            }

            // Rule 2: family name + token already tagged as a person name (at most 2 chars)
            if (isFamilyName(word.w)
                && (nextword.p & POSTAG.A_NR) > 0
                && nextword.w.length <= 2) {
                mergeAsName(i, [word, nextword], 2);
                i++;
                continue;
            }

            // Rule 3: at least one of the two tokens is unrecognized (no POS tag) and
            // together they form a reduplicated single name or a known
            // first-char/second-char given-name pair.
            if ((!word.p || !nextword.p)
                && ((hasOwnKey(names.SINGLE_NAME, word.w) && word.w === nextword.w)
                    || (hasOwnKey(names.DOUBLE_NAME_1, word.w) && hasOwnKey(names.DOUBLE_NAME_2, nextword.w)))) {
                mergeAsName(i, [word, nextword], 3);

                // Rule 4: if the preceding token may be a family name, fold it in too.
                const preword = words[i - 1];
                if (preword
                    && isFamilyName(preword.w)
                    && this.isMergeable2(preword.w, word.w, nextword.w)) {
                    // The merged token now sits at i - 1, so index i already
                    // refers to the next unprocessed token: do not advance.
                    mergeAsName(i - 1, [preword, word, nextword], 4);
                }
                else {
                    i++;
                }
                continue;
            }

            // Rule 5: family name + following token where one of them is unrecognized.
            // Tokens tagged as punctuation (D_W) are excluded so that punctuation
            // is never swallowed into a name.
            if (isFamilyName(word.w)
                && (!word.p || !nextword.p)
                && !(word.p & POSTAG.D_W || nextword.p & POSTAG.D_W)) {
                mergeAsName(i, [word, nextword], 5);
            }

            // Move on to the next token
            i++;
        }

        /* Second pass */
        i = 0;
        while (i < words.length) {
            const word = words[i];
            const nextword = words[i + 1];

            // Rule 6: family name + single-character given name
            if (this.isMergeable(word, nextword)
                && isFamilyName(word.w)
                && hasOwnKey(names.SINGLE_NAME, nextword.w)) {
                const nw = word.w + nextword.w;
                // A missing TABLE dictionary is treated as "word not in dictionary"
                // instead of throwing, mirroring the `|| {}` fallback of _BLACKLIST.
                const ew = this._TABLE ? this._TABLE[nw] : undefined;

                // Merge only when the combined word is unknown, untagged, or
                // already tagged as a person name in the dictionary.
                if (!ew || !ew.p || ew.p & POSTAG.A_NR) {
                    mergeAsName(i, [word, nextword], 6, { exists_word: ew });
                }
            }

            // Move on to the next token
            i++;
        }

        return words;
    }
}

exports.ChsNameOptimizer = ChsNameOptimizer;
// `init` is exported pre-bound to the class so it can be called detached.
exports.init = ChsNameOptimizer.init.bind(ChsNameOptimizer);
exports.default = ChsNameOptimizer;
|
224 | //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"ChsNameOptimizer.js","sourceRoot":"","sources":["ChsNameOptimizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,YAAY,CAAC;;;AAEb,gCAA6F;AAC7F,gDAAsH;AAKtH;;GAEG;AACH,MAAa,gBAAiB,SAAQ,yBAAmB;IAAzD;;QAIC,SAAI,GAAG,kBAAkB,CAAC;IAoQ3B,CAAC;IAlQA,MAAM;QAEL,KAAK,CAAC,MAAM,EAAE,CAAC;QAEf,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAE5C,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,yDAA0C,IAAI,EAAE,CAAC;IACxF,CAAC;IAED,YAAY,CAAC,GAAG,KAAe;QAE9B,IAAI,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAExB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,EACxB;YACC,OAAO,IAAI,CAAC;SACZ;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAED,WAAW,CAAC,IAAW,EAAE,QAAe;QAEvC,IAAI,IAAI,IAAI,QAAQ,EACpB;YACC,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;YAE7B;;eAEG;YACH,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,EACxB;gBACC,OAAO,IAAI,CAAC;gBAEZ;;;;;;;kBAOE;aACF;SACD;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAED;;;;;OAKG;IACH,UAAU,CAAC,KAAc;QAExB,eAAe;QACf,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,GAAG,CAAC,CAAC;QAEV,WAAW;QACX,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EACvB;YACC,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,IAAI,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAE5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC,EACpC;gBACC,kBAAkB;gBAClB,iBAAiB;gBACjB,IAAI,QAAQ,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC;oBAC/C,CAAC,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC,EACjF;oBACC;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CAAC,EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBAEH,CAAC,EAAE,CAAC;oBACJ,SAAS;iBACT;gBAED,kBAAkB;gBAClB,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;oBAC3E,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,Q
AAQ,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,EAC3D;oBACC;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CAAC,EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBAEH,CAAC,EAAE,CAAC;oBACJ,SAAS;iBACT;gBAED,qCAAqC;gBACrC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAC1B;oBACC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC;wBAC5D,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC,EAC7E;wBACC;;;;;;0BAME;wBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;4BACtB,CAAC,EAAE,MAAM,CAAC,IAAI;4BACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;yBACnB,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;yBACd,CAAC,CAAC;wBAEH,oBAAoB;wBACpB,IAAI,OAAO,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;wBAC3B,IAAI,OAAO;+BACP,CAAC,OAAO,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,OAAO,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;+BAC9E,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAG,QAAQ,CAAC,CAAC,CAAC,EAErD;4BAEC;;;;;;8BAME;4BAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;gCAChC,CAAC,EAAE,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;gCAClC,CAAC,EAAE,MAAM,CAAC,IAAI;gCACd,CAAC,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,QAAQ,CAAC;6BAC5B,EAAE,SAAS,EAAE;gCACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;6BACd,CAAC,CAAC;yBAEH;6BAED;4BACC,CAAC,EAAE,CAAC;yBACJ;wBACD,SAAS;qBACT;iBACD;gBAED,iCAAiC;gBACjC,IACC,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;uBACrE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;oBAE3B;;uBAEG;uBACA,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,IAAI,QAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAErD;oBACC,wBAAwB;oBACxB;;;;;;sBAME;oBAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;wBACtB,CA
AC,EAAE,MAAM,CAAC,IAAI;wBACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;qBACnB,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;iBACH;aACD;YAED,UAAU;YACV,CAAC,EAAE,CAAC;SACJ;QAED,WAAW;QACX,CAAC,GAAG,CAAC,CAAC;QACN,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EACvB;YACC,IAAI,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACpB,IAAI,QAAQ,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC5B,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,CAAC,EACpC;gBACC,cAAc;gBACd,IACC,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,IAAI,mBAAS,CAAC,aAAa,CAAC;;wBAExE,QAAQ,CAAC,CAAC,IAAI,mBAAS,CAAC,WAAW,EAEpC;oBACC;;;;;;sBAME;oBAEF,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;oBAC7B,IAAI,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;oBAEzB;;uBAEG;oBACH,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,EACtC;wBACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,EAAE;4BACL,CAAC,EAAE,MAAM,CAAC,IAAI;4BACd,CAAC,EAAE,CAAC,IAAI,EAAE,QAAQ,CAAC;yBACnB,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;4BACd,WAAW,EAAE,EAAE;yBACf,CAAC,CAAC;wBAEH,CAAC,EAAE,CAAC;wBACJ,SAAS;qBACT;iBACD;aACD;YAED,UAAU;YACV,CAAC,EAAE,CAAC;SACJ;QAED,OAAO,KAAK,CAAC;IACd,CAAC;CACD;AAxQD,4CAwQC;AAEY,QAAA,IAAI,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAiC,CAAC;AAEjG,kBAAe,gBAAgB,CAAC","sourcesContent":["/**\n * 人名优化模块\n *\n * @author 老雷<leizongmin@gmail.com>\n * @version 0.1\n */\n\n'use strict';\n\nimport { SubSModule, SubSModuleOptimizer, ISubOptimizer, SubSModuleTokenizer } from '../mod';\nimport CHS_NAMES, { FAMILY_NAME_1, FAMILY_NAME_2, SINGLE_NAME, DOUBLE_NAME_1, DOUBLE_NAME_2 } from '../mod/CHS_NAMES';\nimport Segment, { IDICT, IWord } from '../Segment';\nimport { debug } from '../util';\nimport { EnumDictDatabase } from '../const';\n\n/**\n * @todo 支援 XX氏\n */\nexport class ChsNameOptimizer extends SubSModuleOptimizer\n{\n\tprotected _TABLE: IDICT<IWord>;\n\n\tname = 'ChsNameOptimizer';\n\n\t_cache()\n\t{\n\t\tsuper._cache();\n\n\t\tthis._TABLE = 
this.segment.getDict('TABLE');\n\n\t\tthis._BLACKLIST = this.segment.getDict(EnumDictDatabase.BLACKLIST_FOR_OPTIMIZER) || {};\n\t}\n\n\tisMergeable2(...words: string[])\n\t{\n\t\tlet nw = words.join('');\n\n\t\tif (!this._BLACKLIST[nw])\n\t\t{\n\t\t\treturn true;\n\t\t}\n\n\t\treturn null;\n\t}\n\n\tisMergeable(word: IWord, nextword: IWord)\n\t{\n\t\tif (word && nextword)\n\t\t{\n\t\t\tlet nw = word.w + nextword.w;\n\n\t\t\t/**\n\t\t\t * 不合併存在於 BLACKLIST 內的字詞\n\t\t\t */\n\t\t\tif (!this._BLACKLIST[nw])\n\t\t\t{\n\t\t\t\treturn true;\n\n\t\t\t\t/*\n\t\t\t\treturn {\n\t\t\t\t\tword,\n\t\t\t\t\tnextword,\n\t\t\t\t\tnw,\n\t\t\t\t\tbool: true,\n\t\t\t\t}\n\t\t\t\t*/\n\t\t\t}\n\t\t}\n\n\t\treturn null;\n\t}\n\n\t/**\n\t * 对可能是人名的单词进行优化\n\t *\n\t * @param {array} words 单词数组\n\t * @return {array}\n\t */\n\tdoOptimize(words: IWord[]): IWord[]\n\t{\n\t\t//debug(words);\n\t\tconst POSTAG = this._POSTAG;\n\t\tlet i = 0;\n\n\t\t/* 第一遍扫描 */\n\t\twhile (i < words.length)\n\t\t{\n\t\t\tlet word = words[i];\n\t\t\tlet nextword = words[i + 1];\n\n\t\t\tif (this.isMergeable(word, nextword))\n\t\t\t{\n\t\t\t\t//debug(nextword);\n\t\t\t\t// 如果为  \"小|老\" + 姓\n\t\t\t\tif (nextword && (word.w == '小' || word.w == '老') &&\n\t\t\t\t\t(nextword.w in CHS_NAMES.FAMILY_NAME_1 || nextword.w in CHS_NAMES.FAMILY_NAME_2))\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 1,\n\t\t\t\t\t});\n\n\t\t\t\t\ti++;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\t// 如果是 姓 + 名（2字以内）\n\t\t\t\tif ((word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2) &&\n\t\t\t\t\t((nextword.p & POSTAG.A_NR) > 0 && nextword.w.length <= 
2))\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 2,\n\t\t\t\t\t});\n\n\t\t\t\t\ti++;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\n\t\t\t\t// 如果相邻两个均为单字且至少有一个字是未识别的，则尝试判断其是否为人名\n\t\t\t\tif (!word.p || !nextword.p)\n\t\t\t\t{\n\t\t\t\t\tif ((word.w in CHS_NAMES.SINGLE_NAME && word.w == nextword.w) ||\n\t\t\t\t\t\t(word.w in CHS_NAMES.DOUBLE_NAME_1 && nextword.w in CHS_NAMES.DOUBLE_NAME_2))\n\t\t\t\t\t{\n\t\t\t\t\t\t/*\n\t\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t});\n\t\t\t\t\t\t*/\n\n\t\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 3,\n\t\t\t\t\t\t});\n\n\t\t\t\t\t\t// 如果上一个单词可能是一个姓，则合并\n\t\t\t\t\t\tlet preword = words[i - 1];\n\t\t\t\t\t\tif (preword\n\t\t\t\t\t\t\t&& (preword.w in CHS_NAMES.FAMILY_NAME_1 || preword.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t\t\t&& this.isMergeable2(preword.w, word.w,  nextword.w)\n\t\t\t\t\t\t)\n\t\t\t\t\t\t{\n\n\t\t\t\t\t\t\t/*\n\t\t\t\t\t\t\twords.splice(i - 1, 2, {\n\t\t\t\t\t\t\t\tw: preword.w + word.w + nextword.w,\n\t\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\t\tm: [preword, word, nextword],\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t*/\n\n\t\t\t\t\t\t\tthis.sliceToken(words, i - 1, 2, {\n\t\t\t\t\t\t\t\tw: preword.w + word.w + nextword.w,\n\t\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\t\tm: [preword, word, nextword],\n\t\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t\t[this.name]: 
4,\n\t\t\t\t\t\t\t});\n\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ti++;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// 如果为 无歧义的姓 + 名（2字以内） 且其中一个未未识别词\n\t\t\t\tif (\n\t\t\t\t\t(word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t&& (!word.p || !nextword.p)\n\n\t\t\t\t\t/**\n\t\t\t\t\t * 防止將標點符號當作名字的BUG\n\t\t\t\t\t */\n\t\t\t\t\t&& !(word.p & POSTAG.D_W || nextword.p & POSTAG.D_W)\n\t\t\t\t)\n\t\t\t\t{\n\t\t\t\t\t//debug(word, nextword);\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 5,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 移到下一个单词\n\t\t\ti++;\n\t\t}\n\n\t\t/* 第二遍扫描 */\n\t\ti = 0;\n\t\twhile (i < words.length)\n\t\t{\n\t\t\tlet word = words[i];\n\t\t\tlet nextword = words[i + 1];\n\t\t\tif (this.isMergeable(word, nextword))\n\t\t\t{\n\t\t\t\t// 如果为 姓 + 单字名\n\t\t\t\tif (\n\t\t\t\t\t(word.w in CHS_NAMES.FAMILY_NAME_1 || word.w in CHS_NAMES.FAMILY_NAME_2)\n\t\t\t\t\t&&\n\t\t\t\t\tnextword.w in CHS_NAMES.SINGLE_NAME\n\t\t\t\t)\n\t\t\t\t{\n\t\t\t\t\t/*\n\t\t\t\t\twords.splice(i, 2, {\n\t\t\t\t\t\tw: word.w + nextword.w,\n\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t});\n\t\t\t\t\t*/\n\n\t\t\t\t\tlet nw = word.w + nextword.w;\n\t\t\t\t\tlet ew = this._TABLE[nw];\n\n\t\t\t\t\t/**\n\t\t\t\t\t * 更改為只有新詞屬於人名或未知詞時才會合併\n\t\t\t\t\t */\n\t\t\t\t\tif (!ew || !ew.p || ew.p & POSTAG.A_NR)\n\t\t\t\t\t{\n\t\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\t\tp: POSTAG.A_NR,\n\t\t\t\t\t\t\tm: [word, nextword],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 6,\n\t\t\t\t\t\t\texists_word: 
ew,\n\t\t\t\t\t\t});\n\n\t\t\t\t\t\ti++;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 移到下一个单词\n\t\t\ti++;\n\t\t}\n\n\t\treturn words;\n\t}\n}\n\nexport const init = ChsNameOptimizer.init.bind(ChsNameOptimizer) as typeof ChsNameOptimizer.init;\n\nexport default ChsNameOptimizer;\n\n"]} |
\ | No newline at end of file |