1 | ;
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.init = exports.DictOptimizer = void 0;
|
4 | const mod_1 = require("../mod");
|
// Matches text made up solely of compass-direction characters: 東 / 东 (east), 西 (west), 南 (south), 北 (north).
5 | const DIRECTIONS_REGEXP = /^[東西南北东]+$/;
|
// One or more decimal digits, ASCII or full-width. The full-width range (U+FF10-U+FF19) and the
// full-width percent sign below are written as escapes so they survive tooling that normalizes
// full-width characters to their ASCII look-alikes.
const DIGIT_GROUP_REGEXP = /^[\d\uFF10-\uFF19]+$/;
// What may follow a thousands separator: digits, optionally with a fractional part ("000", "000.5", ".5").
const DIGIT_TAIL_REGEXP = /^(?:[\d\uFF10-\uFF19]*\.[\d\uFF10-\uFF19]+|[\d\uFF10-\uFF19]+)$/;
// Full-width percent sign.
const FULLWIDTH_PERCENT = '\uFF05';

/**
 * Dictionary optimizer module.
 *
 * Walks a token list and merges adjacent tokens when the concatenated text is a dictionary
 * entry, or when the tokens form a numeric / quantity / compass-direction expression.
 *
 * @author 老雷<leizongmin@gmail.com>
 */
class DictOptimizer extends mod_1.SubSModuleOptimizer {
    constructor(...args) {
        super(...args);
        this.name = 'DictOptimizer';
    }

    /**
     * Caches the `TABLE` dictionary and the POSTAG flag table of the owning segmenter
     * on the instance, after letting the base class do its own caching.
     */
    _cache() {
        super._cache();
        this._TABLE = this.segment.getDict('TABLE');
        this._POSTAG = this.segment.POSTAG;
    }

    /**
     * Decides whether two adjacent words may be merged into the dictionary word `nw`.
     *
     * The part-of-speech tags must be compatible (see the inline rules) AND `nw` must be
     * present in the dictionary.
     *
     * @param {object} w1 left word (`p` is read as a bit mask of POSTAG flags)
     * @param {object} w2 right word
     * @param {object} options
     * @param {object} options.POSTAG part-of-speech flag table
     * @param {object} options.TABLE accepted for interface compatibility; not read here
     * @param {string} options.nw concatenated text of `w1` and `w2`
     * @param {number} options.i accepted for interface compatibility; not read here
     * @param {object} [options.nw_cache] dictionary entry for `nw`, if already looked up
     * @param {boolean} [options.nw_cache_exists] whether the lookup already ran and found an entry
     * @return {boolean}
     */
    isMergeable(w1, w2, { POSTAG, TABLE, nw, i, nw_cache, nw_cache_exists }) {
        let tagsCompatible = false;

        if (w1.p == w2.p) {
            // Original rule: identical tags. Loose equality is retained so that a null tag
            // still equals an undefined one, exactly as before.
            tagsCompatible = true;
        }
        else if (w1.p & w2.p) {
            // A word may carry several tags at once, so any shared flag is accepted.
            // (The former extra test `1 || m & POSTAG.D_N` was always true and was dropped.)
            tagsCompatible = true;
        }
        else if (w1.p && typeof w2.p == 'undefined') {
            // Allows a tagged word followed by an untagged one, e.g. 幾 + a percent sign.
            tagsCompatible = true;
        }
        else if (w1.p & POSTAG.D_D && w2.p & POSTAG.D_V) {
            // D_D followed by D_V: only when the dictionary entry itself carries D_D or D_V.
            ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
            tagsCompatible = Boolean(nw_cache && (nw_cache.p & POSTAG.D_D || nw_cache.p & POSTAG.D_V));
        }

        // Always answer with a real boolean (previously `undefined` leaked out when no rule matched).
        return tagsCompatible
            && Boolean(this._getWordCache(nw, nw_cache, nw_cache_exists).nw_cache_exists);
    }

    /**
     * Looks `nw` up in the cached dictionary at most once per candidate.
     *
     * When `nw_cache_exists` is already defined the inputs are returned untouched, so callers
     * can thread the result through repeated calls without repeating the lookup.
     *
     * NOTE(review): the lookup is a plain property read on `this._TABLE`; if that is an ordinary
     * object, inherited keys such as "constructor" would count as entries - confirm how the
     * dictionary object is created.
     *
     * @param {string} nw candidate word text
     * @param {object} [nw_cache] previously fetched dictionary entry
     * @param {boolean} [nw_cache_exists] previously computed existence flag
     * @return {{nw: string, nw_cache: object, nw_cache_exists: boolean}}
     */
    _getWordCache(nw, nw_cache, nw_cache_exists) {
        let entry = nw_cache;
        let exists = nw_cache_exists;
        if (typeof exists === 'undefined') {
            entry = entry || this._TABLE[nw];
            exists = !!entry;
        }
        return {
            nw,
            nw_cache: entry,
            nw_cache_exists: exists,
        };
    }

    /**
     * Dictionary optimization: merges adjacent tokens in place.
     *
     * Merged numerals can enable further merges that a single sweep misses, so a call made
     * without `is_not_first === true` sweeps the list a second time before returning.
     *
     * @param {array} words token list; modified through `this.sliceToken`
     * @param {boolean} [is_not_first=false] true on the internal second sweep
     * @return {array} the same `words` list
     */
    doOptimize(words, is_not_first = false) {
        const TABLE = this._TABLE;
        const POSTAG = this._POSTAG;
        let i = 0;
        // Index of the last token; kept in sync with every merge.
        let ie = words.length - 1;

        // Replaces `count` tokens starting at `i` with `token`, tags the merge with this module's
        // name and the rule id, and shrinks `ie` accordingly. `fifthArg` is forwarded unchanged as
        // the fifth argument of the inherited `sliceToken`.
        const mergeAt = (count, token, ruleId, fifthArg) => {
            this.sliceToken(words, i, count, token, fifthArg, {
                [this.name]: ruleId,
            });
            ie -= count - 1;
        };

        while (i < ie) {
            const w1 = words[i];
            const w2 = words[i + 1];
            const nw = w1.w + w2.w;
            // Dictionary lookup for `nw`, filled lazily and shared by all rules of this iteration.
            let nw_cache;
            let nw_cache_exists;

            /**
             * Rule 1: adjective + particle = adjective, e.g. 不同 + 的 = 不同的
             */
            if (w1.w !== '了'
                && (w1.p & POSTAG.D_A)
                && (w2.p & POSTAG.D_U)) {
                ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
                const entry = nw_cache;
                // Merge unless the dictionary knows the word as something other than an adjective.
                if (!entry || (entry.p & POSTAG.D_A)) {
                    let p = POSTAG.D_A;
                    let f;
                    if (entry) {
                        p = entry.p;
                        f = entry.f;
                    }
                    else if (w1.p & POSTAG.BAD) {
                        // Kept as `+` rather than `|`: `|` coerces to a signed 32-bit integer, which
                        // would change the stored value if a flag occupies the top bit.
                        p = POSTAG.D_A + POSTAG.BAD;
                    }
                    mergeAt(2, { w: nw, p, f, m: [w1, w2] }, 1);
                    continue;
                }
            }

            /**
             * Rule 7: adjective + noun = noun, only when the dictionary lists the result as a noun
             */
            if ((w1.p & POSTAG.D_A)
                && (w2.p & POSTAG.D_N)) {
                ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
                if (nw_cache_exists && (nw_cache.p & POSTAG.D_N)) {
                    mergeAt(2, { w: nw, p: nw_cache.p, f: nw_cache.f, m: [w1, w2] }, 7);
                    continue;
                }
            }

            /**
             * Rule 2: both words together form a dictionary word with compatible tags
             */
            if (this.isMergeable(w1, w2, {
                nw,
                POSTAG,
                TABLE,
                i,
                nw_cache,
                nw_cache_exists,
            })) {
                ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
                mergeAt(2, { w: nw, p: nw_cache.p, f: nw_cache.f, m: [w1, w2] }, 2);
                continue;
            }

            // ============================================
            // Numeral combinations
            if (w1.p & POSTAG.A_M) {
                /**
                 * Rule 3: numeral + numeral (unless the second starts with the ordinal prefix 第),
                 * or numeral + percent sign (ASCII or full-width), e.g. 10%
                 */
                if ((w2.p & POSTAG.A_M && !w2.w.startsWith('第'))
                    || w2.w === '%'
                    || w2.w === FULLWIDTH_PERCENT) {
                    mergeAt(2, { w: nw, p: POSTAG.A_M, m: [w1, w2] }, 3);
                    continue;
                }

                /**
                 * Rule 4: numeral + measure word = quantity, e.g. 100个
                 */
                if (w2.p & POSTAG.A_Q) {
                    let p = POSTAG.D_MQ;
                    ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
                    if (nw_cache) {
                        p = nw_cache.p | POSTAG.D_MQ;
                    }
                    else {
                        // No dictionary entry: carry over these three flags from the measure word.
                        for (const flag of [POSTAG.D_T, POSTAG.D_N, POSTAG.D_V]) {
                            if (w2.p & flag) {
                                p = p | flag;
                            }
                        }
                    }
                    mergeAt(2, { w: nw, p, m: [w1, w2] }, 4);
                    continue;
                }

                const w3 = words[i + 2];
                if (w3 && (w3.p & POSTAG.A_M)) {
                    /**
                     * Rule 5: decimals such as "3 . 14" / "十五点三", and fractions written as
                     * numeral + 分之 + numeral such as 五十分之一
                     */
                    if (w2.w === '.'
                        || w2.w === '点'
                        || w2.w === '點'
                        || w2.w === '分之') {
                        mergeAt(3, { w: w1.w + w2.w + w3.w, p: POSTAG.A_M, m: [w1, w2, w3] }, 5);
                        continue;
                    }

                    /**
                     * Rule 6: thousands separator, e.g. the 59,000 in
                     * `最多容納59,000個人,或5.9萬人,再多就不行了.這是環評的結論.`
                     */
                    if (w2.w === ','
                        && DIGIT_GROUP_REGEXP.test(w1.w)
                        && DIGIT_TAIL_REGEXP.test(w3.w)) {
                        mergeAt(3, { w: w1.w + w2.w + w3.w, p: POSTAG.A_M, m: [w1, w2, w3] }, 6);
                        continue;
                    }
                }
            }

            /**
             * Fix for "十五点五八": a quantity that ends in the decimal-point character, followed by
             * numerals, absorbs the whole run of numerals.
             */
            if ((w1.p & POSTAG.D_MQ)
                && (w1.w.endsWith('點') || w1.w.endsWith('点'))
                && (w2.p & POSTAG.A_M)) {
                // Numerals after w2. `ie` is the index of the last token, so the bound is inclusive;
                // the former `j < ie` never absorbed a numeral sitting at the very end of the list.
                const extra = [];
                for (let j = i + 2; j <= ie && (words[j].p & POSTAG.A_M) > 0; j++) {
                    extra.push(words[j]);
                }
                // Shares rule id 6 with the thousands-separator rule; left as is so existing debug
                // tags do not change.
                mergeAt(2 + extra.length, {
                    w: w1.w + w2.w + extra.map((word) => word.w).join(''),
                    p: POSTAG.D_MQ,
                    // Component words, like every other rule (this used to hold the joined string).
                    m: [w1, w2, ...extra],
                }, 6);
                continue;
            }

            /**
             * Rule 8: adjacent compass-direction words (東南西北) are merged
             */
            if (DIRECTIONS_REGEXP.test(w1.w) && DIRECTIONS_REGEXP.test(w2.w)) {
                ({ nw_cache, nw_cache_exists } = this._getWordCache(nw, nw_cache, nw_cache_exists));
                // Start from the dictionary entry when there is one; text and components always
                // come from the two merged words.
                const merged = this.createToken({
                    p: POSTAG.D_F,
                    ...nw_cache,
                    w: nw,
                    m: [w1, w2],
                });
                // Make sure the D_F flag is set even when the dictionary entry lacks it.
                merged.p = merged.p | POSTAG.D_F;
                mergeAt(2, merged, 8, true);
                continue;
            }

            // Nothing merged at this position: move on to the next word.
            i++;
        }

        // Newly combined numerals are not re-examined within one sweep, so sweep once more.
        return is_not_first === true ? words : this.doOptimize(words, true);
    }
}
|
324 | exports.DictOptimizer = DictOptimizer;
|
325 | exports.init = DictOptimizer.init.bind(DictOptimizer);
|
326 | exports.default = DictOptimizer;
|
327 | //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"DictOptimizer.js","sourceRoot":"","sources":["DictOptimizer.ts"],"names":[],"mappings":"AAAA,YAAY,CAAC;;;AAEb,gCAA8E;AAQ9E,MAAM,iBAAiB,GAAG,YAAY,CAAC;AAEvC;;;;GAIG;AACH,MAAa,aAAc,SAAQ,yBAAmB;IAAtD;;QAKC,SAAI,GAAG,eAAe,CAAC;IAqZxB,CAAC;IAnZA,MAAM;QAEL,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;IACpC,CAAC;IAED,WAAW,CAAC,EAAS,EAAE,EAAS,EAAE,EACjC,MAAM,EACN,KAAK,EACL,EAAE,EACF,CAAC,EACD,QAAQ,EACR,eAAe,GAQf;QAEA,IAAI,IAAa,CAAC;QAClB,IAAI,CAAS,CAAC;QAEd;;WAEG;QACH,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,EAChB;YACC,IAAI,GAAG,IAAI,CAAC;SACZ;QACD;;WAEG;aACE,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,EAC1B;YACC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,EACvB;gBACC,IAAI,GAAG,IAAI,CAAC;aACZ;SACD;QACD;;WAEG;aACE,IAAI,EAAE,CAAC,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC,IAAI,WAAW,EAC3C;YACC,IAAI,GAAG,IAAI,CAAC;SACZ;aACI,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EAC/C;YACC,CAAC;gBACA,QAAQ;gBACR,eAAe;aACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;YAEvD,IAAI,EAAE,GAAG,QAAQ,CAAC;YAElB,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAClD;gBACC,IAAI,GAAG,IAAI,CAAC;aACZ;SACD;QAED,OAAO,IAAI;eACP,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,eAAe,CAAC;IACvE,CAAC;IAED,aAAa,CAAC,EAAU,EAAE,QAAe,EAAE,eAAwB;QAElE,IAAI,OAAO,eAAe,KAAK,WAAW,EAC1C;YACC,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;YAE1B,QAAQ,GAAG,QAAQ,IAAI,KAAK,CAAC,EAAE,CAAC,CAAC;YACjC,eAAe,GAAG,CAAC,CAAC,QAAQ,CAAC;SAC7B;QAED,OAAO;YACN,EAAE;YACF,QAAQ;YACR,eAAe;SACf,CAAA;IACF,CAAC;IAED;;;;;;OAMG;IACH,UAAU,CAAC,KAAc,EAAE,YAAqB;QAE/C,eAAe;QACf,IAAI,OAAO,YAAY,IAAI,WAAW,EACtC;YACC,YAAY,GAAG,KAAK,CAAC;SACrB;QACD,mBAAmB;QACnB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC;QAElB,IAAI,C
AAC,GAAG,CAAC,CAAC;QACV,IAAI,EAAE,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QAC1B,OAAO,CAAC,GAAG,EAAE,EACb;YACC,IAAI,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAClB,IAAI,EAAE,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACtB,4BAA4B;YAE5B,6CAA6C;YAC7C,IAAI,EAAE,GAAW,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAE7B,IAAI,QAAe,CAAC;YACpB,IAAI,eAAwB,CAAC;YAE7B;;eAEG;YACH,IAAI,EAAE,CAAC,CAAC,IAAI,GAAG;mBACX,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mBACnB,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAEvB;gBACC,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;gBACnB,IAAI,CAAS,CAAC;gBAEd,CAAC;oBACA,QAAQ;oBACR,eAAe;iBACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;gBAEvD,IAAI,EAAE,GAAG,QAAQ,CAAC;gBAElB,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC9B;oBACC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC7B;wBACC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;wBACT,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;qBACT;yBACI,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EAC1B;wBACC,CAAC,GAAG,MAAM,CAAC,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC;qBAC5B;oBAED,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,EAAE;wBACL,4EAA4E;wBAC5E,CAAC;wBACD,CAAC;wBACD,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;qBACX,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBACH,EAAE,EAAE,CAAC;oBACL,SAAS;iBACT;aACD;YAED;;eAEG;YACH,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mBACnB,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAEvB;gBACC,CAAC;oBACA,QAAQ;oBACR,eAAe;iBACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;gBAEvD,IAAI,eAAe,EACnB;oBACC,IAAI,EAAE,GAAG,QAAQ,CAAC;oBAElB,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACrB;wBACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,EAAE;4BACL,CAAC,EAAE,EAAE,CAAC,CAAC;4BACP,CAAC,EAAE,EAAE,CAAC,CAAC;4BACP,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;yBACX,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;yBACd,CAAC,CAAC;wBACH,EAAE,EAAE,CAAC;wBACL,SAAS;qBACT;iBACD;aACD;YAED,mBAAmB;YAEnB,IAAI,IAAI,CAAC,WAAW,CAAC,EAAE,EAAE
,EAAE,EAAE;gBAC5B,EAAE;gBACF,MAAM;gBACN,KAAK;gBACL,CAAC;gBACD,QAAQ;gBACR,eAAe;aACf,CAAC;YACF,kCAAkC;YAClC;gBACC,CAAC;oBACA,QAAQ;oBACR,eAAe;iBACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;gBAEvD,IAAI,EAAE,GAAG,QAAQ,CAAC;gBAElB,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;oBAC5B,CAAC,EAAE,EAAE;oBACL,CAAC,EAAE,EAAE,CAAC,CAAC;oBACP,CAAC,EAAE,EAAE,CAAC,CAAC;oBACP,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;iBACX,EAAE,SAAS,EAAE;oBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;iBACd,CAAC,CAAC;gBACH,EAAE,EAAE,CAAC;gBACL,SAAS;aACT;YAED,+CAA+C;YAC/C,OAAO;YACP,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EACvB;gBACC,0CAA0C;gBAC1C,6BAA6B;gBAC7B,IAAI,CACH,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG;uBACd,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CACnB,IAAI,EAAE,CAAC,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,GAAG,EAC/B;oBACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;wBACd,CAAC,EAAE,MAAM,CAAC,GAAG;wBACb,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;qBACX,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBACH,EAAE,EAAE,CAAC;oBACL,SAAS;iBACT;gBACD,qBAAqB;gBACrB,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EACvB;oBACC,MAAM;oBACN,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;oBACpB,IAAI,EAAE,GAAW,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;oBAE7B,CAAC;wBACA,QAAQ;wBACR,eAAe;qBACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;oBAEvD,IAAI,QAAQ,EACZ;wBACC,CAAC,GAAG,QAAQ,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;qBAC7B;yBAED;wBACC,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACrB;4BACC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;yBACnB;wBACD,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACrB;4BACC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;yBACnB;wBACD,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACrB;4BACC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;yBACnB;qBACD;oBAED,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;wBAC5B,CAAC,EAAE,EAAE;wBACL,CAAC;wBACD,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;qBACX,EAAE,SAAS,EAAE;wBACb,CAAC,IAAI,CAAC,
IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBACH,EAAE,EAAE,CAAC;oBACL,SAAS;iBACT;gBACD,gCAAgC;gBAChC,0BAA0B;gBAC1B,IAAI,EAAE,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACtB,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC7B;oBACC,IAAI,EAAE,CAAC,CAAC,IAAI,GAAG;2BACX,EAAE,CAAC,CAAC,IAAI,GAAG;2BACX,EAAE,CAAC,CAAC,IAAI,GAAG;2BACX,EAAE,CAAC,CAAC,IAAI,IAAI,EAEhB;wBACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;4BAC5B,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;4BACrB,CAAC,EAAE,MAAM,CAAC,GAAG;4BACb,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;yBACf,EAAE,SAAS,EAAE;4BACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;yBACd,CAAC,CAAC;wBACH,EAAE,IAAI,CAAC,CAAC;wBACR,SAAS;qBACT;oBAED;;uBAEG;oBACH,IAAI,EAAE,CAAC,CAAC,IAAI,GAAG,EACf;wBACC,IAAI,GAAG,GAAG,YAAY,CAAC;wBACvB,IAAI,GAAG,GAAG,gDAAgD,CAAC;wBAE3D,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EACpC;4BACC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE;gCAC5B,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;gCACrB,CAAC,EAAE,MAAM,CAAC,GAAG;gCACb,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC;6BACf,EAAE,SAAS,EAAE;gCACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;6BACd,CAAC,CAAC;4BACH,EAAE,IAAI,CAAC,CAAC;4BACR,SAAS;yBACT;qBACD;iBACD;aACD;YAED,eAAe;YACf,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACrF;gBACC,gBAAgB;gBAChB,IAAI,EAAE,GAAG,CAAC,CAAC;gBACX,IAAI,GAAG,GAAG,EAAE,CAAC;gBACb,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAChC;oBACC,IAAI,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;oBAClB,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,EAC3B;wBACC,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC;wBACZ,EAAE,EAAE,CAAC;qBACL;yBAED;wBACC,MAAM;qBACN;iBACD;gBACD,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,EAAE;oBAC7B,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG;oBACpB,CAAC,EAAE,MAAM,CAAC,IAAI;oBACd,
CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,GAAG,CAAC;iBAChB,EAAE,SAAS,EAAE;oBACb,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;iBACd,CAAC,CAAC;gBACH,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;gBACb,SAAS;aACT;YAED;;eAEG;YACH,IAAI,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAChC;gBACC,IAAI,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAChC;oBACC,CAAC;wBACA,QAAQ;wBACR,eAAe;qBACf,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC;oBAEvD,IAAI,EAAE,GAAe,IAAI,CAAC,WAAW,CAAC;wBACrC,CAAC,EAAE,MAAM,CAAC,GAAG;wBACb,GAAG,QAAQ;wBACX,CAAC,EAAE,EAAE;wBACL,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;qBACX,CAAC,CAAC;oBAEH,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;oBAEzB,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE;wBACtC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;qBACd,CAAC,CAAC;oBAEH,EAAE,EAAE,CAAC;oBACL,SAAS;iBACT;aACD;YAED,SAAS;YACT,CAAC,EAAE,CAAC;SACJ;QAED,+BAA+B;QAC/B,OAAO,YAAY,KAAK,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACrE,CAAC;CAED;AA1ZD,sCA0ZC;AAEY,QAAA,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAuC,CAAC;AAEjG,kBAAe,aAAa,CAAC","sourcesContent":["'use strict';\n\nimport { SubSModule, SubSModuleOptimizer, ISubOptimizerCreate } from '../mod';\nimport { Segment, IWord, IDICT } from '../Segment';\n// @ts-ignore\nimport { UString } from 'uni-string';\nimport IPOSTAG from '../POSTAG';\nimport { debug, IWordDebug } from '../util';\nimport { zhRegExp } from 'regexp-cjk';\n\nconst DIRECTIONS_REGEXP = /^[東西南北东]+$/;\n\n/**\n * 词典优化模块\n *\n * @author 老雷<leizongmin@gmail.com>\n */\nexport class DictOptimizer extends SubSModuleOptimizer\n{\n\n\tprotected _TABLE: IDICT<IWord>;\n\n\tname = 'DictOptimizer';\n\n\t_cache()\n\t{\n\t\tsuper._cache();\n\t\tthis._TABLE = this.segment.getDict('TABLE');\n\t\tthis._POSTAG = this.segment.POSTAG;\n\t}\n\n\tisMergeable(w1: IWord, w2: IWord, {\n\t\tPOSTAG,\n\t\tTABLE,\n\t\tnw,\n\t\ti,\n\t\tnw_cache,\n\t\tnw_cache_exists,\n\t}: {\n\t\tPOSTAG: typeof IPOSTAG,\n\t\tTABLE: IDICT,\n\t\tnw: 
string,\n\t\ti: number,\n\t\tnw_cache: IWord,\n\t\tnw_cache_exists: boolean,\n\t}): boolean\n\t{\n\t\tlet bool: boolean;\n\t\tlet m: number;\n\n\t\t/**\n\t\t * 原始判斷模式\n\t\t */\n\t\tif (w1.p == w2.p)\n\t\t{\n\t\t\tbool = true;\n\t\t}\n\t\t/**\n\t\t * 不確定沒有BUG 但原始模式已經不合需求 因為單一項目多個詞性\n\t\t */\n\t\telse if (m = (w1.p & w2.p))\n\t\t{\n\t\t\tif (1 || m & POSTAG.D_N)\n\t\t\t{\n\t\t\t\tbool = true;\n\t\t\t}\n\t\t}\n\t\t/**\n\t\t * 允許例如 幾 + ％\n\t\t */\n\t\telse if (w1.p && typeof w2.p == 'undefined')\n\t\t{\n\t\t\tbool = true;\n\t\t}\n\t\telse if (w1.p & POSTAG.D_D && w2.p & POSTAG.D_V)\n\t\t{\n\t\t\t({\n\t\t\t\tnw_cache,\n\t\t\t\tnw_cache_exists,\n\t\t\t} = this._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\tlet mw = nw_cache;\n\n\t\t\tif (mw && (mw.p & POSTAG.D_D || mw.p & POSTAG.D_V))\n\t\t\t{\n\t\t\t\tbool = true;\n\t\t\t}\n\t\t}\n\n\t\treturn bool\n\t\t\t&& this._getWordCache(nw, nw_cache, nw_cache_exists).nw_cache_exists;\n\t}\n\n\t_getWordCache(nw: string, nw_cache: IWord, nw_cache_exists: boolean)\n\t{\n\t\tif (typeof nw_cache_exists === 'undefined')\n\t\t{\n\t\t\tconst TABLE = this._TABLE;\n\n\t\t\tnw_cache = nw_cache || TABLE[nw];\n\t\t\tnw_cache_exists = !!nw_cache;\n\t\t}\n\n\t\treturn {\n\t\t\tnw,\n\t\t\tnw_cache,\n\t\t\tnw_cache_exists,\n\t\t}\n\t}\n\n\t/**\n\t * 词典优化\n\t *\n\t * @param {array} words 单词数组\n\t * @param {bool} is_not_first 是否为管理器调用的\n\t * @return {array}\n\t */\n\tdoOptimize(words: IWord[], is_not_first: boolean): IWord[]\n\t{\n\t\t//debug(words);\n\t\tif (typeof is_not_first == 'undefined')\n\t\t{\n\t\t\tis_not_first = false;\n\t\t}\n\t\t// 合并相邻的能组成一个单词的两个词\n\t\tconst TABLE = this._TABLE;\n\t\tconst POSTAG = this._POSTAG;\n\t\tconst self = this;\n\n\t\tlet i = 0;\n\t\tlet ie = words.length - 1;\n\t\twhile (i < ie)\n\t\t{\n\t\t\tlet w1 = words[i];\n\t\t\tlet w2 = words[i + 1];\n\t\t\t//debug(w1.w + ', ' + w2.w);\n\n\t\t\t// ==========================================\n\t\t\tlet nw: string = w1.w + w2.w;\n\n\t\t\tlet nw_cache: 
IWord;\n\t\t\tlet nw_cache_exists: boolean;\n\n\t\t\t/**\n\t\t\t * 形容词 + 助词 = 形容词，如： 不同 + 的 = 不同的\n\t\t\t */\n\t\t\tif (w1.w != '了'\n\t\t\t\t&& (w1.p & POSTAG.D_A)\n\t\t\t\t&& (w2.p & POSTAG.D_U)\n\t\t\t)\n\t\t\t{\n\t\t\t\tlet p = POSTAG.D_A;\n\t\t\t\tlet f: number;\n\n\t\t\t\t({\n\t\t\t\t\tnw_cache,\n\t\t\t\t\tnw_cache_exists,\n\t\t\t\t} = self._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\t\tlet mw = nw_cache;\n\n\t\t\t\tif (!mw || (mw.p & POSTAG.D_A))\n\t\t\t\t{\n\t\t\t\t\tif (mw && (mw.p & POSTAG.D_A))\n\t\t\t\t\t{\n\t\t\t\t\t\tp = mw.p;\n\t\t\t\t\t\tf = mw.f;\n\t\t\t\t\t}\n\t\t\t\t\telse if (w1.p & POSTAG.BAD)\n\t\t\t\t\t{\n\t\t\t\t\t\tp = POSTAG.D_A + POSTAG.BAD;\n\t\t\t\t\t}\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\t//p: ((nw in TABLE && TABLE[nw].p & POSTAG.D_A) ? TABLE[nw].p : POSTAG.D_A),\n\t\t\t\t\t\tp,\n\t\t\t\t\t\tf,\n\t\t\t\t\t\tm: [w1, w2],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 1,\n\t\t\t\t\t});\n\t\t\t\t\tie--;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t/**\n\t\t\t * 形容詞 + 名詞 = 名詞\n\t\t\t */\n\t\t\tif ((w1.p & POSTAG.D_A)\n\t\t\t\t&& (w2.p & POSTAG.D_N)\n\t\t\t)\n\t\t\t{\n\t\t\t\t({\n\t\t\t\t\tnw_cache,\n\t\t\t\t\tnw_cache_exists,\n\t\t\t\t} = self._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\t\tif (nw_cache_exists)\n\t\t\t\t{\n\t\t\t\t\tlet mw = nw_cache;\n\n\t\t\t\t\tif (mw.p & POSTAG.D_N)\n\t\t\t\t\t{\n\t\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\t\tp: mw.p,\n\t\t\t\t\t\t\tf: mw.f,\n\t\t\t\t\t\t\tm: [w1, w2],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 7,\n\t\t\t\t\t\t});\n\t\t\t\t\t\tie--;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 能组成一个新词的(词性必须相同)\n\n\t\t\tif (this.isMergeable(w1, w2, {\n\t\t\t\tnw,\n\t\t\t\tPOSTAG,\n\t\t\t\tTABLE,\n\t\t\t\ti,\n\t\t\t\tnw_cache,\n\t\t\t\tnw_cache_exists,\n\t\t\t}))\n\t\t\t//if (w1.p == w2.p && nw in 
TABLE)\n\t\t\t{\n\t\t\t\t({\n\t\t\t\t\tnw_cache,\n\t\t\t\t\tnw_cache_exists,\n\t\t\t\t} = self._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\t\tlet mw = nw_cache;\n\n\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\tw: nw,\n\t\t\t\t\tp: mw.p,\n\t\t\t\t\tf: mw.f,\n\t\t\t\t\tm: [w1, w2],\n\t\t\t\t}, undefined, {\n\t\t\t\t\t[this.name]: 2,\n\t\t\t\t});\n\t\t\t\tie--;\n\t\t\t\tcontinue;\n\t\t\t}\n\n\t\t\t// ============================================\n\t\t\t// 数词组合\n\t\t\tif ((w1.p & POSTAG.A_M))\n\t\t\t{\n\t\t\t\t//debug(w2.w + ' ' + (w2.p & POSTAG.A_M));\n\t\t\t\t// 百分比数字 如 10%，或者下一个词也是数词，则合并\n\t\t\t\tif ((\n\t\t\t\t\tw2.p & POSTAG.A_M\n\t\t\t\t\t&& !/^第/.test(w2.w)\n\t\t\t\t) || w2.w == '%' || w2.w == '％')\n\t\t\t\t{\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: w1.w + w2.w,\n\t\t\t\t\t\tp: POSTAG.A_M,\n\t\t\t\t\t\tm: [w1, w2],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 3,\n\t\t\t\t\t});\n\t\t\t\t\tie--;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\t// 数词 + 量词，合并。如： 100个\n\t\t\t\tif ((w2.p & POSTAG.A_Q))\n\t\t\t\t{\n\t\t\t\t\t// 数量词\n\t\t\t\t\tlet p = POSTAG.D_MQ;\n\t\t\t\t\tlet nw: string = w1.w + w2.w;\n\n\t\t\t\t\t({\n\t\t\t\t\t\tnw_cache,\n\t\t\t\t\t\tnw_cache_exists,\n\t\t\t\t\t} = self._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\t\t\tif (nw_cache)\n\t\t\t\t\t{\n\t\t\t\t\t\tp = nw_cache.p | POSTAG.D_MQ;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tif (w2.p & POSTAG.D_T)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tp = p | POSTAG.D_T;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tif (w2.p & POSTAG.D_N)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tp = p | POSTAG.D_N;\n\t\t\t\t\t\t}\n\t\t\t\t\t\tif (w2.p & POSTAG.D_V)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tp = p | POSTAG.D_V;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, {\n\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\tp,\n\t\t\t\t\t\tm: [w1, w2],\n\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t[this.name]: 4,\n\t\t\t\t\t});\n\t\t\t\t\tie--;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\t// 带小数点的数字 ，如 “3 . 
14”，或者 “十五点三”\n\t\t\t\t// 数词 + \"分之\" + 数词，如“五十分之一”\n\t\t\t\tlet w3 = words[i + 2];\n\t\t\t\tif (w3 && (w3.p & POSTAG.A_M))\n\t\t\t\t{\n\t\t\t\t\tif (w2.w == '.'\n\t\t\t\t\t\t|| w2.w == '点'\n\t\t\t\t\t\t|| w2.w == '點'\n\t\t\t\t\t\t|| w2.w == '分之'\n\t\t\t\t\t)\n\t\t\t\t\t{\n\t\t\t\t\t\tthis.sliceToken(words, i, 3, {\n\t\t\t\t\t\t\tw: w1.w + w2.w + w3.w,\n\t\t\t\t\t\t\tp: POSTAG.A_M,\n\t\t\t\t\t\t\tm: [w1, w2, w3],\n\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t[this.name]: 5,\n\t\t\t\t\t\t});\n\t\t\t\t\t\tie -= 2;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t/**\n\t\t\t\t\t * 支援 `最多容納59,000個人,或5.9萬人,再多就不行了.這是環評的結論.`\n\t\t\t\t\t */\n\t\t\t\t\tif (w2.w == ',')\n\t\t\t\t\t{\n\t\t\t\t\t\tlet _r1 = /^[\\d０-９]+$/;\n\t\t\t\t\t\tlet _r2 = /^(?:(?:[\\d０-９]+)?(?:\\.[\\d０-９]+)|(?:[\\d０-９]+))$/;\n\n\t\t\t\t\t\tif (_r1.test(w1.w) && _r2.test(w3.w))\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tthis.sliceToken(words, i, 3, {\n\t\t\t\t\t\t\t\tw: w1.w + w2.w + w3.w,\n\t\t\t\t\t\t\t\tp: POSTAG.A_M,\n\t\t\t\t\t\t\t\tm: [w1, w2, w3],\n\t\t\t\t\t\t\t}, undefined, {\n\t\t\t\t\t\t\t\t[this.name]: 6,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\tie -= 2;\n\t\t\t\t\t\t\tcontinue;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 修正 “十五点五八”问题\n\t\t\tif ((w1.p & POSTAG.D_MQ) && ['點', '点'].includes(w1.w.substr(-1)) && w2.p & POSTAG.A_M)\n\t\t\t{\n\t\t\t\t//debug(w1, w2);\n\t\t\t\tlet i2 = 2;\n\t\t\t\tlet w4w = '';\n\t\t\t\tfor (let j = i + i2; j < ie; j++)\n\t\t\t\t{\n\t\t\t\t\tlet w3 = words[j];\n\t\t\t\t\tif ((w3.p & POSTAG.A_M) > 0)\n\t\t\t\t\t{\n\t\t\t\t\t\tw4w += w3.w;\n\t\t\t\t\t\ti2++;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tthis.sliceToken(words, i, i2, {\n\t\t\t\t\tw: w1.w + w2.w + w4w,\n\t\t\t\t\tp: POSTAG.D_MQ, // 数量词\n\t\t\t\t\tm: [w1, w2, w4w],\n\t\t\t\t}, undefined, {\n\t\t\t\t\t[this.name]: 6,\n\t\t\t\t});\n\t\t\t\tie -= i2 - 1;\n\t\t\t\tcontinue;\n\t\t\t}\n\n\t\t\t/**\n\t\t\t * 合併 東南西北\n\t\t\t */\n\t\t\tif 
(DIRECTIONS_REGEXP.test(w1.w))\n\t\t\t{\n\t\t\t\tif (DIRECTIONS_REGEXP.test(w2.w))\n\t\t\t\t{\n\t\t\t\t\t({\n\t\t\t\t\t\tnw_cache,\n\t\t\t\t\t\tnw_cache_exists,\n\t\t\t\t\t} = self._getWordCache(nw, nw_cache, nw_cache_exists));\n\n\t\t\t\t\tlet mw: IWordDebug = this.createToken({\n\t\t\t\t\t\tp: POSTAG.D_F,\n\t\t\t\t\t\t...nw_cache,\n\t\t\t\t\t\tw: nw,\n\t\t\t\t\t\tm: [w1, w2],\n\t\t\t\t\t});\n\n\t\t\t\t\tmw.p = mw.p | POSTAG.D_F;\n\n\t\t\t\t\tthis.sliceToken(words, i, 2, mw, true, {\n\t\t\t\t\t\t[this.name]: 8,\n\t\t\t\t\t});\n\n\t\t\t\t\tie--;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// 移到下一个词\n\t\t\ti++;\n\t\t}\n\n\t\t// 针对组合数字后无法识别新组合的数字问题，需要重新扫描一次\n\t\treturn is_not_first === true ? words : this.doOptimize(words, true);\n\t}\n\n}\n\nexport const init = DictOptimizer.init.bind(DictOptimizer) as ISubOptimizerCreate<DictOptimizer>;\n\nexport default DictOptimizer;\n"]} |
\ | No newline at end of file |