1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 | 'use strict';
|
7 | Object.defineProperty(exports, "__esModule", { value: true });
|
8 | exports.Segment = void 0;
|
9 | const path = require("path");
|
10 | const get_1 = require("./fs/get");
|
11 | const blacklist_1 = require("./table/blacklist");
|
12 | const dict_1 = require("./table/dict");
|
13 | const loader_1 = require("./loader");
|
14 | const stopword_1 = require("./table/stopword");
|
15 | const synonym_1 = require("./table/synonym");
|
16 | const segment_dict_1 = require("segment-dict");
|
17 | const project_config_1 = require("../project.config");
|
18 | const core_1 = require("./segment/core");
|
19 | const defaults_1 = require("./segment/defaults");
|
20 | const index_1 = require("./defaults/index");
|
21 | const useModules2_1 = require("./segment/methods/useModules2");
|
22 |
|
23 |
|
24 |
|
/**
 * Segmenter front-end built on top of the core segment class.
 *
 * Adds dictionary loading (main, synonym, stopword and blacklist tables),
 * module registration and lazy initialisation before segmentation.
 * Every mutating method returns `this` so calls can be chained.
 */
class Segment extends core_1.default {
    /**
     * Returns the table database registered under `type`, creating it first
     * when it does not exist yet and either `autocreate` is truthy or the
     * instance has already been initialised.
     *
     * @param {string} type - dictionary type key (e.g. "TABLE", "SYNONYM").
     * @param {boolean} [autocreate] - create the database even before init.
     * @param {Function} [libTableDict] - table class to instantiate; when
     *   omitted a class is picked from `type`.
     * @returns {object|undefined} the database, or undefined if none was
     *   created.
     */
    getDictDatabase(type, autocreate, libTableDict) {
        if ((autocreate || this.inited) && !this.db[type]) {
            let TableClass = libTableDict;
            if (!TableClass) {
                if (type === synonym_1.default.type) {
                    TableClass = synonym_1.default;
                }
                else if (type === stopword_1.TableDictStopword.type) {
                    TableClass = stopword_1.TableDictStopword;
                }
                else if (type === blacklist_1.default.type
                    || type === "BLACKLIST_FOR_OPTIMIZER"
                    || type === "BLACKLIST_FOR_SYNONYM") {
                    // All blacklist flavours share one table implementation.
                    TableClass = blacklist_1.default;
                }
                else {
                    TableClass = dict_1.TableDict;
                }
            }
            this.db[type] = new TableClass(type, this.options, {
                TABLE: this.DICT[type],
            });
        }
        return this.db[type];
    }

    /**
     * Registers one or more modules through `useModules` and marks the
     * instance as initialised.
     *
     * @param {*} mod - module (or modules) forwarded to `useModules`.
     * @param {...*} argv - extra arguments forwarded unchanged.
     * @returns {this}
     */
    use(mod, ...argv) {
        useModules2_1.useModules(this, mod, ...argv);
        this.inited = true;
        return this;
    }

    /**
     * Resolves a dictionary name to a file on disk.
     *
     * Search paths, in order: '' (the name as given), the project dict root,
     * `pathPlus`, then the `segment` directory under the segment-dict root.
     * Tried extensions, in order: '' (none), `extPlus`, '.utf8', '.txt'.
     *
     * @param {string} name - file name or glob pattern (contains `*`).
     * @param {string[]} [pathPlus] - additional search directories.
     * @param {string[]} [extPlus] - additional extensions to try.
     * @returns {string|string[]} the first match, or every match when `name`
     *   is a glob pattern.
     * @throws {Error} when nothing matches.
     */
    _resolveDictFilename(name, pathPlus = [], extPlus = []) {
        const options = {
            paths: [
                '',
                project_config_1.default.dict_root,
                ...pathPlus,
                path.resolve(segment_dict_1.default.DICT_ROOT, 'segment'),
            ],
            extensions: [
                '',
                ...extPlus,
                '.utf8',
                '.txt',
            ],
            onlyFile: true,
        };
        if (name.includes('*')) {
            const matches = get_1.searchGlobSync(name, options);
            if (!matches || !matches.length) {
                throw new Error(`Cannot find dict glob file "${name}".`);
            }
            return matches;
        }
        const filename = get_1.searchFirstSync(name, options);
        if (!filename) {
            throw new Error(`Cannot find dict file "${name}".`);
        }
        return filename;
    }

    /**
     * Loads a dictionary file (or every file matched by a glob) into the
     * table identified by `type`.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @param {string} [type] - target table; falls back to 'TABLE' when falsy.
     * @param {boolean} [convert_to_lower] - lowercase the first field of each
     *   entry before adding it.
     * @param {boolean} [skipExists] - forwarded to the table's `add`.
     * @returns {this}
     */
    loadDict(name, type, convert_to_lower, skipExists) {
        const filename = this._resolveDictFilename(name);
        if (Array.isArray(filename)) {
            filename.forEach((file) => this.loadDict(file, type, convert_to_lower, skipExists));
            return this;
        }
        const dictType = type || 'TABLE';
        const db = this.getDictDatabase(dictType, true);
        // Expose the database's tables through this.DICT as well.
        this.DICT[dictType] = db.TABLE;
        this.DICT[dictType + '2'] = db.TABLE2;
        const rows = loader_1.default.SegmentDictLoader.loadSync(filename);
        rows.forEach((entry) => {
            if (convert_to_lower) {
                entry[0] = entry[0].toLowerCase();
            }
            db.add(entry, skipExists);
        });
        this.inited = true;
        return this;
    }

    /**
     * Loads a synonym dictionary into the "SYNONYM" table. The `synonym`
     * directory under the segment-dict root is added to the search paths.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @param {boolean} [skipExists] - forwarded to the table's `add`.
     * @returns {this}
     */
    loadSynonymDict(name, skipExists) {
        const filename = this._resolveDictFilename(name, [
            path.resolve(segment_dict_1.default.DICT_ROOT, 'synonym'),
        ]);
        if (Array.isArray(filename)) {
            filename.forEach((file) => this.loadSynonymDict(file, skipExists));
            return this;
        }
        const type = 'SYNONYM';
        const db = this.getDictDatabase(type, true);
        this.DICT[type] = db.TABLE;
        const rows = loader_1.default.SegmentSynonymLoader.loadSync(filename);
        rows.forEach((blocks) => {
            db.add(blocks, skipExists);
        });
        this.inited = true;
        return this;
    }

    /**
     * Shared implementation for the blacklist loaders: reads a line-based
     * file and adds every loaded line to the table identified by `type`.
     * The `blacklist` directory under the segment-dict root is added to the
     * search paths.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @param {string} type - blacklist table key.
     * @returns {this}
     */
    _loadBlacklistDict(name, type) {
        const filename = this._resolveDictFilename(name, [
            path.resolve(segment_dict_1.default.DICT_ROOT, 'blacklist'),
        ]);
        if (Array.isArray(filename)) {
            filename.forEach((file) => this._loadBlacklistDict(file, type));
            return this;
        }
        const db = this.getDictDatabase(type, true);
        this.DICT[type] = db.TABLE;
        const lines = loader_1.default.SegmentDict
            .requireLoaderModule('line')
            .loadSync(filename, {
                filter(line) {
                    return line.trim();
                },
            });
        lines.forEach((line) => db.add(line));
        this.inited = true;
        return this;
    }

    /**
     * Loads a blacklist file into the "BLACKLIST" table.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @returns {this}
     */
    loadBlacklistDict(name) {
        return this._loadBlacklistDict(name, "BLACKLIST");
    }

    /**
     * Loads a blacklist file into the "BLACKLIST_FOR_OPTIMIZER" table.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @returns {this}
     */
    loadBlacklistOptimizerDict(name) {
        return this._loadBlacklistDict(name, "BLACKLIST_FOR_OPTIMIZER");
    }

    /**
     * Loads a blacklist file into the "BLACKLIST_FOR_SYNONYM" table.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @returns {this}
     */
    loadBlacklistSynonymDict(name) {
        return this._loadBlacklistDict(name, "BLACKLIST_FOR_SYNONYM");
    }

    /**
     * Loads a line-based stopword file into the "STOPWORD" table. The
     * `stopword` directory under the segment-dict root is added to the
     * search paths.
     *
     * @param {string} name - dictionary file name or glob pattern.
     * @returns {this}
     */
    loadStopwordDict(name) {
        const filename = this._resolveDictFilename(name, [
            path.resolve(segment_dict_1.default.DICT_ROOT, 'stopword'),
        ]);
        if (Array.isArray(filename)) {
            filename.forEach((file) => this.loadStopwordDict(file));
            return this;
        }
        const type = "STOPWORD";
        const db = this.getDictDatabase(type, true);
        this.DICT[type] = db.TABLE;
        const lines = loader_1.default.SegmentDict
            .requireLoaderModule('line')
            .loadSync(filename, {
                filter(line) {
                    return line.trim();
                },
            });
        lines.forEach((line) => db.add(line));
        this.inited = true;
        return this;
    }

    /**
     * Applies the default setup via `useDefault` and marks the instance as
     * initialised.
     *
     * @param {...*} argv - forwarded unchanged to `useDefault`.
     * @returns {this}
     */
    useDefault(...argv) {
        index_1.useDefault(this, ...argv);
        this.inited = true;
        return this;
    }

    /**
     * Initialises the instance on first use. When no tokenizer module has
     * been registered yet, the default setup is applied with `options`.
     * Does nothing once the instance is flagged as initialised.
     *
     * @param {object} [options] - forwarded to `useDefault`.
     * @returns {this}
     */
    autoInit(options) {
        if (!this.inited) {
            this.inited = true;
            if (!this.modules.tokenizer.length) {
                this.useDefault(options);
            }
        }
        return this;
    }

    /**
     * Adds a word to the blacklist and removes it from the main table, or,
     * when `remove` is truthy, takes the word off the blacklist again.
     *
     * @param {string} word - the word to blacklist or un-blacklist.
     * @param {boolean} [remove] - truthy to remove the word from the blacklist.
     * @returns {this}
     */
    addBlacklist(word, remove) {
        this.autoInit(this.options);
        const blacklistDb = this.getDictDatabase("BLACKLIST");
        const tableDb = this.getDictDatabase("TABLE");
        if (remove) {
            blacklistDb.remove(word);
        }
        else {
            blacklistDb.add(word);
            tableDb.remove(word);
        }
        return this;
    }

    /**
     * Removes every word flagged truthy in the "BLACKLIST" dictionary from
     * the main table.
     *
     * @returns {this}
     */
    doBlacklist() {
        this.autoInit(this.options);
        // NOTE(review): getDict is defined in the base class; the `|| {}`
        // guard assumes it may return undefined when no blacklist was loaded,
        // in which case there is nothing to remove.
        const blacklist = this.getDict("BLACKLIST") || {};
        const tableDb = this.getDictDatabase("TABLE");
        Object.entries(blacklist).forEach(([key, flagged]) => {
            if (flagged) {
                tableDb.remove(key);
            }
        });
        return this;
    }

    /**
     * Segments `text`, making sure the instance is initialised first.
     *
     * @param {*} text - input forwarded to the base implementation.
     * @param {object} [options] - per-call options forwarded to the base
     *   implementation.
     * @returns {*} whatever the base class `doSegment` returns.
     */
    doSegment(text, options = {}) {
        this.autoInit(this.options);
        return super.doSegment(text, options);
    }
}
|
// Attach the default doSegment options to the class as a static property.
Segment.defaultOptionsDoSegment = defaults_1.defaultOptionsDoSegment;
// Publish the class as both the named and the default export.
exports.Segment = Segment;
exports.default = Segment;
|
301 |
|
\ | No newline at end of file |