1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 | import POSTAG from '../POSTAG';
|
8 | import TableDictBlacklist from '../table/blacklist';
|
9 | import AbstractTableDictCore from '../table/core';
|
10 | import { TableDict } from '../table/dict';
|
11 | import { TableDictStopword } from '../table/stopword';
|
12 | import TableDictSynonym from '../table/synonym';
|
13 | import { ISubOptimizer, ISubTokenizer, Optimizer, Tokenizer } from '../mod/index';
|
14 | import { IWordDebug } from '../util/index';
|
15 | import { EnumDictDatabase } from '../const';
|
16 | import { IDICT, IDICT2, IDICT_BLACKLIST, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment, IOptionsSegment, ISPLIT, ISPLIT_FILTER, IWord } from './types';
|
17 | import { ITSOverwrite } from 'ts-type';
|
18 |
|
19 |
|
20 |
|
/**
 * Ambient declaration of the segmenter core class.
 *
 * It exposes the dictionary tables, the tokenizer/optimizer module lists and
 * the `doSegment` entry point. Only type signatures are visible in this file,
 * so notes marked "presumably" are assumptions that should be confirmed
 * against the implementation.
 */
export declare class SegmentCore {
    /**
     * Splitter configuration (see `ISPLIT` in `./types`).
     * Presumably used to cut the input into paragraphs before segmentation — TODO confirm.
     */
    SPLIT: ISPLIT;
    /**
     * Filter configuration (see `ISPLIT_FILTER` in `./types`).
     * Presumably decides which split parts are skipped by the analysis — TODO confirm.
     */
    SPLIT_FILTER: ISPLIT_FILTER;
    /** Reference to the part-of-speech tag table imported from `../POSTAG`. */
    POSTAG: typeof POSTAG;
    /**
     * Dictionary data keyed by name. `STOPWORD` and `SYNONYM` have dedicated
     * shapes; any other key holds a generic `IDICT`.
     */
    DICT: {
        STOPWORD?: IDICT_STOPWORD;
        SYNONYM?: IDICT_SYNONYM;
        [key: string]: IDICT;
    };
    /** Sub-modules held by this instance, grouped by kind. */
    modules: {
        tokenizer: ISubTokenizer[];
        optimizer: ISubOptimizer[];
    };
    /** Tokenizer driver (see `Tokenizer` in `../mod/index`). */
    tokenizer: Tokenizer;
    /** Optimizer driver (see `Optimizer` in `../mod/index`). */
    optimizer: Optimizer;
    /** Dictionary table objects keyed by name. */
    db: {
        [key: string]: TableDict;
    };
    /** Options held by this instance. */
    options: IOptionsSegment;
    /** Optional initialisation flag; how it is set is not visible in this declaration. */
    inited?: boolean;
    /**
     * @param options - optional segmenter options.
     */
    constructor(options?: IOptionsSegment);
    /**
     * Returns the dictionary table object for `type`. The overloads narrow the
     * result type according to the `EnumDictDatabase` member passed in; the
     * last overload accepts any string key.
     *
     * @param type - dictionary database identifier.
     * @param autocreate - presumably creates the table when it does not exist yet — TODO confirm.
     * @param libTableDict - constructor yielding the table type `R`;
     *   presumably used when the table is auto-created — TODO confirm.
     * @returns the table, typed as `R`.
     */
    getDictDatabase<R extends TableDictSynonym>(type: EnumDictDatabase.SYNONYM, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDict>(type: EnumDictDatabase.TABLE, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictStopword>(type: EnumDictDatabase.STOPWORD, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictBlacklist>(type: EnumDictDatabase.BLACKLIST, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictBlacklist>(type: EnumDictDatabase.BLACKLIST_FOR_OPTIMIZER, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends AbstractTableDictCore<any>>(type: string | EnumDictDatabase, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    /**
     * Loads a segmentation module.
     *
     * @param mod - module name (or array of names) or module object.
     * @param argv - extra arguments; their use is not visible in this declaration.
     * @returns this instance.
     */
    use(mod: ISubOptimizer, ...argv: any[]): this;
    use(mod: ISubTokenizer, ...argv: any[]): this;
    use(mod: any, ...argv: any[]): this;
    /**
     * Returns the dictionary data for `type`. The overloads narrow the result
     * shape for the known `EnumDictDatabase` members and for the literal
     * `'TABLE2'`; any other value yields a generic `IDICT`.
     *
     * @param type - dictionary identifier.
     */
    getDict(type: EnumDictDatabase.STOPWORD): IDICT_STOPWORD;
    getDict(type: EnumDictDatabase.SYNONYM): IDICT_SYNONYM;
    getDict(type: EnumDictDatabase.TABLE): IDICT<IWord>;
    getDict(type: EnumDictDatabase.BLACKLIST): IDICT_BLACKLIST;
    getDict(type: EnumDictDatabase.BLACKLIST_FOR_OPTIMIZER): IDICT_BLACKLIST;
    getDict(type: 'TABLE2'): IDICT2<IWord>;
    getDict(type: EnumDictDatabase): IDICT;
    getDict(type: any): IDICT;
    /**
     * Returns segmentation options of the same type `T` as the argument.
     * Presumably fills in defaults for missing fields — TODO confirm.
     *
     * @param options - optional per-call options.
     */
    getOptionsDoSegment<T extends IOptionsDoSegment>(options?: T): T;
    /**
     * Produces a string from input given as a string or a `Buffer`.
     *
     * @param text - input text.
     */
    protected _get_text(text: string | Buffer): string;
    /**
     * Presumably adds `word` to the blacklist, or removes it when `remove`
     * is set — TODO confirm against the implementation.
     *
     * @param word - the word concerned.
     * @param remove - optional flag.
     * @returns this instance.
     */
    addBlacklist(word: string, remove?: boolean): this;
    /**
     * Presumably applies the current blacklist to the loaded dictionaries —
     * TODO confirm against the implementation.
     *
     * @returns this instance.
     */
    doBlacklist(): this;
    /**
     * Lists the tokenizer and optimizer sub-modules, split into an `enable`
     * group and a `disable` group.
     *
     * @param options - optional per-call options; presumably they decide
     *   which modules land in which group — TODO confirm.
     */
    listModules(options?: IOptionsDoSegment): {
        enable: {
            tokenizer: ISubTokenizer[];
            optimizer: ISubOptimizer[];
        };
        disable: {
            tokenizer: ISubTokenizer[];
            optimizer: ISubOptimizer[];
        };
    };
    /**
     * Segments `text` into words.
     *
     * When `options.simple` is typed as the literal `true` the result is a
     * plain `string[]`; otherwise it is an `IWord[]`.
     *
     * @param text - input as a string or a `Buffer`.
     * @param options - per-call options.
     */
    doSegment(text: string | Buffer, options: ITSOverwrite<IOptionsDoSegment, {
        simple: true;
    }>): string[];
    doSegment(text: string | Buffer, options?: IOptionsDoSegment): IWord[];
    /**
     * Synonym conversion over a word list.
     *
     * With `showcount` typed as the literal `true` the result is an object
     * holding `count` and the resulting `list`. `count` is presumably the
     * number of replacements made — TODO confirm.
     *
     * @param ret - word list to process.
     * @param showcount - request the `{ count, list }` result shape.
     */
    convertSynonym(ret: IWordDebug[], showcount: true): {
        count: number;
        list: IWordDebug[];
    };
    /**
     * Synonym conversion over a word list, returning only the word list.
     *
     * @param ret - word list to process.
     * @param showcount - optional flag; see the `true` overload for the
     *   alternative result shape.
     */
    convertSynonym(ret: IWordDebug[], showcount?: boolean): IWordDebug[];
    /**
     * Converts a list of words (word objects or plain strings) into a single
     * string.
     *
     * @param words - words to convert.
     * @param argv - extra arguments; their use is not visible in this declaration.
     */
    stringify(words: Array<IWord | string>, ...argv: any[]): string;
    /**
     * Static counterpart of the instance `stringify`, with the same signature.
     *
     * @param words - words to convert.
     * @param argv - extra arguments; their use is not visible in this declaration.
     */
    static stringify(words: Array<IWord | string>, ...argv: any[]): string;
    /**
     * Produces a word list from `words` and a separator `s`.
     * Presumably a string `s` matches word text and a numeric `s` matches a
     * part-of-speech tag — TODO confirm.
     *
     * @param words - word list to process.
     * @param s - separator, as a string or a number.
     * @param argv - extra arguments; their use is not visible in this declaration.
     */
    split(words: IWord[], s: string | number, ...argv: any[]): IWord[];
    /**
     * Returns a position within `words` for `s`.
     * Presumably searches from `cur` and yields -1 when nothing matches —
     * TODO confirm.
     *
     * @param words - word list to search.
     * @param s - value to look for, as a string or a number.
     * @param cur - optional position argument.
     * @param argv - extra arguments; their use is not visible in this declaration.
     */
    indexOf(words: IWord[], s: string | number, cur?: number, ...argv: any[]): number;
}
|
// Re-export the names imported from './types' so consumers can take them from this module.
export { IDICT, IDICT2, IDICT_BLACKLIST, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment, IOptionsSegment, ISPLIT, ISPLIT_FILTER, IWord };
// The class is also available as the default export.
export default SegmentCore;
|