UNPKG

5.15 kBTypeScriptView Raw
/**
 * Segmenter (word tokenizer) interface
 *
 * @author 老雷<leizongmin@gmail.com>
 */
6/// <reference types="node" />
7import TableDictBlacklist from './table/blacklist';
8import AbstractTableDictCore from './table/core';
9import { TableDict } from './table/dict';
10import { TableDictStopword } from './table/stopword';
11import TableDictSynonym from './table/synonym';
12import { ISubOptimizer, ISubTokenizer } from './mod';
13import { EnumDictDatabase } from './const';
14import { IDICT, IDICT2, IDICT_BLACKLIST, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment, IOptionsSegment, ISPLIT, ISPLIT_FILTER, IWord } from './segment/types';
15import SegmentCore from './segment/core';
16import { ITSOverwrite } from 'ts-type';
17import { IUseDefaultOptions } from './defaults/index';
/**
 * Segmenter (word tokenizer) interface.
 *
 * Extends `SegmentCore` with dictionary loading, module registration and the
 * `doSegment` entry point.
 */
export declare class Segment extends SegmentCore {
    /** Class-level `IOptionsDoSegment` value (presumably the defaults applied by `doSegment` — confirm against the implementation). */
    static defaultOptionsDoSegment: IOptionsDoSegment;
    /**
     * Returns the dictionary database registered for `type`.
     *
     * The overloads tie each `EnumDictDatabase` member to the table class that
     * is returned for it; the last overload accepts any string key.
     *
     * @param type - Dictionary database key.
     * @param autocreate - NOTE(review): presumably creates the database when it does not exist yet — confirm.
     * @param libTableDict - Constructor of the table class (NOTE(review): presumably used when creating the database — confirm).
     */
    getDictDatabase<R extends TableDictSynonym>(type: EnumDictDatabase.SYNONYM, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDict>(type: EnumDictDatabase.TABLE, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictStopword>(type: EnumDictDatabase.STOPWORD, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictBlacklist>(type: EnumDictDatabase.BLACKLIST, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictBlacklist>(type: EnumDictDatabase.BLACKLIST_FOR_OPTIMIZER, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends TableDictBlacklist>(type: EnumDictDatabase.BLACKLIST_FOR_SYNONYM, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    getDictDatabase<R extends AbstractTableDictCore<any>>(type: string | EnumDictDatabase, autocreate?: boolean, libTableDict?: {
        new (...argv: any[]): R;
    }): R;
    /**
     * Loads one or more segmentation modules.
     *
     * @param mod - A module name, a module object (tokenizer or optimizer), or an array of those.
     * @param argv - Extra arguments (NOTE(review): how they are consumed is not visible in this declaration).
     * @returns Declared as `any`; the original documentation states that the `Segment` instance is returned.
     */
    use(mod: ISubOptimizer, ...argv: any[]): any;
    use(mod: ISubTokenizer, ...argv: any[]): any;
    use(mod: Array<ISubTokenizer | ISubOptimizer | string>, ...argv: any[]): any;
    use(mod: string, ...argv: any[]): any;
    use(mod: any, ...argv: any[]): any;
    /**
     * Resolves a dictionary name to a file name, or to several file names.
     *
     * @param name - Dictionary name.
     * @param pathPlus - NOTE(review): presumably additional search paths — confirm.
     * @param extPlus - NOTE(review): presumably additional file extensions — confirm.
     */
    _resolveDictFilename(name: string, pathPlus?: string[], extPlus?: string[]): string | string[];
    /**
     * Loads a dictionary file.
     *
     * @param name - Dictionary file name.
     * @param type - Dictionary type.
     * @param convert_to_lower - Whether to convert everything to lowercase.
     * @param skipExists - NOTE(review): undocumented in the original; presumably skips entries that already exist — confirm.
     * @returns This instance.
     */
    loadDict(name: string, type?: string, convert_to_lower?: boolean, skipExists?: boolean): this;
    /**
     * Loads a synonym dictionary.
     *
     * @param name - Dictionary file name.
     * @param skipExists - NOTE(review): undocumented in the original; presumably skips entries that already exist — confirm.
     * @returns This instance.
     */
    loadSynonymDict(name: string, skipExists?: boolean): this;
    /**
     * Shared blacklist loader taking the target database `type`
     * (presumably what the public `loadBlacklist*Dict` methods delegate to — confirm).
     */
    protected _loadBlacklistDict(name: string, type: EnumDictDatabase): this;
    /**
     * Dictionary blacklist: entries found in this dictionary are removed from
     * the main dictionary.
     */
    loadBlacklistDict(name: string): this;
    /**
     * Optimizer blacklist: prevents some optimizers from combining the words
     * listed in this dictionary (for example, automatic combination of
     * personal names).
     */
    loadBlacklistOptimizerDict(name: string): this;
    /**
     * Conversion blacklist: words listed in this dictionary are ignored when
     * words are converted dynamically.
     */
    loadBlacklistSynonymDict(name: string): this;
    /**
     * Loads a stopword dictionary.
     *
     * @param name - Dictionary file name.
     * @returns This instance.
     */
    loadStopwordDict(name: string): this;
    /**
     * Uses the default recognition modules and dictionary files.
     * When running with the default settings, this function does not need to
     * be called explicitly.
     *
     * @returns Declared as `any`; the original documentation states that the `Segment` instance is returned.
     */
    useDefault(options?: IUseDefaultOptions, ...argv: any[]): any;
    /**
     * This function only needs to run once, and under normal circumstances
     * does not need to be called manually.
     */
    autoInit(options?: IUseDefaultOptions): this;
    /**
     * Adds `word` to the blacklist.
     *
     * @param remove - NOTE(review): undocumented in the original; presumably also removes the word from the main table — confirm.
     */
    addBlacklist(word: string, remove?: boolean): this;
    /**
     * Removes keys from TABLE according to BLACKLIST.
     */
    doBlacklist(): this;
    /**
     * Starts segmentation.
     *
     * Options listed by the original documentation:
     *   - `simple`: whether to return only the word text
     *   - `stripPunctuation`: strip punctuation
     *   - `convertSynonym`: convert synonyms
     *   - `stripStopword`: strip stopwords
     *
     * @param text - Text to segment.
     * @param options - Segmentation options.
     * @returns `string[]` when `options.simple` is `true`, otherwise `IWord[]`.
     */
    doSegment(text: string | Buffer, options: ITSOverwrite<IOptionsDoSegment, {
        simple: true;
    }>): string[];
    doSegment(text: string | Buffer, options?: IOptionsDoSegment): IWord[];
}
/**
 * Namespace merged with the `Segment` class so that the segment-related types
 * are also reachable as members of `Segment` (e.g. `Segment.IWord`).
 */
export declare namespace Segment {
    export { IDICT, IDICT2, IDICT_BLACKLIST, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment, IOptionsSegment, ISPLIT, ISPLIT_FILTER, IWord, };
}
// Re-export the segment types imported from './segment/types' as named exports of this module.
export { IDICT, IDICT2, IDICT_BLACKLIST, IDICT_STOPWORD, IDICT_SYNONYM, IOptionsDoSegment, IOptionsSegment, ISPLIT, ISPLIT_FILTER, IWord, };
// `Segment` is also the module's default export.
export default Segment;