UNPKG

5.53 kBJavaScriptView Raw
1'use strict';
2Object.defineProperty(exports, "__esModule", { value: true });
3exports.init = exports.SingleTokenizer = void 0;
4const mod_1 = require("../mod");
5const uni_string_1 = require("uni-string");
6/**
7 * 单字切分模块
8 * 此模組不包含模組列表內 需要手動指定
9 *
10 * @author 老雷<leizongmin@gmail.com>
11 */
12class SingleTokenizer extends mod_1.SubSModuleTokenizer {
13 /**
14 * 对未识别的单词进行分词
15 *
16 * @param {array} words 单词数组
17 * @return {array}
18 */
19 split(words) {
20 const POSTAG = this.segment.POSTAG;
21 let ret = [];
22 for (let i = 0, word; word = words[i]; i++) {
23 if (typeof word.p == 'undefined' || word.p) {
24 ret.push(word);
25 }
26 else {
27 // 仅对未识别的词进行匹配
28 ret = ret.concat(this.splitSingle(word.w));
29 }
30 }
31 return ret;
32 }
33 /**
34 * 单字切分
35 *
36 * @param {string} text 要切分的文本
37 * @param {int} cur 开始位置
38 * @return {array}
39 */
40 splitSingle(text, cur) {
41 const POSTAG = this.segment.POSTAG;
42 if (isNaN(cur))
43 cur = 0;
44 if (cur > 0) {
45 text = text.slice(cur);
46 }
47 let ret = [];
48 uni_string_1.default
49 .split(text, '')
50 .forEach(function (w, i) {
51 ret.push({
52 w,
53 p: POSTAG.UNK,
54 });
55 });
56 return ret;
57 }
58}
59exports.SingleTokenizer = SingleTokenizer;
60exports.init = SingleTokenizer.init.bind(SingleTokenizer);
61exports.default = SingleTokenizer;
62//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiU2luZ2xlVG9rZW5pemVyLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiU2luZ2xlVG9rZW5pemVyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLFlBQVksQ0FBQzs7O0FBRWIsZ0NBQXlEO0FBRXpELDJDQUFpQztBQUVqQzs7Ozs7R0FLRztBQUNILE1BQWEsZUFBZ0IsU0FBUSx5QkFBbUI7SUFHdkQ7Ozs7O09BS0c7SUFDSCxLQUFLLENBQUMsS0FBYztRQUVuQixNQUFNLE1BQU0sR0FBRyxJQUFJLENBQUMsT0FBTyxDQUFDLE1BQU0sQ0FBQztRQUVuQyxJQUFJLEdBQUcsR0FBRyxFQUFFLENBQUM7UUFDYixLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsRUFBRSxJQUFJLEVBQUUsSUFBSSxHQUFHLEtBQUssQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFDLEVBQUUsRUFDMUM7WUFDQyxJQUFJLE9BQU8sSUFBSSxDQUFDLENBQUMsSUFBSSxXQUFXLElBQUksSUFBSSxDQUFDLENBQUMsRUFDMUM7Z0JBQ0MsR0FBRyxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsQ0FBQzthQUNmO2lCQUVEO2dCQUNDLGNBQWM7Z0JBQ2QsR0FBRyxHQUFHLEdBQUcsQ0FBQyxNQUFNLENBQUMsSUFBSSxDQUFDLFdBQVcsQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQzthQUMzQztTQUNEO1FBQ0QsT0FBTyxHQUFHLENBQUM7SUFDWixDQUFDO0lBRUQ7Ozs7OztPQU1HO0lBQ0gsV0FBVyxDQUFDLElBQUksRUFBRSxHQUFZO1FBRTdCLE1BQU0sTUFBTSxHQUFHLElBQUksQ0FBQyxPQUFPLENBQUMsTUFBTSxDQUFDO1FBRW5DLElBQUksS0FBSyxDQUFDLEdBQUcsQ0FBQztZQUFFLEdBQUcsR0FBRyxDQUFDLENBQUM7UUFFeEIsSUFBSSxHQUFHLEdBQUcsQ0FBQyxFQUNYO1lBQ0MsSUFBSSxHQUFHLElBQUksQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7U0FDdkI7UUFFRCxJQUFJLEdBQUcsR0FBWSxFQUFFLENBQUM7UUFFdEIsb0JBQU87YUFDTCxLQUFLLENBQUMsSUFBSSxFQUFFLEVBQUUsQ0FBQzthQUNmLE9BQU8sQ0FBQyxVQUFVLENBQUMsRUFBRSxDQUFDO1lBRXRCLEdBQUcsQ0FBQyxJQUFJLENBQUM7Z0JBQ1IsQ0FBQztnQkFDRCxDQUFDLEVBQUUsTUFBTSxDQUFDLEdBQUc7YUFDYixDQUFDLENBQUM7UUFDSixDQUFDLENBQUMsQ0FDRjtRQUVELE9BQU8sR0FBRyxDQUFDO0lBQ1osQ0FBQztDQUNEO0FBOURELDBDQThEQztBQUVZLFFBQUEsSUFBSSxHQUFHLGVBQWUsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLGVBQWUsQ0FBZ0MsQ0FBQztBQUU5RixrQkFBZSxlQUFlLENBQUMiLCJzb3VyY2VzQ29udGVudCI6WyIndXNlIHN0cmljdCc7XG5cbmltcG9ydCB7IFN1YlNNb2R1bGUsIFN1YlNNb2R1bGVUb2tlbml6ZXIgfSBmcm9tICcuLi9tb2QnO1xuaW1wb3J0IHsgU2VnbWVudCwgSVdvcmQgfSBmcm9tICcuLi9TZWdtZW50JztcbmltcG9ydCBVU3RyaW5nIGZyb20gJ3VuaS1zdHJpbmcnO1xuXG4vKipcbiAqIOWNleWtl+WIh+WIhuaooeWdl1xuICog5q2k5qih57WE5LiN5YyF5ZCr5qih57WE5YiX6KGo5YWnIOmcgOimgeaJi+WLleaMh+WumlxuICpcbiAqIEBhdXRob3Ig6ICB6Zu3PGxlaXpvbmdtaW5AZ21haWwuY29tPlxuICovXG5leHBvcnQgY2xhc3MgU2luZ2xlVG9rZW5pemVyIGV4dGVuZHMgU3ViU01vZHVsZVRva2VuaXplclxue1xuXG5cdC8qKlxuXHQgKiDlr7nmnKror4bliKvnmoTljZXor43ov5vooYzliIbor41cblx0ICpcblx0ICogQHBhcmFtIHthcnJheX0gd29yZHMg5Y2V6K+N5pWw57uEXG5cdCAqIEByZXR1cm4ge2FycmF5fVxuXHQgKi9cblx0c3BsaXQod29yZHM6IElXb3JkW10pOiBJV29yZFtdXG5cdHtcblx0XHRjb25zdCBQT1NUQUcgPSB0aGlzLnNlZ21lbnQuUE9TVEFHO1xuXG5cdFx0bGV0IHJldCA9IFtdO1xuXHRcdGZvciAobGV0IGkgPSAwLCB3b3JkOyB3b3JkID0gd29yZHNbaV07IGkrKylcblx0XHR7XG5cdFx0XHRpZiAodHlwZW9mIHdvcmQucCA9PSAndW5kZWZpbmVkJyB8fCB3b3JkLnApXG5cdFx0XHR7XG5cdFx0XHRcdHJldC5wdXNoKHdvcmQpO1xuXHRcdFx0fVxuXHRcdFx0ZWxzZVxuXHRcdFx0e1xuXHRcdFx0XHQvLyDku4Xlr7nmnKror4bliKvnmoTor43ov5vooYzljLnphY1cblx0XHRcdFx0cmV0ID0gcmV0LmNvbmNhdCh0aGlzLnNwbGl0U2luZ2xlKHdvcmQudykpO1xuXHRcdFx0fVxuXHRcdH1cblx0XHRyZXR1cm4gcmV0O1xuXHR9XG5cblx0LyoqXG5cdCAqIOWNleWtl+WIh+WIhlxuXHQgKlxuXHQgKiBAcGFyYW0ge3N0cmluZ30gdGV4dCDopoHliIfliIbnmoTmlofmnKxcblx0ICogQHBhcmFtIHtpbnR9IGN1ciDlvIDlp4vkvY3nva5cblx0ICogQHJldHVybiB7YXJyYXl9XG5cdCAqL1xuXHRzcGxpdFNpbmdsZSh0ZXh0LCBjdXI/OiBudW1iZXIpOiBJV29yZFtdXG5cdHtcblx0XHRjb25zdCBQT1NUQUcgPSB0aGlzLnNlZ21lbnQuUE9TVEFHO1xuXG5cdFx0aWYgKGlzTmFOKGN1cikpIGN1ciA9IDA7XG5cblx0XHRpZiAoY3VyID4gMClcblx0XHR7XG5cdFx0XHR0ZXh0ID0gdGV4dC5zbGljZShjdXIpO1xuXHRcdH1cblxuXHRcdGxldCByZXQ6IElXb3JkW10gPSBbXTtcblxuXHRcdFVTdHJpbmdcblx0XHRcdC5zcGxpdCh0ZXh0LCAnJylcblx0XHRcdC5mb3JFYWNoKGZ1bmN0aW9uICh3LCBpKVxuXHRcdFx0e1xuXHRcdFx0XHRyZXQucHVzaCh7XG5cdFx0XHRcdFx0dyxcblx0XHRcdFx0XHRwOiBQT1NUQUcuVU5LLFxuXHRcdFx0XHR9KTtcblx0XHRcdH0pXG5cdFx0O1xuXG5cdFx0cmV0dXJuIHJldDtcblx0fVxufVxuXG5leHBvcnQgY29uc3QgaW5pdCA9IFNpbmdsZVRva2VuaXplci5pbml0LmJpbmQoU2luZ2xlVG9rZW5pemVyKSBhcyB0eXBlb2YgU2luZ2xlVG9rZW5pemVyLmluaXQ7XG5cbmV4cG9ydCBkZWZhdWx0IFNpbmdsZVRva2VuaXplcjtcbiJdfQ==
\No newline at end of file