UNPKG

1.71 kBJavaScriptView Raw
1'use strict';
2Object.defineProperty(exports, "__esModule", { value: true });
3exports.init = exports.SingleTokenizer = void 0;
4const mod_1 = require("../mod");
5const uni_string_1 = require("uni-string");
6/**
7 * 单字切分模块
8 * 此模組不包含模組列表內 需要手動指定
9 *
10 * @author 老雷<leizongmin@gmail.com>
11 */
/**
 * Single-character tokenizer module.
 * This module is not part of the module list and has to be specified
 * manually.
 *
 * @author 老雷<leizongmin@gmail.com>
 */
class SingleTokenizer extends mod_1.SubSModuleTokenizer {
    /**
     * Break every still-unrecognized word into single-character tokens.
     *
     * An entry whose `p` is `undefined` or truthy is kept as-is; any other
     * entry is replaced by the result of `splitSingle(entry.w)`.
     * Iteration stops at the first falsy element of `words`.
     *
     * @param {array} words word objects carrying `w` (text) and `p` (tag)
     * @return {array} a new array; `words` itself is not modified
     */
    split(words) {
        const ret = [];
        for (let i = 0; words[i]; i++) {
            const word = words[i];
            if (word.p === undefined || word.p) {
                ret.push(word);
                continue;
            }
            // Only unrecognized words are split. Append in place instead of
            // re-creating the whole result with concat() on every iteration.
            for (const piece of this.splitSingle(word.w)) {
                ret.push(piece);
            }
        }
        return ret;
    }
    /**
     * Split a text into one token per character, each tagged `POSTAG.UNK`.
     *
     * The actual splitting is delegated to `uni-string`'s `split(text, '')`
     * (presumably Unicode-aware, so surrogate pairs stay together — confirm
     * against the uni-string documentation).
     *
     * @param {string} text text to split
     * @param {int} cur start offset; applied with `text.slice(cur)`, ignored
     *   when missing, not a number, or not greater than zero
     * @return {array} tokens of the form `{ w, p }`
     */
    splitSingle(text, cur) {
        const POSTAG = this.segment.POSTAG;
        // `cur > 0` is already false for undefined / NaN, so no separate
        // isNaN() normalisation is needed before deciding whether to slice.
        const source = cur > 0 ? text.slice(cur) : text;
        return uni_string_1.default
            .split(source, '')
            .map((w) => ({
                w,
                p: POSTAG.UNK,
            }));
    }
}
// Public exports: the class as a named export and as the default export.
59exports.SingleTokenizer = SingleTokenizer;
// `init` is the class's static `init`, bound to the class so it can be
// called detached (presumably inherited from SubSModuleTokenizer — confirm in ../mod).
60exports.init = SingleTokenizer.init.bind(SingleTokenizer);
61exports.default = SingleTokenizer;
62//# sourceMappingURL=SingleTokenizer.js.map
\No newline at end of file