1 | 'use strict';
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.init = exports.SingleTokenizer = void 0;
|
4 | const mod_1 = require("../mod");
|
5 | const uni_string_1 = require("uni-string");
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
/**
 * Tokenizer that expands words lacking a usable part-of-speech tag into a
 * sequence of single-unit tokens, each tagged `POSTAG.UNK`.
 *
 * Extends `SubSModuleTokenizer` from `../mod`. Both methods rely on
 * `this.segment.POSTAG`, which is expected to be provided by the base class
 * or the owning segment (not visible in this file — confirm against `../mod`).
 */
class SingleTokenizer extends mod_1.SubSModuleTokenizer {
    /**
     * Walk a word list and replace every untagged word by its single units.
     *
     * A word is kept as-is when its `p` property is either missing
     * (`undefined`) or truthy. Only words whose `p` is present but falsy
     * (for example `0`) are expanded via `splitSingle(word.w)`.
     *
     * @param {Array<Object>} words - word objects carrying `w` (the text) and
     *   optionally `p` (the tag).
     * @returns {Array<Object>} a new array; kept words are the same object
     *   references, expanded words are replaced by fresh `{ w, p }` objects.
     */
    split(words) {
        const ret = [];
        for (const word of words) {
            if (!word) {
                // Historical behaviour: iteration ends at the first falsy
                // entry instead of skipping it or throwing.
                break;
            }
            if (typeof word.p === 'undefined' || word.p) {
                ret.push(word);
                continue;
            }
            // Append in place; re-creating the array with concat() for every
            // expanded word made the loop quadratic in the output size.
            for (const piece of this.splitSingle(word.w)) {
                ret.push(piece);
            }
        }
        return ret;
    }

    /**
     * Break a text into single units and tag each one as unknown.
     *
     * The actual splitting is delegated to `uni-string`'s `split(text, '')`;
     * presumably this yields one entry per Unicode character — confirm
     * against the `uni-string` documentation.
     *
     * @param {string} text - text to break up.
     * @param {number} [cur] - offset passed to `text.slice()` before
     *   splitting; values that do not coerce to a number are treated as 0,
     *   and offsets that are not greater than 0 leave `text` untouched.
     * @returns {Array<{w: string, p: *}>} one `{ w, p: POSTAG.UNK }` object
     *   per unit returned by the splitter.
     */
    splitSingle(text, cur) {
        const POSTAG = this.segment.POSTAG;
        // Number.isNaN(Number(x)) is the explicit spelling of the coercing
        // global isNaN(x); the parameter itself is left unmodified.
        const start = Number.isNaN(Number(cur)) ? 0 : cur;
        const rest = start > 0 ? text.slice(start) : text;
        return uni_string_1.default
            .split(rest, '')
            .map((w) => ({
                w,
                p: POSTAG.UNK,
            }));
    }
}
|
// Public CommonJS surface of this module:
//   - `SingleTokenizer` and `default` both refer to the class itself.
//   - `init` is the class's static `init` method (not defined in the class
//     body in this file, so presumably inherited from SubSModuleTokenizer)
//     bound to the class, so it keeps `this === SingleTokenizer` when it is
//     called as a detached function.
59 | exports.SingleTokenizer = SingleTokenizer;
|
60 | exports.init = SingleTokenizer.init.bind(SingleTokenizer);
|
61 | exports.default = SingleTokenizer;
|
62 |
|
\ | No newline at end of file |