1 | 'use strict';
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.init = exports.WildcardTokenizer = void 0;
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 | const mod_1 = require("../mod");
|
/**
 * Tokenizer that cuts known wildcard entries out of a piece of text.
 *
 * Two dictionaries are fetched from `this.segment` in `_cache()`:
 *  - `WILDCARD`  (`this._TABLE`):  looked up by lower-cased word; the entry's
 *    `p` value is copied onto the produced token.
 *  - `WILDCARD2` (`this._TABLE2`): looked up by length key first, then by
 *    lower-cased word; used by `matchWord()` to find candidates.
 *
 * NOTE(review): `splitWildcard()` assumes every word matched through
 * `WILDCARD2` also exists in `WILDCARD` - confirm against the dictionary loader.
 */
class WildcardTokenizer extends mod_1.SubSModuleTokenizer {
    constructor(...args) {
        super(...args);
        this.name = 'WildcardTokenizer';
    }

    /**
     * Runs the parent `_cache()` and then stores the two wildcard
     * dictionaries on the instance.
     */
    _cache() {
        super._cache();
        this._TABLE = this.segment.getDict('WILDCARD');
        this._TABLE2 = this.segment.getDict('WILDCARD2');
    }

    /**
     * Delegates to the inherited `_splitUnknow`, handing it `splitWildcard`
     * as the per-word splitter.
     *
     * @param words - word list forwarded unchanged to `_splitUnknow`
     * @returns whatever `_splitUnknow` returns
     */
    split(words) {
        // The method is passed unbound; presumably `_splitUnknow` invokes it
        // with this tokenizer as `this` - TODO confirm in the base class.
        return this._splitUnknow(words, this.splitWildcard);
    }

    /**
     * Builds a token through the inherited `createToken(word, true, attr)`.
     *
     * @param word - token data (`splitWildcard` passes `{ w, p }`)
     * @param lasttype - not used; kept so the signature stays compatible
     * @param attr - forwarded to `createToken`
     * @returns the value returned by `createToken`
     */
    createWildcardToken(word, lasttype, attr) {
        return this.createToken(word, true, attr);
    }

    /**
     * Splits `text` into wildcard tokens and the plain fragments between them.
     *
     * @param {string} text - text to split
     * @param cur - not used; kept so the signature stays compatible
     * @returns {Array|undefined} fragments in text order - `{ w }` objects for
     *   unmatched text and `createWildcardToken()` results for matches - or
     *   `undefined` when `text` contains no wildcard.
     */
    splitWildcard(text, cur) {
        const TABLE = this._TABLE;
        const matches = this.matchWord(text);
        if (!matches.length) {
            return undefined;
        }

        const ret = [];
        // Offset just past the previously emitted match.
        let lastc = 0;
        for (const bw of matches) {
            if (bw.c > lastc) {
                // Plain text sitting between two matches (or before the first).
                ret.push({
                    w: text.slice(lastc, bw.c),
                });
            }
            ret.push(this.createWildcardToken({
                w: bw.w,
                p: TABLE[bw.w.toLowerCase()].p,
            }));
            lastc = bw.c + bw.w.length;
        }
        if (lastc < text.length) {
            // Plain text after the last match.
            ret.push({
                w: text.slice(lastc),
            });
        }
        return ret;
    }

    /**
     * Scans `text` from offset `cur` and collects every wildcard found.
     *
     * At each offset all length keys of `WILDCARD2` are tried and the last
     * matching key in enumeration order wins; after a match the scan resumes
     * right behind it, otherwise it advances by one UTF-16 unit.
     *
     * @param {string} text - text to scan
     * @param {number} [cur=0] - start offset; a value that is not a number
     *   (including `undefined`) is treated as 0
     * @returns {Array<{w: string, c: number}>} matches in text order, `w`
     *   being the matched slice of `text` (original casing) and `c` its offset
     */
    matchWord(text, cur) {
        const TABLE = this._TABLE2;
        const hasOwn = Object.prototype.hasOwnProperty;
        // Global isNaN on purpose: `undefined` must fall back to 0.
        let pos = isNaN(cur) ? 0 : Number(cur);
        const ret = [];

        while (pos < text.length) {
            let stopword = null;
            for (const key in TABLE) {
                // Ignore inherited enumerable keys.
                if (!hasOwn.call(TABLE, key)) {
                    continue;
                }
                const len = Number(key);
                // A zero/non-numeric length could only yield an empty match,
                // which would never advance the scan.
                if (!(len > 0)) {
                    continue;
                }
                // Lower-case the slice rather than the whole text: lower-casing
                // can change the string length, which would shift the offsets.
                const candidate = text.slice(pos, pos + len);
                // Own-property test so inherited names such as "constructor"
                // are never reported as wildcards.
                if (hasOwn.call(TABLE[key], candidate.toLowerCase())) {
                    stopword = {
                        w: candidate,
                        c: pos,
                    };
                }
            }
            if (stopword !== null) {
                ret.push(stopword);
                pos += stopword.w.length;
            }
            else {
                pos++;
            }
        }
        return ret;
    }
}
|
// Public CommonJS exports: the class is exposed by name and, further down, as `default`.
103 | exports.WildcardTokenizer = WildcardTokenizer;
|
// `init` is read from the class (it is not declared in this file's class body) and
// bound to the class, so it still receives WildcardTokenizer as `this` when called detached.
104 | exports.init = WildcardTokenizer.init.bind(WildcardTokenizer);
|
105 | exports.default = WildcardTokenizer;
|
106 |
|
\ | No newline at end of file |