"use strict";
// Compiled CommonJS module (TypeScript-style emit).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the exported names so they exist on `exports` before the
// definitions below run — standard TS emit; presumably guards circular
// requires (TODO confirm against ../mod's import graph).
exports.init = exports.JpSimpleTokenizer = exports.EnumJpSimpleTokenizerType = void 0;
const mod_1 = require("../mod");
|
var EnumJpSimpleTokenizerType;
(function (E) {
    /** Token consisting of hiragana (ぁ-ん). */
    E[E["HIRAGANA"] = 1] = "HIRAGANA";
    /** Token consisting of katakana (ァ-ヴ, prolonged-sound mark, voicing mark). */
    E[E["KATAKANA"] = 2] = "KATAKANA";
})(EnumJpSimpleTokenizerType = exports.EnumJpSimpleTokenizerType || (exports.EnumJpSimpleTokenizerType = {}));
|
/**
 * Splits Japanese text into runs of hiragana vs. katakana.
 * Tokens are tagged with EnumJpSimpleTokenizerType (1 = hiragana, 2 = katakana).
 */
class JpSimpleTokenizer extends mod_1.SubSModuleTokenizer {
    constructor() {
        super(...arguments);
        // Runtime identifier; also used as the debug-token key in createJpSimpleToken.
        this.name = 'JpSimpleTokenizer';
    }
    /**
     * Tokenizer entry point: hands each not-yet-classified word to _splitText.
     */
    split(words, ...argv) {
        return this._splitUnset(words, this._splitText);
    }
    /**
     * Wrap token data and tag it with this tokenizer's type under `this.name`.
     */
    createJpSimpleToken(data, type) {
        return super.debugToken(data, {
            [this.name]: type,
        }, true);
    }
    /**
     * Split `text` at hiragana/katakana script boundaries.
     * Returns null when the text contains no pure kana run to report.
     */
    _splitText(text) {
        const self = this;
        const hasHiragana = /[ぁ-ん]/.test(text);
        const hasKatakana = /[ァ-ヴーア-ン゙ー]/.test(text);
        if (!hasHiragana || !hasKatakana) {
            // Single-script input: emit one token only when the whole string
            // is one kana script; anything else is left for other tokenizers.
            const allHiragana = hasHiragana && /^[ぁ-ん]+$/.test(text);
            const allKatakana = hasKatakana && /^[ァ-ヴーア-ン゙ー]+$/.test(text);
            if (allHiragana || allKatakana) {
                return [self.createJpSimpleToken({
                        w: text,
                    }, hasHiragana ? 1 : 2)];
            }
            return null;
        }
        // Mixed input: the capturing group keeps each separator run in the
        // split output, so every piece is a maximal same-script chunk.
        const tokens = [];
        const pieces = text.split(/((?:[^ァ-ヴーア-ン゙ー]+)?[ぁ-ん]+(?=[ァ-ヴーア-ン゙ー])|(?:[^ぁ-ん]+)?[ァ-ヴーア-ン゙ー]+(?=[ぁ-ん]))/);
        for (const piece of pieces) {
            if (piece !== '') {
                tokens.push(self.createJpSimpleToken({
                    w: piece,
                }, /[ぁ-ん]/.test(piece) ? 1 : 2));
            }
        }
        return tokens;
    }
}
|
exports.JpSimpleTokenizer = JpSimpleTokenizer;
// Static registry name; mirrors the instance-level `name` set in the constructor.
JpSimpleTokenizer.NAME = 'JpSimpleTokenizer';
// `init` is presumably inherited from SubSModuleTokenizer — bind it so the
// factory constructs this subclass (TODO confirm against ../mod).
exports.init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer);
exports.default = JpSimpleTokenizer;
|
65 |
|
\ | No newline at end of file |