// Retrieved from UNPKG (2.38 kB, JavaScript, raw view).
1"use strict";
2/**
3 * Created by user on 2018/4/19/019.
4 */
5Object.defineProperty(exports, "__esModule", { value: true });
6exports.init = exports.JpSimpleTokenizer = exports.EnumJpSimpleTokenizerType = void 0;
7const mod_1 = require("../mod");
/**
 * Kana script categories attached to tokens produced by this module.
 *
 * Compiled TypeScript enum: each member is mapped both ways
 * (name -> number and number -> name) on the same object, which is also
 * published as `exports.EnumJpSimpleTokenizerType`.
 */
var EnumJpSimpleTokenizerType;
(function (EnumJpSimpleTokenizerType) {
    /**
     * Hiragana
     * https://en.wikipedia.org/wiki/Hiragana
     */
    EnumJpSimpleTokenizerType[EnumJpSimpleTokenizerType["HIRAGANA"] = 1] = "HIRAGANA";
    /**
     * Katakana
     * https://en.wikipedia.org/wiki/Katakana
     */
    EnumJpSimpleTokenizerType[EnumJpSimpleTokenizerType["KATAKANA"] = 2] = "KATAKANA";
})(EnumJpSimpleTokenizerType = exports.EnumJpSimpleTokenizerType || (exports.EnumJpSimpleTokenizerType = {}));
/**
 * Tokenizer sub-module that separates hiragana segments from katakana segments.
 *
 * Extends `SubSModuleTokenizer` from `../mod`. The helpers `_splitUnset` and
 * `debugToken` used below are not defined in this class and are expected to
 * come from that base class (not visible in this file).
 */
class JpSimpleTokenizer extends mod_1.SubSModuleTokenizer {
    constructor(...args) {
        super(...args);
        this.name = 'JpSimpleTokenizer';
    }

    /**
     * Entry point: hands `words` to `_splitUnset` together with `_splitText`
     * as the per-text splitter.
     *
     * @param words - word list forwarded unchanged to `_splitUnset`
     * @param argv - accepted for signature compatibility; not used here
     * @returns whatever `_splitUnset` returns
     */
    split(words, ...argv) {
        // NOTE(review): `_splitText` is passed unbound, so `_splitUnset` must
        // invoke it with the tokenizer as `this` — confirm in the base class.
        return this._splitUnset(words, this._splitText);
    }

    /**
     * Builds a token via the base class `debugToken`, attaching
     * `{ [this.name]: type }` as its second argument and `true` as its third.
     *
     * @param data - token data, e.g. `{ w: 'text' }`
     * @param type - numeric kana type (1 = HIRAGANA, 2 = KATAKANA)
     * @returns the value returned by `debugToken`
     */
    createJpSimpleToken(data, type) {
        return super.debugToken(data, {
            [this.name]: type,
        }, true);
    }

    /**
     * Splits `text` at the boundaries between hiragana and katakana.
     *
     * - Text made up solely of hiragana, or solely of katakana, yields a
     *   single token of that type.
     * - Text containing only one of the two scripts mixed with other
     *   characters, or neither script, yields `null`.
     * - Text containing both scripts is split into segments; a segment is
     *   typed HIRAGANA if it contains any hiragana, otherwise KATAKANA.
     *
     * @param text - string to split
     * @returns array of tokens, or `null` when nothing can be split
     */
    _splitText(text) {
        // Numeric values of EnumJpSimpleTokenizerType, inlined as literals.
        const HIRAGANA = 1;
        const KATAKANA = 2;

        const hasHiragana = /[ぁ-ん]/.test(text);
        // NOTE(review): as written, the second range (ア-ン) and the trailing ー
        // repeat characters already covered by ァ-ヴー; half-width katakana may
        // have been intended — confirm against the upstream source.
        const hasKatakana = /[ァ-ヴーア-ン゙ー]/.test(text);

        if (!hasHiragana || !hasKatakana) {
            // At most one script is present: emit one token only when the
            // whole text consists of that script.
            const isPureHiragana = hasHiragana && /^[ぁ-ん]+$/.test(text);
            const isPureKatakana = hasKatakana && /^[ァ-ヴーア-ン゙ー]+$/.test(text);
            if (isPureHiragana || isPureKatakana) {
                return [this.createJpSimpleToken({
                    w: text,
                }, hasHiragana ? HIRAGANA : KATAKANA)];
            }
            return null;
        }

        // Both scripts are present. The capturing group keeps each matched
        // segment in the result of `split`; a segment is a kana run (with any
        // leading characters that are not of the other script) that is
        // immediately followed by a character of the other script.
        const pieces = text.split(/((?:[^ァ-ヴーア-ン゙ー]+)?[ぁ-ん]+(?=[ァ-ヴーア-ン゙ー])|(?:[^ぁ-ん]+)?[ァ-ヴーア-ン゙ー]+(?=[ぁ-ん]))/);
        const tokens = [];
        for (const piece of pieces) {
            // `split` yields empty strings between adjacent matches; skip them.
            if (piece === '') {
                continue;
            }
            const type = /[ぁ-ん]/.test(piece) ? HIRAGANA : KATAKANA;
            tokens.push(this.createJpSimpleToken({
                w: piece,
            }, type));
        }
        return tokens;
    }
}
exports.JpSimpleTokenizer = JpSimpleTokenizer;
// Static module name, kept identical to the instance `name` set in the constructor.
JpSimpleTokenizer.NAME = 'JpSimpleTokenizer';
// `init` is exported pre-bound so it can be called detached from the class.
// NOTE(review): `init` is not defined in this file; presumably a static
// inherited from the base class — confirm.
exports.init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer);
exports.default = JpSimpleTokenizer;
//# sourceMappingURL=JpSimpleTokenizer.js.map
// (No newline at end of file.)