1 | ;
|
2 | /**
|
3 | * Created by user on 2018/4/19/019.
|
4 | */
|
5 | Object.defineProperty(exports, "__esModule", { value: true });
|
6 | exports.init = exports.JpSimpleTokenizer = exports.EnumJpSimpleTokenizerType = void 0;
|
7 | const mod_1 = require("../mod");
|
/**
 * Runtime enum of the kana script categories used to tag tokens.
 *
 * The object is shared with `exports.EnumJpSimpleTokenizerType` (reused when
 * already present, otherwise created here) and carries both the
 * name -> number and the number -> name mapping for every member.
 */
var EnumJpSimpleTokenizerType = exports.EnumJpSimpleTokenizerType || (exports.EnumJpSimpleTokenizerType = {});
/**
 * Hiragana
 * https://en.wikipedia.org/wiki/Hiragana
 */
EnumJpSimpleTokenizerType.HIRAGANA = 1;
EnumJpSimpleTokenizerType[1] = "HIRAGANA";
/**
 * Katakana
 * https://en.wikipedia.org/wiki/Katakana
 */
EnumJpSimpleTokenizerType.KATAKANA = 2;
EnumJpSimpleTokenizerType[2] = "KATAKANA";
|
/**
 * Tokenizer sub-module that separates Japanese kana text into hiragana runs
 * and katakana runs, tagging each produced token with a script type code
 * (1 = HIRAGANA, 2 = KATAKANA).
 *
 * Extends `SubSModuleTokenizer` from `../mod`; `_splitUnset` and `debugToken`
 * are not defined in this class and are used as provided by that base class.
 */
let JpSimpleTokenizer = class JpSimpleTokenizer extends mod_1.SubSModuleTokenizer {
    /**
     * Forwards every constructor argument to the base class, then sets the
     * instance `name` (also used as the debug-info key in
     * `createJpSimpleToken`).
     */
    constructor(...args) {
        super(...args);
        this.name = 'JpSimpleTokenizer';
    }

    /**
     * Entry point: hands the word list to the base-class `_splitUnset`
     * together with `_splitText` as the per-text splitter.
     *
     * NOTE(review): `_splitText` is passed unbound although it relies on
     * `this` - presumably `_splitUnset` binds it before calling; confirm
     * against the base class.
     *
     * @param words - word list to process
     * @param argv - extra arguments; accepted but not used here
     * @returns whatever `_splitUnset` returns
     */
    split(words, ...argv) {
        const splitter = this._splitText;
        return this._splitUnset(words, splitter);
    }

    /**
     * Builds a token through the base-class `debugToken`, attaching the
     * script type under a key equal to this tokenizer's `name`.
     *
     * @param data - token data (callers in this class pass `{ w: string }`)
     * @param type - script type code (1 = HIRAGANA, 2 = KATAKANA)
     * @returns the value returned by `debugToken`
     */
    createJpSimpleToken(data, type) {
        const debugInfo = {
            [this.name]: type,
        };
        return super.debugToken(data, debugInfo, true);
    }

    /**
     * Splits one piece of text at hiragana/katakana boundaries.
     *
     * - Text containing both scripts is cut with a capturing split; every
     *   non-empty piece becomes a token typed HIRAGANA when it contains any
     *   hiragana character and KATAKANA otherwise.
     * - Text made up entirely of one script yields a single token.
     * - Anything else yields `null`.
     *
     * @param text - text to split
     * @returns array of tokens, or `null` when the text is not handled
     */
    _splitText(text) {
        const hasHiragana = /[ぁ-ん]/.test(text);
        const hasKatakana = /[ァ-ヴーア-ン゙ー]/.test(text);

        if (hasHiragana && hasKatakana) {
            // The separator pattern is wrapped in a capture group, so the
            // matched runs are kept in the split result alongside the text
            // between them.
            const pieces = text.split(/((?:[^ァ-ヴーア-ン゙ー]+)?[ぁ-ん]+(?=[ァ-ヴーア-ン゙ー])|(?:[^ぁ-ん]+)?[ァ-ヴーア-ン゙ー]+(?=[ぁ-ん]))/);
            const tokens = [];
            for (const piece of pieces) {
                // split() emits empty strings around adjacent matches.
                if (piece === '') {
                    continue;
                }
                const kind = /[ぁ-ん]/.test(piece)
                    ? 1 /* HIRAGANA */
                    : 2 /* KATAKANA */;
                tokens.push(this.createJpSimpleToken({
                    w: piece,
                }, kind));
            }
            return tokens;
        }

        // At most one script is present from here on.
        const isAllHiragana = hasHiragana && /^[ぁ-ん]+$/.test(text);
        const isAllKatakana = hasKatakana && /^[ァ-ヴーア-ン゙ー]+$/.test(text);
        if (isAllHiragana || isAllKatakana) {
            const kind = hasHiragana ? 1 /* HIRAGANA */ : 2 /* KATAKANA */;
            return [this.createJpSimpleToken({
                w: text,
            }, kind)];
        }

        // No kana at all, or a single script mixed with other characters.
        return null;
    }
};
// Static identifier, same string as the instance `name`.
JpSimpleTokenizer.NAME = 'JpSimpleTokenizer';
|
// Named export of the tokenizer class.
exports.JpSimpleTokenizer = JpSimpleTokenizer;
// Export the class's static `init`, bound to the class so it can be called
// as a standalone function. `init` is not defined in this file - presumably
// it is inherited from the base class in `../mod`; confirm.
exports.init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer);
// Default export is the same class.
exports.default = JpSimpleTokenizer;
|
68 | //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiSnBTaW1wbGVUb2tlbml6ZXIuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyJKcFNpbXBsZVRva2VuaXplci50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiO0FBQUE7O0dBRUc7OztBQUVILGdDQUF5RDtBQUt6RCxJQUFrQix5QkFZakI7QUFaRCxXQUFrQix5QkFBeUI7SUFFMUM7OztPQUdHO0lBQ0gsaUZBQWMsQ0FBQTtJQUNkOzs7T0FHRztJQUNILGlGQUFjLENBQUE7QUFDZixDQUFDLEVBWmlCLHlCQUF5QixHQUF6QixpQ0FBeUIsS0FBekIsaUNBQXlCLFFBWTFDO0FBRUQ7SUFBQSxNQUFhLGlCQUFrQixTQUFRLHlCQUFtQjtRQUExRDs7WUFJQyxTQUFJLEdBQUcsbUJBQTRCLENBQUM7UUF5RHJDLENBQUM7UUF2REEsS0FBSyxDQUFDLEtBQWMsRUFBRSxHQUFHLElBQUk7WUFFNUIsT0FBTyxJQUFJLENBQUMsV0FBVyxDQUFDLEtBQUssRUFBRSxJQUFJLENBQUMsVUFBVSxDQUFDLENBQUM7UUFDakQsQ0FBQztRQUVTLG1CQUFtQixDQUF1QixJQUFPLEVBQUUsSUFBK0I7WUFFM0YsT0FBTyxLQUFLLENBQUMsVUFBVSxDQUFDLElBQUksRUFBRTtnQkFDN0IsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLEVBQUUsSUFBSTthQUNqQixFQUFFLElBQUksQ0FBQyxDQUFDO1FBQ1YsQ0FBQztRQUVTLFVBQVUsQ0FBQyxJQUFZO1lBRWhDLHFDQUFxQztZQUVyQyxJQUFJLElBQUksR0FBRyxJQUFJLENBQUM7WUFFaEIsSUFBSSxFQUFFLEdBQUcsT0FBTyxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsQ0FBQztZQUM1QixJQUFJLEVBQUUsR0FBRyxhQUFhLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxDQUFDO1lBRWxDLElBQUksQ0FBQyxFQUFFLElBQUksQ0FBQyxFQUFFLEVBQ2Q7Z0JBQ0MsSUFBSSxFQUFFLElBQUksVUFBVSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsSUFBSSxFQUFFLElBQUksZ0JBQWdCLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxFQUNwRTtvQkFDQyxPQUFPLENBQUMsSUFBSSxDQUFDLG1CQUFtQixDQUFDOzRCQUNoQyxDQUFDLEVBQUUsSUFBSTt5QkFDUCxFQUFFLEVBQUUsQ0FBQyxDQUFDLGtCQUFvQyxDQUFDLGlCQUFtQyxDQUM5RSxDQUFDLENBQUM7aUJBQ0g7Z0JBRUQsT0FBTyxJQUFJLENBQUM7YUFDWjtZQUVELElBQUksR0FBRyxHQUFZLEVBQUUsQ0FBQztZQUV0QixJQUFJO2lCQUNGLEtBQUssQ0FBQyw2RUFBNkUsQ0FBQztpQkFDcEYsT0FBTyxDQUFDLFVBQVUsQ0FBQyxFQUFFLENBQUM7Z0JBRXRCLElBQUksQ0FBQyxLQUFLLEVBQUUsRUFDWjtvQkFDQyxHQUFHLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxtQkFBbUIsQ0FBQzt3QkFDakMsQ0FBQztxQkFDRCxFQUFFLE9BQU8sQ0FBQyxJQUFJLENBQUMsQ0FBQyxDQUFDLENBQUMsQ0FBQzt3QkFDbEIsQ0FBQyxpQkFBbUMsQ0FDckMsQ0FBQyxDQUFDO2lCQUNIO1lBQ0YsQ0FBQyxDQUFDLENBRUY7WUFFRCxPQUFPLEdBQUcsQ0FBQztRQUNaLENBQUM7O0lBekRNL
HNCQUFJLEdBQUcsbUJBQTRCLENBQUM7SUEyRDVDLHdCQUFDO0tBQUE7QUE3RFksOENBQWlCO0FBK0RqQixRQUFBLElBQUksR0FBRyxpQkFBaUIsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLGlCQUFpQixDQUFrQyxDQUFDO0FBRXBHLGtCQUFlLGlCQUFpQixDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiLyoqXG4gKiBDcmVhdGVkIGJ5IHVzZXIgb24gMjAxOC80LzE5LzAxOS5cbiAqL1xuXG5pbXBvcnQgeyBTdWJTTW9kdWxlLCBTdWJTTW9kdWxlVG9rZW5pemVyIH0gZnJvbSAnLi4vbW9kJztcbmltcG9ydCB7IFNlZ21lbnQsIElXb3JkIH0gZnJvbSAnLi4vU2VnbWVudCc7XG5pbXBvcnQgVVN0cmluZyBmcm9tICd1bmktc3RyaW5nJztcbmltcG9ydCB7IElXb3JkRGVidWcsIElXb3JkRGVidWdJbmZvIH0gZnJvbSAnLi4vdXRpbCc7XG5cbmV4cG9ydCBjb25zdCBlbnVtIEVudW1KcFNpbXBsZVRva2VuaXplclR5cGVcbntcblx0LyoqXG5cdCAqIOW5s+S7ruWQjVxuXHQgKiBodHRwczovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9IaXJhZ2FuYVxuXHQgKi9cblx0SElSQUdBTkEgPSAweDEsXG5cdC8qKlxuXHQgKiDniYfku67lkI1cblx0ICogaHR0cHM6Ly9lbi53aWtpcGVkaWEub3JnL3dpa2kvS2F0YWthbmFcblx0ICovXG5cdEtBVEFLQU5BID0gMHgyLFxufVxuXG5leHBvcnQgY2xhc3MgSnBTaW1wbGVUb2tlbml6ZXIgZXh0ZW5kcyBTdWJTTW9kdWxlVG9rZW5pemVyXG57XG5cdHN0YXRpYyBOQU1FID0gJ0pwU2ltcGxlVG9rZW5pemVyJyBhcyBjb25zdDtcblxuXHRuYW1lID0gJ0pwU2ltcGxlVG9rZW5pemVyJyBhcyBjb25zdDtcblxuXHRzcGxpdCh3b3JkczogSVdvcmRbXSwgLi4uYXJndik6IElXb3JkW11cblx0e1xuXHRcdHJldHVybiB0aGlzLl9zcGxpdFVuc2V0KHdvcmRzLCB0aGlzLl9zcGxpdFRleHQpO1xuXHR9XG5cblx0cHJvdGVjdGVkIGNyZWF0ZUpwU2ltcGxlVG9rZW48VCBleHRlbmRzIElXb3JkRGVidWc+KGRhdGE6IFQsIHR5cGU6IEVudW1KcFNpbXBsZVRva2VuaXplclR5cGUpXG5cdHtcblx0XHRyZXR1cm4gc3VwZXIuZGVidWdUb2tlbihkYXRhLCB7XG5cdFx0XHRbdGhpcy5uYW1lXTogdHlwZSxcblx0XHR9LCB0cnVlKTtcblx0fVxuXG5cdHByb3RlY3RlZCBfc3BsaXRUZXh0KHRleHQ6IHN0cmluZyk6IElXb3JkW11cblx0e1xuXHRcdC8vY29uc3QgUE9TVEFHID0gdGhpcy5zZWdtZW50LlBPU1RBRztcblxuXHRcdGxldCBzZWxmID0gdGhpcztcblxuXHRcdGxldCBiMSA9IC9b44GBLeOCk10vLnRlc3QodGV4dCk7XG5cdFx0bGV0IGIyID0gL1vjgqEt44O044O8772xLe++ne++nu+9sF0vLnRlc3QodGV4dCk7XG5cblx0XHRpZiAoIWIxIHx8ICFiMilcblx0XHR7XG5cdFx0XHRpZiAoYjEgJiYgL15b44GBLeOCk10rJC8udGVzdCh0ZXh0KSB8fCBiMiAmJiAvXlvjgqEt44O044O8772xLe++ne++nu+9sF0rJC8udGVzdCh0ZXh0KSlcblx0XHRcdHtcblx0XHRcdFx0cmV0dXJuIFtzZWxmLmNyZWF0ZUpwU2ltcGxlVG9rZW4oe1xuX
HRcdFx0XHRcdHc6IHRleHQsXG5cdFx0XHRcdH0sIGIxID8gRW51bUpwU2ltcGxlVG9rZW5pemVyVHlwZS5ISVJBR0FOQSA6IEVudW1KcFNpbXBsZVRva2VuaXplclR5cGUuS0FUQUtBTkFcblx0XHRcdFx0KV07XG5cdFx0XHR9XG5cblx0XHRcdHJldHVybiBudWxsO1xuXHRcdH1cblxuXHRcdGxldCByZXQ6IElXb3JkW10gPSBbXTtcblxuXHRcdHRleHRcblx0XHRcdC5zcGxpdCgvKCg/Olte44KhLeODtOODvO+9sS3vvp3vvp7vvbBdKyk/W+OBgS3jgpNdKyg/PVvjgqEt44O044O8772xLe++ne++nu+9sF0pfCg/Olte44GBLeOCk10rKT9b44KhLeODtOODvO+9sS3vvp3vvp7vvbBdKyg/PVvjgYEt44KTXSkpLylcblx0XHRcdC5mb3JFYWNoKGZ1bmN0aW9uICh3LCBpKVxuXHRcdFx0e1xuXHRcdFx0XHRpZiAodyAhPT0gJycpXG5cdFx0XHRcdHtcblx0XHRcdFx0XHRyZXQucHVzaChzZWxmLmNyZWF0ZUpwU2ltcGxlVG9rZW4oe1xuXHRcdFx0XHRcdFx0dyxcblx0XHRcdFx0XHR9LCAvW+OBgS3jgpNdLy50ZXN0KHcpID8gRW51bUpwU2ltcGxlVG9rZW5pemVyVHlwZS5ISVJBR0FOQVxuXHRcdFx0XHRcdFx0XHQ6IEVudW1KcFNpbXBsZVRva2VuaXplclR5cGUuS0FUQUtBTkFcblx0XHRcdFx0XHQpKTtcblx0XHRcdFx0fVxuXHRcdFx0fSlcblxuXHRcdDtcblxuXHRcdHJldHVybiByZXQ7XG5cdH1cblxufVxuXG5leHBvcnQgY29uc3QgaW5pdCA9IEpwU2ltcGxlVG9rZW5pemVyLmluaXQuYmluZChKcFNpbXBsZVRva2VuaXplcikgYXMgdHlwZW9mIEpwU2ltcGxlVG9rZW5pemVyLmluaXQ7XG5cbmV4cG9ydCBkZWZhdWx0IEpwU2ltcGxlVG9rZW5pemVyO1xuXG4iXX0= |
\ | No newline at end of file |