UNPKG

3.76 kBSource Map (JSON)View Raw
1{"version":3,"file":"JpSimpleTokenizer.js","sourceRoot":"","sources":["JpSimpleTokenizer.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,gCAAyD;AAKzD,IAAkB,yBAYjB;AAZD,WAAkB,yBAAyB;IAE1C;;;OAGG;IACH,iFAAc,CAAA;IACd;;;OAGG;IACH,iFAAc,CAAA;AACf,CAAC,EAZiB,yBAAyB,GAAzB,iCAAyB,KAAzB,iCAAyB,QAY1C;AAED,MAAa,iBAAkB,SAAQ,yBAAmB;IAA1D;;QAIC,SAAI,GAAG,mBAA4B,CAAC;IAyDrC,CAAC;IAvDA,KAAK,CAAC,KAAc,EAAE,GAAG,IAAI;QAE5B,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IACjD,CAAC;IAES,mBAAmB,CAAuB,IAAO,EAAE,IAA+B;QAE3F,OAAO,KAAK,CAAC,UAAU,CAAC,IAAI,EAAE;YAC7B,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI;SACjB,EAAE,IAAI,CAAC,CAAC;IACV,CAAC;IAES,UAAU,CAAC,IAAY;QAEhC,qCAAqC;QAErC,IAAI,IAAI,GAAG,IAAI,CAAC;QAEhB,IAAI,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5B,IAAI,EAAE,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElC,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,EACd;YACC,IAAI,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EACpE;gBACC,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC;wBAChC,CAAC,EAAE,IAAI;qBACP,EAAE,EAAE,CAAC,CAAC,kBAAoC,CAAC,iBAAmC,CAC9E,CAAC,CAAC;aACH;YAED,OAAO,IAAI,CAAC;SACZ;QAED,IAAI,GAAG,GAAY,EAAE,CAAC;QAEtB,IAAI;aACF,KAAK,CAAC,6EAA6E,CAAC;aACpF,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAEtB,IAAI,CAAC,KAAK,EAAE,EACZ;gBACC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC;oBACjC,CAAC;iBACD,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;oBAClB,CAAC,iBAAmC,CACrC,CAAC,CAAC;aACH;QACF,CAAC,CAAC,CAEF;QAED,OAAO,GAAG,CAAC;IACZ,CAAC;;AA3DF,8CA6DC;AA3DO,sBAAI,GAAG,mBAA4B,CAAC;AA6D/B,QAAA,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAkC,CAAC;AAEpG,kBAAe,iBAAiB,CAAC","sourcesContent":["/**\n * Created by user on 2018/4/19/019.\n */\n\nimport { SubSModule, SubSModuleTokenizer } from '../mod';\nimport { Segment, IWord } from '../Segment';\nimport UString from 'uni-string';\nimport { IWordDebug, IWordDebugInfo } from '../util';\n\nexport const enum EnumJpSimpleTokenizerType\n{\n\t/**\n\t * 平仮名\n\t * https://en.wikipedia.org/wiki/Hiragana\n\t */\n\tHIRAGANA = 0x1,\n\t/**\n\t * 片仮名\n\t * https://en.wikipedia.org/wiki/Katakana\n\t */\n\tKATAKANA = 0x2,\n}\n\nexport class JpSimpleTokenizer extends SubSModuleTokenizer\n{\n\tstatic NAME = 'JpSimpleTokenizer' as const;\n\n\tname = 'JpSimpleTokenizer' as const;\n\n\tsplit(words: IWord[], ...argv): IWord[]\n\t{\n\t\treturn this._splitUnset(words, this._splitText);\n\t}\n\n\tprotected createJpSimpleToken<T extends IWordDebug>(data: T, type: EnumJpSimpleTokenizerType)\n\t{\n\t\treturn super.debugToken(data, {\n\t\t\t[this.name]: type,\n\t\t}, true);\n\t}\n\n\tprotected _splitText(text: string): IWord[]\n\t{\n\t\t//const POSTAG = this.segment.POSTAG;\n\n\t\tlet self = this;\n\n\t\tlet b1 = /[ぁ-ん]/.test(text);\n\t\tlet b2 = /[ァ-ヴーア-ン゙ー]/.test(text);\n\n\t\tif (!b1 || !b2)\n\t\t{\n\t\t\tif (b1 && /^[ぁ-ん]+$/.test(text) || b2 && /^[ァ-ヴーア-ン゙ー]+$/.test(text))\n\t\t\t{\n\t\t\t\treturn [self.createJpSimpleToken({\n\t\t\t\t\tw: text,\n\t\t\t\t}, b1 ? EnumJpSimpleTokenizerType.HIRAGANA : EnumJpSimpleTokenizerType.KATAKANA\n\t\t\t\t)];\n\t\t\t}\n\n\t\t\treturn null;\n\t\t}\n\n\t\tlet ret: IWord[] = [];\n\n\t\ttext\n\t\t\t.split(/((?:[^ァ-ヴーア-ン゙ー]+)?[ぁ-ん]+(?=[ァ-ヴーア-ン゙ー])|(?:[^ぁ-ん]+)?[ァ-ヴーア-ン゙ー]+(?=[ぁ-ん]))/)\n\t\t\t.forEach(function (w, i)\n\t\t\t{\n\t\t\t\tif (w !== '')\n\t\t\t\t{\n\t\t\t\t\tret.push(self.createJpSimpleToken({\n\t\t\t\t\t\tw,\n\t\t\t\t\t}, /[ぁ-ん]/.test(w) ? EnumJpSimpleTokenizerType.HIRAGANA\n\t\t\t\t\t\t\t: EnumJpSimpleTokenizerType.KATAKANA\n\t\t\t\t\t));\n\t\t\t\t}\n\t\t\t})\n\n\t\t;\n\n\t\treturn ret;\n\t}\n\n}\n\nexport const init = JpSimpleTokenizer.init.bind(JpSimpleTokenizer) as typeof JpSimpleTokenizer.init;\n\nexport default JpSimpleTokenizer;\n\n"]}
\No newline at end of file