UNPKG

41.7 kBSource Map (JSON)View Raw
1{"version":3,"file":"DictTokenizer.js","sourceRoot":"","sources":["DictTokenizer.ts"],"names":[],"mappings":"AAAA,YAAY,CAAC;;;AAEb,gCAA8E;AAI9E,yCAAiD;AACjD,gDAAsH;AAGtH,wCAAwC;AAG3B,QAAA,uBAAuB,GAAG,EAAE,CAAC;AAC7B,QAAA,2BAA2B,GAAG,EAAE,CAAC;AAE9C;;;;GAIG;AACH,MAAa,aAAc,SAAQ,yBAAmB;IAAtD;;QAGC;;;;;;;;;WASG;QACH,oBAAe,GAAG,+BAAuB,CAAC;QAC1C;;;WAGG;QACH,gCAA2B,GAAG,mCAA2B,CAAC;IAy0B3D,CAAC;IAp0BA,MAAM;QAEL,KAAK,CAAC,MAAM,EAAE,CAAC;QACf,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC9C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC;QAEnC,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,IAAI,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,GAAG,mCAA2B,EAC7H;YACC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC;SAC1D;QAED,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,IAAI,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,GAAG,mCAA2B,EAC7H;YACC,IAAI,CAAC,2BAA2B,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC;SACtE;IACF,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAc;QAEnB,eAAe;QACf,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1B,8BAA8B;QAE9B,MAAM,IAAI,GAAG,IAAI,CAAC;QAElB,IAAI,GAAG,GAAY,EAAE,CAAC;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAC1C;YACC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,EACd;gBACC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACf,SAAS;aACT;YAED,cAAc;YACd,IAAI,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvD,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EACvB;gBACC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACf,SAAS;aACT;YAED,YAAY;YACZ,IAAI,KAAK,GAAG,CAAC,CAAC;YAEd,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,EAAE;gBAEhC,IAAI,EAAE,CAAC,CAAC,GAAG,KAAK,EAChB;oBACC,GAAG,CAAC,IAAI,CAAC;wBACR,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;qBACrC,CAAC,CAAC;iBACH;gBAED,IAAI,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC;oBAC5B,CAAC,EAAE,EAAE,CAAC,CAAC;oBACP,CAAC,EAAE,EAAE,CAAC,CAAC;iBACP,EAAE,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEhB,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEb;;;;;;;kBAOE;gBACF,KAAK,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;YAC5B,CAAC,CAAC,CAAC;YAEH,IAAI,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAC7C,IAAI,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,EAClD;gBACC,IAAI,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC;oBAC5B,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;iBAChD,CAAC,CAAC;gBAEH,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;aACb;SACD;QAED,KAAK,GAAG,SAAS,CAAC;QAElB,OAAO,GAAG,CAAC;IACZ,CAAC;IAED,oEAAoE;IAEpE;;;;;;;OAOG;IACO,SAAS,CAAC,IAAY,EAAE,GAAW,EAAE,OAAc;QAE5D,IAAI,KAAK,CAAC,GAAG,CAAC;YAAE,GAAG,GAAG,CAAC,CAAC;QACxB,IAAI,GAAG,GAAY,EAAE,CAAC;QACtB,IAAI,CAAC,GAAG,KAAK,CAAC;QAEd,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;QAE5B,YAAY;QACZ,OAAO,GAAG,GAAG,IAAI,CAAC,MAAM,EACxB;YACC,KAAK,IAAI,CAAC,IAAI,MAAM,EACpB;gBACC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAkB,CAAC,CAAC;gBAC7C,IAAI,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,EAClB;oBACC,GAAG,CAAC,IAAI,CAAC;wBACR,CAAC,EAAE,CAAC;wBACJ,CAAC,EAAE,GAAG;wBACN,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;qBACjB,CAAC,CAAC;iBACH;aACD;YACD,GAAG,EAAE,CAAC;SACN;QAED,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAC5C,CAAC;IAED;;;;;;;OAOG;IACO,UAAU,CAAC,KAAc,EAAE,OAAc,EAAE,IAAY;QAEhE,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,GAAG,GAAY,EAAE,CAAC;QAEtB,WAAW;QACX,IAAI,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAC3C,iBAAiB;QAEjB;;;;;;;;;WASG;QACH,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC9C,gBAAgB;QAChB,IAAI,MAAM,GAAsB,EAAE,CAAC,CAAE,MAAM;QAE3C,sBAAsB;QAEtB,YAAY;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,KAAc,EAAE,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EACtD;YACC,MAAM,CAAC,CAAC,CAAC,GAAG;gBACX,CAAC,EAAE,KAAK,CAAC,MAAM;gBACf,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBACJ,CAAC,EAAE,CAAC;gBAEJ,KAAK,EAAE,CAAC;aACR,CAAC;YACF,QAAQ;YACR,IAAI,EAAE,GAAG,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACpC,cAAc;YACd,IAAI,OAAO,GAAG,KAAK,CAAC,CAAE,SAAS;YAE/B,QAAQ;YACR,IAAI,IAAW,CAAC;YAEhB,IAAI,OAAO,EACX;gBACC;;;;;;;kBAOE;gBAEF,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;aAEpC;iBAED;gBACC,IAAI,GAAG,IAAI,CAAC;aACZ;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAQ,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAC3C;gBACC,IAAI,CAAC,CAAC,CAAC,IAAI,KAAK,EAChB;oBACC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBACnB,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAG,MAAM;oBAE5B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC7C;wBACC;;2BAEG;wBACH,OAAO,GAAG,IAAI,CAAC;qBACf;oBAED,8CAA8C;oBAC9C,IAAI,IAAI,EACR;wBACC,2BAA2B;wBAC3B,IACC,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;;gCAErB,CACC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;uCACf,CAAC,CAAC,CAAC,IAAI,gBAAQ,CAClB,EAEF;4BACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;yBACd;wBAED,WAAW;wBACX,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACpB;4BACC,OAAO,GAAG,IAAI,CAAC;4BACf,iBAAiB;4BACjB,gCAAgC;4BAChC,gBAAgB;4BAEhB;;;;;;8BAME;4BAEF,kBAAkB;4BAClB,IAAI,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EACvB;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;6BACd;yBACD;wBACD,qCAAqC;wBACrC,IAAI,CACF,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;+BACnB,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;+BACtB,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CACxB;4BACD,CACC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mCACf,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACnB,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACnB,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACnB,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,CACtB,EACF;4BACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;yBACd;wBACD,oBAAoB;wBACpB,IACC,CAAC,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;;gCAErB,CACC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;uCACf,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,CACtB,EACF;4BACC,iBAAiB;4BACjB,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;yBACd;wBACD,iBAAiB;wBACjB,IACC,CACC,IAAI,CAAC,CAAC,IAAI,yBAAa;+BACpB,IAAI,CAAC,CAAC,IAAI,yBAAa,CAC1B;4BACD,CACC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mCACf,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,CACtB,EACF;4BACC,iBAAiB;4BACjB,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;yBACd;wBAED;;2BAEG;wBACH,IAAI,iBAAS,CAAC,IAAI,CAAC,CAAC,EACjB,MAAM,CAAC,GAAG,EACV,MAAM,CAAC,IAAI,CACb,IAAI,iBAAS,CAAC,CAAC,CAAC,CAAC,EACf,MAAM,CAAC,GAAG,CACZ,EACD;4BACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;yBACnB;wBAED,SAAS;wBACT,IAAI,KAAK,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;wBACzB,IAAI,KAAK,EACT;4BACC,IAAI,KAAK,CAAC,CAAC,IAAI,KAAK,EACpB;gCACC,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;6BAC3B;4BAED,IAAI,QAAQ,GAAY,IAAI,CAAC;4BAE7B;;+BAEG;4BACH,IACC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC;mCACzB,KAAK,CAAC,CAAC,IAAI,CACb,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mCACnB,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC;mCACtB,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACvB,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACvB,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;mCACvB,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,CAC1B,EACF;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;gCACnB,QAAQ,GAAG,KAAK,CAAC;6BACjB;4BACD;;+BAEG;iCACE,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EACrC;gCACC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC;gCAEzB,IAAI,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,EACtB;oCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oCACd,QAAQ,GAAG,KAAK,CAAC;iCACjB;qCACI,IAAI,CAAC,EACV;oCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;oCACpB,QAAQ,GAAG,KAAK,CAAC;oCAEjB,IAAI,CAAC,GAAG,MAAM,CAAC,GAAG,EAClB;wCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;qCACpB;iCACD;6BACD;4BAED;;+BAEG;4BACH,IAAI,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC5D;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gCACjB,QAAQ,GAAG,KAAK,CAAC;6BACjB;4BAED,IAAI,QAAQ,IAAI,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,EAC7C;gCACC,IAAI,KAAK,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,IAAI,CAC5B,KAAK,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAClB,EACD;oCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oCAEd,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,EAClB;wCACC;;2CAEG;wCACH,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;wCACjB,QAAQ,GAAG,KAAK,CAAC;qCACjB;iCACD;6BACD;4BAED,IAAI,QAAQ,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,EAChC;gCACC,IAAI,iBAAS,CAAC,IAAI,CAAC,CAAC,EACnB,MAAM,CAAC,GAAG,CACV,IAAI,iBAAS,CAAC,KAAK,CAAC,CAAC,EACrB,MAAM,CAAC,GAAG,EACV,MAAM,CAAC,GAAG,CACV,EACD;oCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oCACd,QAAQ,GAAG,KAAK,CAAC;iCACjB;qCACI,IAAI,iBAAS,CAAC,IAAI,CAAC,CAAC,EACxB,MAAM,CAAC,GAAG,CACV,IAAI,iBAAS,CAAC,KAAK,CAAC,CAAC,EACrB,MAAM,CAAC,GAAG,CACV,EACD;oCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;oCACnB,QAAQ,GAAG,KAAK,CAAC;iCACjB;6BACD;4BAED,sBAAsB;4BACtB,IAAI,KAAK,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,IAAI,iBAAS,CAAC,IAAI,CAAC,CAAC,EAC1D,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,GAAG,CACV,EACD;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;6BACd;4BACD,sBAAsB;iCACjB,IACJ,CACC,KAAK,CAAC,CAAC,KAAK,GAAG;mCACZ,KAAK,CAAC,CAAC,KAAK,GAAG,CAClB;mCACE,iBAAS,CAAC,CAAC,CAAC,CAAC,EAChB,MAAM,CAAC,GAAG,CACT,EAEF;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;6BACd;4BAED,IACC,CACC,CAAC,CAAC,CAAC,KAAK,GAAG;mCACR,CAAC,CAAC,CAAC,KAAK,GAAG,CACd;mCACE,iBAAS,CAAC,IAAI,CAAC,CAAC,EACnB,MAAM,CAAC,GAAG,CACT;mCACE,iBAAS,CAAC,KAAK,CAAC,CAAC,EACpB,MAAM,CAAC,GAAG,CACT,EAEF;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;6BACd;yBACD;6BAED;4BACC,IAAI,QAAQ,GAAY,IAAI,CAAC;4BAE7B;;+BAEG;4BACH,IAAI,QAAQ,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,iBAAS,CAAC,IAAI,CAAC,CAAC,EACrD,MAAM,CAAC,GAAG,CACV,EACD;gCACC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gCACjB,QAAQ,GAAG,KAAK,CAAC;6BACjB;yBACD;qBACD;oBACD,8CAA8C;iBAC9C;qBAED;oBACC,UAAU;oBACV,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;iBACd;gBACD,MAAM;gBACN,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBAC5C,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;aAChB;YAED,iBAAiB;YACjB,IAAI,OAAO,KAAK,KAAK;gBAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;YAE1C,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;YACzC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;SACzC;QAED,sBAAsB;QAEtB,OAAO;QACP,IAAI,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QAE5B,IAAI,KAAK,EACT;YACC,sBAAsB;YACtB,sCAAsC;YACtC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,GAAG,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,CAAsB,CAAC,EAAE,KAAK,EAAE,CAAA,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;YACrG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,SAAS,EAAE,CAAC,CAAC;YACvD,mBAAmB;YACnB,yBAAyB;SACzB;QAED,WAAW;QACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,IAAW,EAAE,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EACrD;YACC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EACtB;gBACC,SAAS,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;aACzB;SACD;QACD,GAAG,GAAG,SAAS,CAAC;QAEhB,YAAY;QACZ,MAAM,GAAG,SAAS,CAAC;QACnB,MAAM,GAAG,SAAS,CAAC;QACnB,SAAS,GAAG,SAAS,CAAC;QACtB,GAAG,GAAG,SAAS,CAAC;QAChB,OAAO,GAAG,SAAS,CAAC;QAEpB,aAAa;QACb,OAAO,GAAG,CAAC;IACZ,CAAC;IAED;;;;;OAKG;IACH,OAAO,CAAC,MAAyB;QAEhC,gBAAgB;QAChB,SAAS;QACT,IAAI,GAAG,GAAe;YACrB,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACd,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACd,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACd,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACd,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;SACd,CAAC;QAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,GAAe,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EACrD;YACC,IAAI,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBAAE,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAE,UAAU;YAC7C,IAAI,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBAAE,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAE,SAAS;YAC5C,IAAI,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBAAE,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAE,UAAU;YAC7C,IAAI,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBAAE,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAE,UAAU;YAC7C,IAAI,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;gBAAE,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAE,UAAU;SAC7C;QACD,aAAa;QAEb,OAAO;QACP,IAAI,IAAI,GAAa,EAAE,CAAC;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,GAAe,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EACrD;YACC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACZ,WAAW;YACX,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;YACjC,YAAY;YACZ,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;gBAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACjC,YAAY;YACZ,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;gBAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACjC,YAAY;YACZ,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA,iBAAiB;YAC5C,gBAAgB;YAChB,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAE3D,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAEpB,8BAA8B;SAC9B;QACD,yBAAyB;QAEzB,oBAAoB;QACpB,sBAAsB;QAEtB,0BAA0B;QAC1B,MAAM,UAAU,GAAG,KAAK,CAAC;QAEzB,SAAS;QACT,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,KAAK,IAAI,CAAC,IAAI,IAAI,EAClB;YACC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAChB,IAAI,CAAC,GAAG,IAAI,EACZ;gBACC,KAAK,GAAG,CAAkB,CAAC;gBAC3B,IAAI,GAAG,CAAC,CAAC;aACT;iBACI,IAAI,CAAC,KAAK,IAAI,EACnB;gBACC;;;;mBAIG;gBACH,IAAI,CAAC,GAAG,CAAC,CAAC;gBACV,IAAI,CAAC,GAAG,CAAC,CAAC;gBACV,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACjC;oBACC,CAAC,EAAE,CAAC;iBACJ;qBACI,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACxC;oBACC,CAAC,EAAE,CAAC;iBACJ;gBACD,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACjC;oBACC,CAAC,EAAE,CAAC;iBACJ;qBACI,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACxC;oBACC,CAAC,EAAE,CAAC;iBACJ;gBACD,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACjC;oBACC,CAAC,EAAE,CAAC;iBACJ;qBACI,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EACxC;oBACC,CAAC,EAAE,CAAC;iBACJ;gBACD,IAAI,CAAC,GAAG,CAAC,EACT;oBACC,KAAK,GAAG,CAAkB,CAAC;oBAC3B,IAAI,GAAG,CAAC,CAAC;iBACT;aACD;YACD,+BAA+B;SAC/B;QACD,kDAAkD;QAElD,MAAM,GAAG,SAAS,CAAC;QACnB,GAAG,GAAG,SAAS,CAAC;QAEhB,OAAO,KAAK,CAAC;IACd,CAAC;IAED;;;;;;OAMG;IACH,UAAU,CAAC,KAAc,EAAE,IAAY;QAItC,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,WAAW;QACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAC1C;YACC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EACpB;gBACC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;aACrB;YACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SAC3B;QACD,kBAAkB;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EACpC;YACC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EACf;gBACC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;aACjD;SACD;QAED,OAAO,OAAO,CAAC;IAChB,CAAC;IAED;;;;;;;;OAQG;IACH,SAAS,CAAC,OAET,EAAE,GAAW,EAAE,IAAa,EAAE,WAAW,GAAG,CAAC,EAAE,eAAwB;QAGvE;;;WAGG;QACH,IAAI,WAAW,KAAK,CAAC,EACrB;YACC,eAAe,GAAG,IAAI,CAAC,eAAe,CAAC;YAEvC;;eAEG;YACH,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EACjC;gBACC,eAAe,IAAI,CAAC,CAAC;aACrB;SACD;aACI,IAAI,eAAe,IAAI,IAAI,CAAC,eAAe,EAChD;YACC,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,GAAG,CAAC,EAAE,IAAI,CAAC,2BAA2B,EAAE,mCAA2B,CAAC,CAAA;SAC9G;aAED;YACC,4GAA4G;SAC5G;QAED;;;;WAIG;QACH,IAAI,CAAmB,CAAC;QACxB,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,EACnC;YACC,IAAI,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YACpC,IAAI,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAEjC,IAAI,IAAI,GAAG;gBACV,CAAC,EAAE,EAAE;gBACL,CAAC,EAAE,GAAG;gBACN,CAAC,EAAE,CAAC;aACK,CAAC;YAEX,IAAI,IAAI,GAAc,EAAE,CAAC;YAEzB,IAAI,EAAE,KAAK,EAAE,EACb;gBACC,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,GAAG,EAAE,CAAC,MAAM,EAAE,EAAE,EAAE,WAAW,EAAE,eAAe,CAAC,CAAC;gBAExF,KAAK,IAAI,EAAE,IAAI,MAAM,EACrB;oBACC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;iBAC7B;aACD;iBAED;gBACC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;aAClB;YAEJ,0BAA0B;YAC1B,EAAE;YACF,sBAAsB;YACtB,EAAE;YACF,2CAA2C;YAExC,OAAO,IAAI,CAAC;SACZ;QAED,WAAW,EAAE,CAAC;QAEd,IAAI,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAE/B,sCAAsC;QAExC,WAAW;QACX,iBAAiB;QACjB,2CAA2C;QAC3C,UAAU;QACV,WAAW;QACX,OAAO;QAEL,wBAAwB;QACxB,gBAAgB;QAChB,oBAAoB;QAEpB,IAAI,GAAG,GAAc,EAAE,CAAC;QACxB,KAAK,IAAI,IAAI,IAAI,KAAK,EACtB;YACC,cAAc;YACd,IAAI,OAAO,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YACrC;;eAEG;YACH,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EACrB;gBACC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;aACjB;iBACI,IAAI,WAAW,GAAG,eAAe,EACtC;gBACC,eAAe;gBAEnB,uCAAuC;gBACvC,6BAA6B;gBAEzB,IAAI,EAAE,GAAY,CAAC,IAAI,CAAC,CAAC;gBAEzB,IAAI,CAAC,GAAG,OAAO,CAAC;gBAChB,OAAO,CAAC,IAAI,OAAO,EACnB;oBACC,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAEvB,IAAI,EAAE,EACN;wBACC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;wBAEZ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;qBACjB;yBAED;wBACC,MAAM;qBACN;iBACD;gBAED,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;aACb;iBAED;gBACC,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;gBAElC,IAAI,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,WAAW,EAAE,eAAe,CAAE,CAAC;gBAChF,KAAK,IAAI,EAAE,IAAI,MAAM,EACrB;oBACC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;iBAC5B;gBAED,MAAM,GAAG,IAAI,CAAC;aACd;SACD;QAED,KAAK,GAAG,SAAS,CAAC;QAClB,OAAO,GAAG,SAAS,CAAC;QACpB,CAAC,GAAG,SAAS,CAAC;QAEd,OAAO,GAAG,CAAC;IACZ,CAAC;CACD;AA31BD,sCA21BC;AAkDY,QAAA,IAAI,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAuC,CAAC;AAEjG,kBAAe,aAAa,CAAC","sourcesContent":["'use strict';\n\nimport { SubSModule, SubSModuleTokenizer, ISubTokenizerCreate } from '../mod';\n// @ts-ignore\nimport { UString } from 'uni-string';\nimport { ITableDictRow } from '../table/dict';\nimport { hexAndAny, toHex } from '../util/index';\nimport CHS_NAMES, { FAMILY_NAME_1, FAMILY_NAME_2, SINGLE_NAME, DOUBLE_NAME_1, DOUBLE_NAME_2 } from '../mod/CHS_NAMES';\nimport Segment, { IDICT, IWord, IDICT2 } from '../Segment';\nimport { debug } from '../util';\nimport { DATETIME } from '../mod/const';\nimport IPOSTAG from '../POSTAG';\n\nexport const DEFAULT_MAX_CHUNK_COUNT = 40;\nexport const DEFAULT_MAX_CHUNK_COUNT_MIN = 30;\n\n/**\n * 字典识别模块\n *\n * @author 老雷<leizongmin@gmail.com>\n */\nexport class DictTokenizer extends SubSModuleTokenizer\n{\n\n\t/**\n\t * 防止因無分段導致分析過久甚至超過處理負荷\n\t * 越高越精準但是處理時間會加倍成長甚至超過記憶體能處理的程度\n\t *\n\t * 數字越小越快\n\t *\n\t * FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory\n\t *\n\t * @type {number}\n\t */\n\tMAX_CHUNK_COUNT = DEFAULT_MAX_CHUNK_COUNT;\n\t/**\n\t *\n\t * 追加新模式使 MAX_CHUNK_COUNT 遞減來防止無分段長段落的總處理次數過高 由 DEFAULT_MAX_CHUNK_COUNT_MIN 來限制最小值\n\t */\n\tDEFAULT_MAX_CHUNK_COUNT_MIN = DEFAULT_MAX_CHUNK_COUNT_MIN;\n\n\tprotected _TABLE: IDICT<IWord>;\n\tprotected _TABLE2: IDICT2<IWord>;\n\n\t_cache()\n\t{\n\t\tsuper._cache();\n\t\tthis._TABLE = this.segment.getDict('TABLE');\n\t\tthis._TABLE2 = this.segment.getDict('TABLE2');\n\t\tthis._POSTAG = this.segment.POSTAG;\n\n\t\tif (typeof this.segment.options.maxChunkCount == 'number' && this.segment.options.maxChunkCount > DEFAULT_MAX_CHUNK_COUNT_MIN)\n\t\t{\n\t\t\tthis.MAX_CHUNK_COUNT = this.segment.options.maxChunkCount;\n\t\t}\n\n\t\tif (typeof this.segment.options.minChunkCount == 'number' && this.segment.options.minChunkCount > DEFAULT_MAX_CHUNK_COUNT_MIN)\n\t\t{\n\t\t\tthis.DEFAULT_MAX_CHUNK_COUNT_MIN = this.segment.options.minChunkCount;\n\t\t}\n\t}\n\n\t/**\n\t * 对未识别的单词进行分词\n\t *\n\t * @param {array} words 单词数组\n\t * @return {array}\n\t */\n\tsplit(words: IWord[]): IWord[]\n\t{\n\t\t//debug(words);\n\t\tconst TABLE = this._TABLE;\n\t\t//const POSTAG = this._POSTAG;\n\n\t\tconst self = this;\n\n\t\tlet ret: IWord[] = [];\n\t\tfor (let i = 0, word; word = words[i]; i++)\n\t\t{\n\t\t\tif (word.p > 0)\n\t\t\t{\n\t\t\t\tret.push(word);\n\t\t\t\tcontinue;\n\t\t\t}\n\n\t\t\t// 仅对未识别的词进行匹配\n\t\t\tlet wordinfo = this.matchWord(word.w, 0, words[i - 1]);\n\t\t\tif (wordinfo.length < 1)\n\t\t\t{\n\t\t\t\tret.push(word);\n\t\t\t\tcontinue;\n\t\t\t}\n\n\t\t\t// 分离出已识别的单词\n\t\t\tlet lastc = 0;\n\n\t\t\twordinfo.forEach(function (bw, ui)\n\t\t\t{\n\t\t\t\tif (bw.c > lastc)\n\t\t\t\t{\n\t\t\t\t\tret.push({\n\t\t\t\t\t\tw: word.w.substr(lastc, bw.c - lastc),\n\t\t\t\t\t});\n\t\t\t\t}\n\n\t\t\t\tlet cw = self.createRawToken({\n\t\t\t\t\tw: bw.w,\n\t\t\t\t\tf: bw.f,\n\t\t\t\t}, TABLE[bw.w]);\n\n\t\t\t\tret.push(cw);\n\n\t\t\t\t/*\n\t\t\t\tret.push({\n\t\t\t\t\tw: bw.w,\n\t\t\t\t\tp: ww.p,\n\t\t\t\t\tf: bw.f,\n\t\t\t\t\ts: ww.s,\n\t\t\t\t});\n\t\t\t\t*/\n\t\t\t\tlastc = bw.c + bw.w.length;\n\t\t\t});\n\n\t\t\tlet lastword = wordinfo[wordinfo.length - 1];\n\t\t\tif (lastword.c + lastword.w.length < word.w.length)\n\t\t\t{\n\t\t\t\tlet cw = self.createRawToken({\n\t\t\t\t\tw: word.w.substr(lastword.c + lastword.w.length),\n\t\t\t\t});\n\n\t\t\t\tret.push(cw);\n\t\t\t}\n\t\t}\n\n\t\twords = undefined;\n\n\t\treturn ret;\n\t}\n\n\t// =================================================================\n\n\t/**\n\t * 匹配单词,返回相关信息\n\t *\n\t * @param {string} text 文本\n\t * @param {int} cur 开始位置\n\t * @param {object} preword 上一个单词\n\t * @return {array} 返回格式 {w: '单词', c: 开始位置}\n\t */\n\tprotected matchWord(text: string, cur: number, preword: IWord)\n\t{\n\t\tif (isNaN(cur)) cur = 0;\n\t\tlet ret: IWord[] = [];\n\t\tlet s = false;\n\n\t\tconst TABLE2 = this._TABLE2;\n\n\t\t// 匹配可能出现的单词\n\t\twhile (cur < text.length)\n\t\t{\n\t\t\tfor (let i in TABLE2)\n\t\t\t{\n\t\t\t\tlet w = text.substr(cur, i as any as number);\n\t\t\t\tif (w in TABLE2[i])\n\t\t\t\t{\n\t\t\t\t\tret.push({\n\t\t\t\t\t\tw: w,\n\t\t\t\t\t\tc: cur,\n\t\t\t\t\t\tf: TABLE2[i][w].f,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t\tcur++;\n\t\t}\n\n\t\treturn this.filterWord(ret, preword, text);\n\t}\n\n\t/**\n\t * 选择最有可能匹配的单词\n\t *\n\t * @param {array} words 单词信息数组\n\t * @param {object} preword 上一个单词\n\t * @param {string} text 本节要分词的文本\n\t * @return {array}\n\t */\n\tprotected filterWord(words: IWord[], preword: IWord, text: string)\n\t{\n\t\tconst TABLE = this._TABLE;\n\t\tconst POSTAG = this._POSTAG;\n\t\tlet ret: IWord[] = [];\n\n\t\t// 将单词按位置分组\n\t\tlet wordpos = this.getPosInfo(words, text);\n\t\t//debug(wordpos);\n\n\t\t/**\n\t\t * 使用类似于MMSG的分词算法\n\t\t * 找出所有分词可能,主要根据一下几项来评价:\n\t\t * x、词数量最少;\n\t\t * a、词平均频率最大;\n\t\t * b、每个词长度标准差最小;\n\t\t * c、未识别词最少;\n\t\t * d、符合语法结构项:如两个连续的动词减分,数词后面跟量词加分;\n\t\t * 取以上几项综合排名最最好的\n\t\t */\n\t\tlet chunks = this.getChunks(wordpos, 0, text);\n\t\t//debug(chunks);\n\t\tlet assess: Array<IAssessRow> = []; // 评价表\n\n\t\t//console.log(chunks);\n\n\t\t// 对各个分支就行评估\n\t\tfor (let i = 0, chunk: IWord[]; chunk = chunks[i]; i++)\n\t\t{\n\t\t\tassess[i] = {\n\t\t\t\tx: chunk.length,\n\t\t\t\ta: 0,\n\t\t\t\tb: 0,\n\t\t\t\tc: 0,\n\t\t\t\td: 0,\n\n\t\t\t\tindex: i,\n\t\t\t};\n\t\t\t// 词平均长度\n\t\t\tlet sp = text.length / chunk.length;\n\t\t\t// 句子经常包含的语法结构\n\t\t\tlet has_D_V = false; // 是否包含动词\n\n\t\t\t// 遍历各个词\n\t\t\tlet prew: IWord;\n\n\t\t\tif (preword)\n\t\t\t{\n\t\t\t\t/*\n\t\t\t\tprew = {\n\t\t\t\t\tw: preword.w,\n\t\t\t\t\tp: preword.p,\n\t\t\t\t\tf: preword.f,\n\t\t\t\t\ts: preword.s,\n\t\t\t\t}\n\t\t\t\t*/\n\n\t\t\t\tprew = this.createRawToken(preword);\n\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tprew = null;\n\t\t\t}\n\t\t\tfor (let j = 0, w: IWord; w = chunk[j]; j++)\n\t\t\t{\n\t\t\t\tif (w.w in TABLE)\n\t\t\t\t{\n\t\t\t\t\tw.p = TABLE[w.w].p;\n\t\t\t\t\tassess[i].a += w.f; // 总词频\n\n\t\t\t\t\tif (j === 0 && !preword && (w.p & POSTAG.D_V))\n\t\t\t\t\t{\n\t\t\t\t\t\t/**\n\t\t\t\t\t\t * 將第一個字也計算進去是否包含動詞\n\t\t\t\t\t\t */\n\t\t\t\t\t\thas_D_V = true;\n\t\t\t\t\t}\n\n\t\t\t\t\t// ================ 检查语法结构 ===================\n\t\t\t\t\tif (prew)\n\t\t\t\t\t{\n\t\t\t\t\t\t// 如果上一个词是数词且当前词是量词(单位),则加分\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t(prew.p & POSTAG.A_M)\n\t\t\t\t\t\t\t&&\n\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t(w.p & POSTAG.A_Q)\n\t\t\t\t\t\t\t\t|| w.w in DATETIME\n\t\t\t\t\t\t\t)\n\t\t\t\t\t\t)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// 如果当前词是动词\n\t\t\t\t\t\tif (w.p & POSTAG.D_V)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\thas_D_V = true;\n\t\t\t\t\t\t\t// 如果是连续的两个动词,则减分\n\t\t\t\t\t\t\t//if ((prew.p & POSTAG.D_V) > 0)\n\t\t\t\t\t\t\t//assess[i].d--;\n\n\t\t\t\t\t\t\t/*\n\t\t\t\t\t\t\t// 如果是 形容词 + 动词,则加分\n\t\t\t\t\t\t\tif ((prew.p & POSTAG.D_A))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t*/\n\n\t\t\t\t\t\t\t// 如果是 副词 + 动词,则加分\n\t\t\t\t\t\t\tif (prew.p & POSTAG.D_D)\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\t// 如果是地区名、机构名或形容词,后面跟地区、机构、代词、名词等,则加分\n\t\t\t\t\t\tif ((\n\t\t\t\t\t\t\t\t(prew.p & POSTAG.A_NS)\n\t\t\t\t\t\t\t\t|| (prew.p & POSTAG.A_NT)\n\t\t\t\t\t\t\t\t|| (prew.p & POSTAG.D_A)\n\t\t\t\t\t\t\t) &&\n\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t(w.p & POSTAG.D_N)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.A_NR)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.A_NS)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.A_NZ)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.A_NT)\n\t\t\t\t\t\t\t))\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t}\n\t\t\t\t\t\t// 如果是 方位词 + 数量词,则加分\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t(prew.p & POSTAG.D_F)\n\t\t\t\t\t\t\t&&\n\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t(w.p & POSTAG.A_M)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.D_MQ)\n\t\t\t\t\t\t\t))\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t//debug(prew, w);\n\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t}\n\t\t\t\t\t\t// 如果是 姓 + 名词,则加分\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\tprew.w in FAMILY_NAME_1\n\t\t\t\t\t\t\t\t|| prew.w in FAMILY_NAME_2\n\t\t\t\t\t\t\t) &&\n\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t(w.p & POSTAG.D_N)\n\t\t\t\t\t\t\t\t|| (w.p & POSTAG.A_NZ)\n\t\t\t\t\t\t\t))\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\t//debug(prew, w);\n\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t/**\n\t\t\t\t\t\t * 地名/处所 + 方位\n\t\t\t\t\t\t */\n\t\t\t\t\t\tif (hexAndAny(prew.p\n\t\t\t\t\t\t\t, POSTAG.D_S\n\t\t\t\t\t\t\t, POSTAG.A_NS,\n\t\t\t\t\t\t) && hexAndAny(w.p\n\t\t\t\t\t\t\t, POSTAG.D_F,\n\t\t\t\t\t\t))\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tassess[i].d += 0.5;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// 探测下一个词\n\t\t\t\t\t\tlet nextw = chunk[j + 1];\n\t\t\t\t\t\tif (nextw)\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tif (nextw.w in TABLE)\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tnextw.p = TABLE[nextw.w].p;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tlet _temp_ok: boolean = true;\n\n\t\t\t\t\t\t\t/**\n\t\t\t\t\t\t\t * 如果当前是“的”+ 名词,则加分\n\t\t\t\t\t\t\t */\n\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t(w.w === '的' || w.w === '之')\n\t\t\t\t\t\t\t\t&& nextw.p && (\n\t\t\t\t\t\t\t\t\t(nextw.p & POSTAG.D_N)\n\t\t\t\t\t\t\t\t\t|| (nextw.p & POSTAG.D_V)\n\t\t\t\t\t\t\t\t\t|| (nextw.p & POSTAG.A_NR)\n\t\t\t\t\t\t\t\t\t|| (nextw.p & POSTAG.A_NS)\n\t\t\t\t\t\t\t\t\t|| (nextw.p & POSTAG.A_NZ)\n\t\t\t\t\t\t\t\t\t|| (nextw.p & POSTAG.A_NT)\n\t\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d += 1.5;\n\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t/**\n\t\t\t\t\t\t\t * 如果是连词,前后两个词词性相同则加分\n\t\t\t\t\t\t\t */\n\t\t\t\t\t\t\telse if (prew.p && (w.p & POSTAG.D_C))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tlet p = prew.p & nextw.p;\n\n\t\t\t\t\t\t\t\tif (prew.p === nextw.p)\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\telse if (p)\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tassess[i].d += 0.25;\n\t\t\t\t\t\t\t\t\t_temp_ok = false;\n\n\t\t\t\t\t\t\t\t\tif (p & POSTAG.D_N)\n\t\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\tassess[i].d += 0.75;\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t/**\n\t\t\t\t\t\t\t * 在感動的重逢中有余在的話就太過閃耀\n\t\t\t\t\t\t\t */\n\t\t\t\t\t\t\tif (_temp_ok && (w.p & POSTAG.D_R) && (nextw.p & POSTAG.D_P))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d += 1;\n\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (_temp_ok && nextw.p && (w.p & POSTAG.D_P))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tif (nextw.p & POSTAG.A_NR && (\n\t\t\t\t\t\t\t\t\tnextw.w.length > 1\n\t\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tassess[i].d++;\n\n\t\t\t\t\t\t\t\t\tif (prew.w === '的')\n\t\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\t\t/**\n\t\t\t\t\t\t\t\t\t\t * 的 + 介詞 + 人名\n\t\t\t\t\t\t\t\t\t\t */\n\t\t\t\t\t\t\t\t\t\tassess[i].d += 1;\n\t\t\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (_temp_ok && w.p & POSTAG.D_P)\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tif (hexAndAny(prew.p,\n\t\t\t\t\t\t\t\t\tPOSTAG.D_N,\n\t\t\t\t\t\t\t\t) && hexAndAny(nextw.p,\n\t\t\t\t\t\t\t\t\tPOSTAG.D_N,\n\t\t\t\t\t\t\t\t\tPOSTAG.D_V,\n\t\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\telse if (hexAndAny(prew.p,\n\t\t\t\t\t\t\t\t\tPOSTAG.D_R,\n\t\t\t\t\t\t\t\t) && hexAndAny(nextw.p,\n\t\t\t\t\t\t\t\t\tPOSTAG.D_R,\n\t\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\t\tassess[i].d += 0.5;\n\t\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t// @FIXME 暴力解決 三天后 的問題\n\t\t\t\t\t\t\tif (nextw.w === '后' && w.p & POSTAG.D_T && hexAndAny(prew.p,\n\t\t\t\t\t\t\t\tPOSTAG.D_MQ,\n\t\t\t\t\t\t\t\tPOSTAG.A_M,\n\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t// @FIXME 到湖中間后手終於能休息了\n\t\t\t\t\t\t\telse if (\n\t\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t\tnextw.w === '后'\n\t\t\t\t\t\t\t\t\t|| nextw.w === '後'\n\t\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t\t&& hexAndAny(w.p,\n\t\t\t\t\t\t\t\tPOSTAG.D_F,\n\t\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t(\n\t\t\t\t\t\t\t\t\tw.w === '后'\n\t\t\t\t\t\t\t\t\t|| w.w === '後'\n\t\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t\t&& hexAndAny(prew.p,\n\t\t\t\t\t\t\t\tPOSTAG.D_F,\n\t\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t\t&& hexAndAny(nextw.p,\n\t\t\t\t\t\t\t\tPOSTAG.D_N,\n\t\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t)\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d++;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t\telse\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\tlet _temp_ok: boolean = true;\n\n\t\t\t\t\t\t\t/**\n\t\t\t\t\t\t\t * 她把荷包蛋摆在像是印度烤饼的面包上\n\t\t\t\t\t\t\t */\n\t\t\t\t\t\t\tif (_temp_ok && (w.p & POSTAG.D_F) && hexAndAny(prew.p,\n\t\t\t\t\t\t\t\tPOSTAG.D_N,\n\t\t\t\t\t\t\t))\n\t\t\t\t\t\t\t{\n\t\t\t\t\t\t\t\tassess[i].d += 1;\n\t\t\t\t\t\t\t\t_temp_ok = false;\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t// ===========================================\n\t\t\t\t}\n\t\t\t\telse\n\t\t\t\t{\n\t\t\t\t\t// 未识别的词数量\n\t\t\t\t\tassess[i].c++;\n\t\t\t\t}\n\t\t\t\t// 标准差\n\t\t\t\tassess[i].b += Math.pow(sp - w.w.length, 2);\n\t\t\t\tprew = chunk[j];\n\t\t\t}\n\n\t\t\t// 如果句子中包含了至少一个动词\n\t\t\tif (has_D_V === false) assess[i].d -= 0.5;\n\n\t\t\tassess[i].a = assess[i].a / chunk.length;\n\t\t\tassess[i].b = assess[i].b / chunk.length;\n\t\t}\n\n\t\t//console.dir(assess);\n\n\t\t// 计算排名\n\t\tlet top = this.getTops(assess);\n\t\tlet currchunk = chunks[top];\n\n\t\tif (false)\n\t\t{\n\t\t\t//console.log(assess);\n\t\t\t//console.log(Object.entries(chunks));\n\t\t\tconsole.dir(Object.entries(chunks)\n\t\t\t\t.map(([i, chunk]) => { return { i, asses: assess[i as unknown as number], chunk } }), { depth: 5 });\n\t\t\tconsole.dir({ i: top, asses: assess[top], currchunk });\n\t\t\t//console.log(top);\n\t\t\t//console.log(currchunk);\n\t\t}\n\n\t\t// 剔除不能识别的词\n\t\tfor (let i = 0, word: IWord; word = currchunk[i]; i++)\n\t\t{\n\t\t\tif (!(word.w in TABLE))\n\t\t\t{\n\t\t\t\tcurrchunk.splice(i--, 1);\n\t\t\t}\n\t\t}\n\t\tret = currchunk;\n\n\t\t// 試圖主動清除記憶體\n\t\tassess = undefined;\n\t\tchunks = undefined;\n\t\tcurrchunk = undefined;\n\t\ttop = undefined;\n\t\twordpos = undefined;\n\n\t\t//debug(ret);\n\t\treturn ret;\n\t}\n\n\t/**\n\t * 评价排名\n\t *\n\t * @param {object} assess\n\t * @return {object}\n\t */\n\tgetTops(assess: Array<IAssessRow>)\n\t{\n\t\t//debug(assess);\n\t\t// 取各项最大值\n\t\tlet top: IAssessRow = {\n\t\t\tx: assess[0].x,\n\t\t\ta: assess[0].a,\n\t\t\tb: assess[0].b,\n\t\t\tc: assess[0].c,\n\t\t\td: assess[0].d,\n\t\t};\n\n\t\tfor (let i = 1, ass: IAssessRow; ass = assess[i]; i++)\n\t\t{\n\t\t\tif (ass.a > top.a) top.a = ass.a; // 取最大平均词频\n\t\t\tif (ass.b < top.b) top.b = ass.b; // 取最小标准差\n\t\t\tif (ass.c > top.c) top.c = ass.c; // 取最大未识别词\n\t\t\tif (ass.d < top.d) top.d = ass.d; // 取最小语法分数\n\t\t\tif (ass.x > top.x) top.x = ass.x; // 取最大单词数量\n\t\t}\n\t\t//debug(top);\n\n\t\t// 评估排名\n\t\tlet tops: number[] = [];\n\t\tfor (let i = 0, ass: IAssessRow; ass = assess[i]; i++)\n\t\t{\n\t\t\ttops[i] = 0;\n\t\t\t// 词数量,越小越好\n\t\t\ttops[i] += (top.x - ass.x) * 1.5;\n\t\t\t// 词总频率,越大越好\n\t\t\tif (ass.a >= top.a) tops[i] += 1;\n\t\t\t// 词标准差,越小越好\n\t\t\tif (ass.b <= top.b) tops[i] += 1;\n\t\t\t// 未识别词,越小越好\n\t\t\ttops[i] += (top.c - ass.c);//debug(tops[i]);\n\t\t\t// 符合语法结构程度,越大越好\n\t\t\ttops[i] += (ass.d < 0 ? top.d + ass.d : ass.d - top.d) * 1;\n\n\t\t\tass.score = tops[i];\n\n\t\t\t//debug(tops[i]);debug('---');\n\t\t}\n\t\t//debug(tops.join(' '));\n\n\t\t//console.log(tops);\n\t\t//console.log(assess);\n\n\t\t//const old_method = true;\n\t\tconst old_method = false;\n\n\t\t// 取分数最高的\n\t\tlet curri = 0;\n\t\tlet maxs = tops[0];\n\t\tfor (let i in tops)\n\t\t{\n\t\t\tlet s = tops[i];\n\t\t\tif (s > maxs)\n\t\t\t{\n\t\t\t\tcurri = i as any as number;\n\t\t\t\tmaxs = s;\n\t\t\t}\n\t\t\telse if (s === maxs)\n\t\t\t{\n\t\t\t\t/**\n\t\t\t\t * 如果分数相同,则根据词长度、未识别词个数和平均频率来选择\n\t\t\t\t *\n\t\t\t\t * 如果依然同分,則保持不變\n\t\t\t\t */\n\t\t\t\tlet a = 0;\n\t\t\t\tlet b = 0;\n\t\t\t\tif (assess[i].c < assess[curri].c)\n\t\t\t\t{\n\t\t\t\t\ta++;\n\t\t\t\t}\n\t\t\t\telse if (assess[i].c !== assess[curri].c)\n\t\t\t\t{\n\t\t\t\t\tb++;\n\t\t\t\t}\n\t\t\t\tif (assess[i].a > assess[curri].a)\n\t\t\t\t{\n\t\t\t\t\ta++;\n\t\t\t\t}\n\t\t\t\telse if (assess[i].a !== assess[curri].a)\n\t\t\t\t{\n\t\t\t\t\tb++;\n\t\t\t\t}\n\t\t\t\tif (assess[i].x < assess[curri].x)\n\t\t\t\t{\n\t\t\t\t\ta++;\n\t\t\t\t}\n\t\t\t\telse if (assess[i].x !== assess[curri].x)\n\t\t\t\t{\n\t\t\t\t\tb++;\n\t\t\t\t}\n\t\t\t\tif (a > b)\n\t\t\t\t{\n\t\t\t\t\tcurri = i as any as number;\n\t\t\t\t\tmaxs = s;\n\t\t\t\t}\n\t\t\t}\n\t\t\t//debug({ i, s, maxs, curri });\n\t\t}\n\t\t//debug('max: i=' + curri + ', s=' + tops[curri]);\n\n\t\tassess = undefined;\n\t\ttop = undefined;\n\n\t\treturn curri;\n\t}\n\n\t/**\n\t * 将单词按照位置排列\n\t *\n\t * @param {array} words\n\t * @param {string} text\n\t * @return {object}\n\t */\n\tgetPosInfo(words: IWord[], text: string): {\n\t\t[index: number]: IWord[];\n\t}\n\t{\n\t\tlet wordpos = {};\n\t\t// 将单词按位置分组\n\t\tfor (let i = 0, word; word = words[i]; i++)\n\t\t{\n\t\t\tif (!wordpos[word.c])\n\t\t\t{\n\t\t\t\twordpos[word.c] = [];\n\t\t\t}\n\t\t\twordpos[word.c].push(word);\n\t\t}\n\t\t// 按单字分割文本,填补空缺的位置\n\t\tfor (let i = 0; i < text.length; i++)\n\t\t{\n\t\t\tif (!wordpos[i])\n\t\t\t{\n\t\t\t\twordpos[i] = [{ w: text.charAt(i), c: i, f: 0 }];\n\t\t\t}\n\t\t}\n\n\t\treturn wordpos;\n\t}\n\n\t/**\n\t * 取所有分支\n\t *\n\t * @param {{[p: number]: Segment.IWord[]}} wordpos\n\t * @param {number} pos 当前位置\n\t * @param {string} text 本节要分词的文本\n\t * @param {number} total_count\n\t * @returns {Segment.IWord[][]}\n\t */\n\tgetChunks(wordpos: {\n\t\t[index: number]: IWord[];\n\t}, pos: number, text?: string, total_count = 0, MAX_CHUNK_COUNT?: number): IWord[][]\n\t{\n\n\t\t/**\n\t\t *\n\t\t * 追加新模式使 MAX_CHUNK_COUNT 遞減來防止無分段長段落的總處理次數過高 由 DEFAULT_MAX_CHUNK_COUNT_MIN 來限制最小值\n\t\t */\n\t\tif (total_count === 0)\n\t\t{\n\t\t\tMAX_CHUNK_COUNT = this.MAX_CHUNK_COUNT;\n\n\t\t\t/**\n\t\t\t * 只有當目前文字長度大於 MAX_CHUNK_COUNT 時才遞減\n\t\t\t */\n\t\t\tif (text.length < MAX_CHUNK_COUNT)\n\t\t\t{\n\t\t\t\tMAX_CHUNK_COUNT += 1;\n\t\t\t}\n\t\t}\n\t\telse if (MAX_CHUNK_COUNT <= this.MAX_CHUNK_COUNT)\n\t\t{\n\t\t\tMAX_CHUNK_COUNT = Math.max(MAX_CHUNK_COUNT - 1, this.DEFAULT_MAX_CHUNK_COUNT_MIN, DEFAULT_MAX_CHUNK_COUNT_MIN)\n\t\t}\n\t\telse\n\t\t{\n\t\t\t//MAX_CHUNK_COUNT = Math.max(MAX_CHUNK_COUNT, this.DEFAULT_MAX_CHUNK_COUNT_MIN, DEFAULT_MAX_CHUNK_COUNT_MIN)\n\t\t}\n\n\t\t/**\n\t\t * 忽略連字\n\t\t *\n\t\t * 例如: 啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊啊\n\t\t */\n\t\tlet m: RegExpMatchArray;\n\t\tif (m = text.match(/^((.+)\\2{5,})/))\n\t\t{\n\t\t\tlet s1 = text.slice(0, m[1].length);\n\t\t\tlet s2 = text.slice(m[1].length);\n\n\t\t\tlet word = {\n\t\t\t\tw: s1,\n\t\t\t\tc: pos,\n\t\t\t\tf: 0,\n\t\t\t} as IWord;\n\n\t\t\tlet _ret: IWord[][] = [];\n\n\t\t\tif (s2 !== '')\n\t\t\t{\n\t\t\t\tlet chunks = this.getChunks(wordpos, pos + s1.length, s2, total_count, MAX_CHUNK_COUNT);\n\n\t\t\t\tfor (let ws of chunks)\n\t\t\t\t{\n\t\t\t\t\t_ret.push([word].concat(ws));\n\t\t\t\t}\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\t_ret.push([word]);\n\t\t\t}\n\n//\t\t\tconsole.dir(wordpos);\n//\n//\t\t\tconsole.dir(ret);\n//\n//\t\t\tconsole.dir([pos, text, total_count]);\n\n\t\t\treturn _ret;\n\t\t}\n\n\t\ttotal_count++;\n\n\t\tlet words = wordpos[pos] || [];\n\n\t\t//debug(total_count, MAX_CHUNK_COUNT);\n\n//\t\tdebug({\n//\t\t\ttotal_count,\n//\t\t\tMAX_CHUNK_COUNT: this.MAX_CHUNK_COUNT,\n//\t\t\ttext,\n//\t\t\twords,\n//\t\t});\n\n\t\t// debug('getChunks: ');\n\t\t// debug(words);\n\t\t//throw new Error();\n\n\t\tlet ret: IWord[][] = [];\n\t\tfor (let word of words)\n\t\t{\n\t\t\t//debug(word);\n\t\t\tlet nextcur = word.c + word.w.length;\n\t\t\t/**\n\t\t\t * @FIXME\n\t\t\t */\n\t\t\tif (!wordpos[nextcur])\n\t\t\t{\n\t\t\t\tret.push([word]);\n\t\t\t}\n\t\t\telse if (total_count > MAX_CHUNK_COUNT)\n\t\t\t{\n\t\t\t\t// do something\n\n//\t\t\t\tconsole.log(444, words.slice(i));\n//\t\t\t\tconsole.log(333, word);\n\n\t\t\t\tlet w1: IWord[] = [word];\n\n\t\t\t\tlet j = nextcur;\n\t\t\t\twhile (j in wordpos)\n\t\t\t\t{\n\t\t\t\t\tlet w2 = wordpos[j][0];\n\n\t\t\t\t\tif (w2)\n\t\t\t\t\t{\n\t\t\t\t\t\tw1.push(w2);\n\n\t\t\t\t\t\tj += w2.w.length;\n\t\t\t\t\t}\n\t\t\t\t\telse\n\t\t\t\t\t{\n\t\t\t\t\t\tbreak;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tret.push(w1);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tlet t = text.slice(word.w.length);\n\n\t\t\t\tlet chunks = this.getChunks(wordpos, nextcur, t, total_count, MAX_CHUNK_COUNT );\n\t\t\t\tfor (let ws of chunks)\n\t\t\t\t{\n\t\t\t\t\tret.push([word].concat(ws));\n\t\t\t\t}\n\n\t\t\t\tchunks = null;\n\t\t\t}\n\t\t}\n\n\t\twords = undefined;\n\t\twordpos = undefined;\n\t\tm = undefined;\n\n\t\treturn ret;\n\t}\n}\n\nexport namespace DictTokenizer\n{\n\t/**\n\t * 使用类似于MMSG的分词算法\n\t * 找出所有分词可能,主要根据一下几项来评价:\n\t *\n\t * x、词数量最少;\n\t * a、词平均频率最大;\n\t * b、每个词长度标准差最小;\n\t * c、未识别词最少;\n\t * d、符合语法结构项:如两个连续的动词减分,数词后面跟量词加分;\n\t *\n\t * 取以上几项综合排名最最好的\n\t */\n\texport type IAssessRow = {\n\t\t/**\n\t\t * 词数量,越小越好\n\t\t */\n\t\tx: number,\n\t\t/**\n\t\t * 词总频率,越大越好\n\t\t */\n\t\ta: number,\n\t\t/**\n\t\t * 词标准差,越小越好\n\t\t * 每个词长度标准差最小\n\t\t */\n\t\tb: number,\n\t\t/**\n\t\t * 未识别词,越小越好\n\t\t */\n\t\tc: number,\n\t\t/**\n\t\t * 符合语法结构程度,越大越好\n\t\t * 符合语法结构项:如两个连续的动词减分,数词后面跟量词加分\n\t\t */\n\t\td: number,\n\n\t\t/**\n\t\t * 結算評分(自動計算)\n\t\t */\n\t\tscore?: number,\n\t\treadonly index?: number,\n\t};\n}\n\nexport import IAssessRow = DictTokenizer.IAssessRow;\n\nexport const init = DictTokenizer.init.bind(DictTokenizer) as ISubTokenizerCreate<DictTokenizer>;\n\nexport default DictTokenizer;\n"]}
\No newline at end of file