UNPKG

4.19 kBJavaScriptView Raw
1"use strict";
/**
 * @license
 * Copyright 2021 Google LLC
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17Object.defineProperty(exports, "__esModule", { value: true });
18exports.Parser = void 0;
/**
 * Base BudouX parser.
 *
 * Scores every position between two UTF-16 code units of a sentence using
 * the weights of the n-gram features found around that position, and reports
 * the position as a chunk boundary when the score is positive.
 */
class Parser {
  /**
   * Constructs a BudouX parser.
   *
   * The model is copied into a `Map` of `Map`s (`this.model`), keyed first by
   * feature-group name (e.g. `'UW1'`, `'BW2'`) and then by the n-gram string.
   * `this.baseScore` is set to minus half of the sum of every weight in the
   * model, accumulated in the model's own enumeration order.
   *
   * @param {Object<string, Object<string, number>>} model Model data:
   *     feature-group name -> (n-gram -> weight). Weights are expected to be
   *     numbers and may be negative.
   */
  constructor(model) {
    this.model = new Map();
    let weightSum = 0;
    for (const [groupName, weights] of Object.entries(model)) {
      const group = new Map(Object.entries(weights));
      this.model.set(groupName, group);
      for (const weight of group.values()) {
        weightSum += weight;
      }
    }
    this.baseScore = -0.5 * weightSum;
  }

  /**
   * Parses the input sentence and returns a list of semantic chunks.
   *
   * The chunks are the pieces of `sentence` delimited by the positions that
   * `parseBoundaries` returns; concatenating them yields the sentence again.
   *
   * @param {string} sentence An input sentence.
   * @return {string[]} The retrieved chunks; an empty array for `''`.
   */
  parse(sentence) {
    if (sentence === '') {
      return [];
    }
    const chunks = [];
    let chunkStart = 0;
    for (const boundary of this.parseBoundaries(sentence)) {
      chunks.push(sentence.slice(chunkStart, boundary));
      chunkStart = boundary;
    }
    // Whatever follows the last boundary (or the whole sentence) is a chunk.
    chunks.push(sentence.slice(chunkStart));
    return chunks;
  }

  /**
   * Parses the input sentence and returns a list of boundaries.
   *
   * @param {string} sentence An input sentence.
   * @return {number[]} Ascending indices `i` (1 <= i < sentence.length) at
   *     which the sentence should be split, i.e. a chunk starts at `i`.
   */
  parseBoundaries(sentence) {
    // [feature group, window start, window end], offsets relative to the
    // candidate boundary `i`. The order matters: weights are accumulated in
    // this order, which keeps floating-point sums reproducible.
    const featureWindows = [
      ['UW1', -3, -2],
      ['UW2', -2, -1],
      ['UW3', -1, 0],
      ['UW4', 0, 1],
      ['UW5', 1, 2],
      ['UW6', 2, 3],
      ['BW1', -2, 0],
      ['BW2', -1, 1],
      ['BW3', 0, 2],
      ['TW1', -3, 0],
      ['TW2', -2, 1],
      ['TW3', -1, 2],
      ['TW4', 0, 3],
    ];
    // Resolve each group once instead of once per character; groups that the
    // model does not define contribute nothing and are dropped up front.
    const activeFeatures = [];
    for (const [groupName, from, to] of featureWindows) {
      const weights = this.model.get(groupName);
      if (weights != null) {
        activeFeatures.push({ weights, from, to });
      }
    }

    const boundaries = [];
    for (let i = 1; i < sentence.length; i++) {
      let score = this.baseScore;
      for (const { weights, from, to } of activeFeatures) {
        // `substring` clamps out-of-range indices, so windows that overhang
        // either end of the sentence simply yield a shorter n-gram.
        // NOTE: Score values in models may be negative; `|| 0` only turns a
        // missing n-gram into a zero contribution.
        score += weights.get(sentence.substring(i + from, i + to)) || 0;
      }
      if (score > 0) {
        boundaries.push(i);
      }
    }
    return boundaries;
  }
}
88exports.Parser = Parser;
89//# sourceMappingURL=parser.js.map
\No newline at end of file