UNPKG

4.07 kBJavaScriptView Raw
1/**
2 * @license
3 * Copyright 2021 Google LLC
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
/**
 * Base BudouX parser.
 *
 * Holds a model as a two-level map (feature group name -> feature string ->
 * score) and uses it to score each position inside a sentence as a candidate
 * chunk boundary.
 */
export class Parser {
  /**
   * Constructs a BudouX parser.
   *
   * The model is copied into nested `Map`s, and `baseScore` is precomputed
   * as minus one half of the sum of every score in the model.
   *
   * @param model A model data: an object keyed by feature group name whose
   *     values are objects mapping a feature string to its score.
   */
  constructor(model) {
    const groups = new Map();
    for (const [groupName, weights] of Object.entries(model)) {
      groups.set(groupName, new Map(Object.entries(weights)));
    }
    this.model = groups;

    // Accumulate group by group, in insertion order, starting from zero.
    let total = 0;
    for (const group of this.model.values()) {
      for (const weight of group.values()) {
        total = total + weight;
      }
    }
    this.baseScore = -0.5 * total;
  }

  /**
   * Parses the input sentence and returns a list of semantic chunks.
   *
   * The sentence is cut at every index reported by `parseBoundaries`, so the
   * chunks joined back together reproduce the input.
   *
   * @param sentence An input sentence.
   * @return The retrieved chunks; an empty list for an empty sentence.
   */
  parse(sentence) {
    if (sentence === '') {
      return [];
    }
    // Each chunk runs from one cut point to the next; the last chunk has no
    // following cut point, so `slice` runs to the end of the sentence.
    const cuts = [0, ...this.parseBoundaries(sentence)];
    return cuts.map((from, idx) => sentence.slice(from, cuts[idx + 1]));
  }

  /**
   * Parses the input sentence and returns a list of boundaries.
   *
   * For every index `pos` from 1 to `sentence.length - 1`, the score starts
   * at `baseScore` and gains the model score of each feature found around
   * `pos`. The index is reported as a boundary when the total is above zero.
   *
   * @param sentence An input sentence.
   * @return The list of boundaries, in ascending order.
   */
  parseBoundaries(sentence) {
    // Each entry is [feature group, start offset, end offset], with offsets
    // relative to the candidate position. The order is the order in which
    // the scores are summed.
    const features = [
      ['UW1', -3, -2],
      ['UW2', -2, -1],
      ['UW3', -1, 0],
      ['UW4', 0, 1],
      ['UW5', 1, 2],
      ['UW6', 2, 3],
      ['BW1', -2, 0],
      ['BW2', -1, 1],
      ['BW3', 0, 2],
      ['TW1', -3, 0],
      ['TW2', -2, 1],
      ['TW3', -1, 2],
      ['TW4', 0, 3],
    ];
    const boundaries = [];
    for (let pos = 1; pos < sentence.length; pos++) {
      let score = this.baseScore;
      // NOTE: Score values in models may be negative.
      for (const [groupName, startOffset, endOffset] of features) {
        const weights = this.model.get(groupName);
        // `substring` clamps negative indices to 0, so windows reaching
        // before the start of the sentence are simply truncated.
        const weight =
          weights === null || weights === undefined
            ? undefined
            : weights.get(sentence.substring(pos + startOffset, pos + endOffset));
        // A missing group or feature contributes nothing.
        score += weight || 0;
      }
      if (score > 0) {
        boundaries.push(pos);
      }
    }
    return boundaries;
  }
}
85//# sourceMappingURL=parser.js.map
\No newline at end of file