UNPKG

3.27 kBPlain TextView Raw
/**
 * @license
 * Copyright 2021 Google LLC
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Base BudouX parser.
 *
 * For every position between two UTF-16 code units of a sentence, the parser
 * sums the model weights of the character n-grams surrounding that position
 * on top of a constant base score. Positions whose total is strictly positive
 * are reported as chunk boundaries.
 */
export class Parser {
  /**
   * N-gram features evaluated at each candidate position, in evaluation
   * order. Each entry is `[group, from, to]`: for position `i`, the key looked
   * up in model group `group` is `sentence.substring(i + from, i + to)`.
   */
  private static readonly FEATURES: ReadonlyArray<
    readonly [string, number, number]
  > = [
    ['UW1', -3, -2],
    ['UW2', -2, -1],
    ['UW3', -1, 0],
    ['UW4', 0, 1],
    ['UW5', 1, 2],
    ['UW6', 2, 3],
    ['BW1', -2, 0],
    ['BW2', -1, 1],
    ['BW3', 0, 2],
    ['TW1', -3, 0],
    ['TW2', -2, 1],
    ['TW3', -1, 2],
    ['TW4', 0, 3],
  ];

  /** BudouX model data: feature group name -> (n-gram -> weight). */
  private readonly model: Map<string, Map<string, number>>;

  /** Score every position starts from: -0.5 times the sum of all weights. */
  private readonly baseScore: number;

  /**
   * Constructs a BudouX parser.
   * @param model A model data, keyed by feature group name and then by
   *     n-gram, holding the weight of each n-gram.
   */
  constructor(model: {[key: string]: {[key: string]: number}}) {
    const groups = new Map<string, Map<string, number>>();
    let total = 0;
    for (const [group, weights] of Object.entries(model)) {
      const table = new Map(Object.entries(weights));
      groups.set(group, table);
      // Accumulate in insertion order so the sum is computed without
      // building intermediate arrays.
      for (const weight of table.values()) total += weight;
    }
    this.model = groups;
    this.baseScore = -0.5 * total;
  }

  /**
   * Parses the input sentence and returns a list of semantic chunks.
   *
   * The chunks are consecutive slices of the sentence cut at the positions
   * returned by `parseBoundaries`, so joining them yields the input again.
   *
   * @param sentence An input sentence.
   * @returns The retrieved chunks; an empty array for an empty sentence.
   */
  parse(sentence: string): string[] {
    if (sentence === '') return [];
    const chunks: string[] = [];
    let start = 0;
    for (const boundary of this.parseBoundaries(sentence)) {
      chunks.push(sentence.slice(start, boundary));
      start = boundary;
    }
    // Whatever follows the last boundary (or the whole sentence when there is
    // no boundary) forms the final chunk.
    chunks.push(sentence.slice(start));
    return chunks;
  }

  /**
   * Parses the input sentence and returns a list of boundaries.
   *
   * Boundaries are offsets in UTF-16 code units, in ascending order, each in
   * the range `1 .. sentence.length - 1`.
   *
   * @param sentence An input sentence.
   * @returns The list of boundaries.
   */
  parseBoundaries(sentence: string): number[] {
    const boundaries: number[] = [];
    for (let i = 1; i < sentence.length; i++) {
      let score = this.baseScore;
      for (const [group, from, to] of Parser.FEATURES) {
        // `substring` clamps out-of-range indices, so next to the ends of the
        // sentence the key is a shortened (possibly empty) n-gram.
        const key = sentence.substring(i + from, i + to);
        // NOTE: Score values in models may be zero or negative, so only a
        // missing group or key falls back to 0.
        score += this.model.get(group)?.get(key) ?? 0;
      }
      if (score > 0) boundaries.push(i);
    }
    return boundaries;
  }
}