1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
/**
 * Splits a sentence into chunks by scoring every position between two
 * characters with additive weights looked up from a model.
 *
 * The model is a two-level dictionary: feature name -> (character window ->
 * weight). For each candidate position the weights of all matching windows
 * are added to a constant base score; the position becomes a boundary when
 * the total is strictly positive.
 */
export class Parser {
  /**
   * Character windows inspected around a candidate position `i`, as
   * `[feature name, start offset, end offset]`; the looked-up key is
   * `sentence.substring(i + start, i + end)`.
   *
   * The order of the entries is the order in which weights are accumulated,
   * so it must stay fixed to keep floating-point sums reproducible.
   */
  private static readonly FEATURE_WINDOWS: ReadonlyArray<
    readonly [string, number, number]
  > = [
    // One-character windows.
    ['UW1', -3, -2],
    ['UW2', -2, -1],
    ['UW3', -1, 0],
    ['UW4', 0, 1],
    ['UW5', 1, 2],
    ['UW6', 2, 3],
    // Two-character windows.
    ['BW1', -2, 0],
    ['BW2', -1, 1],
    ['BW3', 0, 2],
    // Three-character windows.
    ['TW1', -3, 0],
    ['TW2', -2, 1],
    ['TW3', -1, 2],
    ['TW4', 0, 3],
  ];

  /** Feature name -> (character window -> weight). */
  private readonly model: Map<string, Map<string, number>>;
  /** Constant starting score: -0.5 times the sum of every weight in the model. */
  private readonly baseScore: number;

  /**
   * @param model Weights keyed first by feature name, then by the character
   *   window the weight applies to. The object is copied into maps, so later
   *   changes to `model` do not affect the parser.
   */
  constructor(model: {[key: string]: {[key: string]: number}}) {
    const groups = new Map<string, Map<string, number>>();
    let total = 0;
    for (const [feature, weights] of Object.entries(model)) {
      const table = new Map<string, number>(Object.entries(weights));
      for (const weight of table.values()) {
        total += weight;
      }
      groups.set(feature, table);
    }
    this.model = groups;
    this.baseScore = -0.5 * total;
  }

  /**
   * Splits `sentence` at every boundary reported by {@link parseBoundaries}.
   *
   * @param sentence Text to split.
   * @returns The chunks in order; concatenating them yields `sentence`.
   *   An empty sentence yields an empty array.
   */
  parse(sentence: string): string[] {
    if (sentence === '') return [];

    const chunks: string[] = [];
    let start = 0;
    for (const boundary of this.parseBoundaries(sentence)) {
      chunks.push(sentence.slice(start, boundary));
      start = boundary;
    }
    // The text after the last boundary (or the whole sentence if none).
    chunks.push(sentence.slice(start));
    return chunks;
  }

  /**
   * Finds the positions at which `sentence` should be split.
   *
   * @param sentence Text to analyse.
   * @returns Ascending string indices `i` (1 <= i < sentence.length) whose
   *   score is strictly positive; a chunk ends right before each index.
   */
  parseBoundaries(sentence: string): number[] {
    const boundaries: number[] = [];

    // Resolve the weight table of each feature once per call instead of once
    // per character. Features absent from the model contribute nothing and
    // are dropped up front.
    const active: Array<{
      weights: Map<string, number>;
      from: number;
      to: number;
    }> = [];
    for (const [feature, from, to] of Parser.FEATURE_WINDOWS) {
      const weights = this.model.get(feature);
      if (weights !== undefined) active.push({weights, from, to});
    }

    for (let i = 1; i < sentence.length; i++) {
      let score = this.baseScore;
      for (const {weights, from, to} of active) {
        // `substring` clamps indices to [0, length], so a window reaching
        // past either end of the sentence is shortened (possibly to '')
        // rather than rejected.
        // NOTE(review): presumably model keys always have the full window
        // length, so shortened windows never match — confirm against the
        // model producer.
        score += weights.get(sentence.substring(i + from, i + to)) ?? 0;
      }
      if (score > 0) boundaries.push(i);
    }
    return boundaries;
  }
}
|