1 | 'use strict';
|
2 |
|
3 | const { ERROR_TOO_MANY_ERRORS } = require('yandex-speller');
|
4 |
|
5 | const exitCodes = require('./exit-codes');
|
6 |
|
7 | const { replaceRusLettersWithAsterisk } = require('./helpers/string');
|
8 | const { hasEngRusLetters, replaceEngLettersWithAsterisk } = require('./helpers/string');
|
9 | const { loadFileAsJson } = require('./helpers/file');
|
10 | const { uniq, notUniq } = require('./helpers/array');
|
11 | const { consoleError, consoleWarn, consoleLog, consoleDebug } = require('./helpers/console');
|
12 |
|
// Character class (as RegExp source text) for one "word" character:
// a lowercase Latin or Cyrillic letter, a digit or a hyphen.
const letters = '[a-zа-яё\\d-]';

// Case-insensitive pattern that finds a single word character wrapped in
// parentheses or square brackets, e.g. "a(b)c" or "[a]" — a group or class
// holding exactly one character.
const reNotOptimized = new RegExp(
    `(^|${letters})(\\(|\\[)${letters}(\\)|\\])(${letters}|\\/$|$)`,
    'i'
);

// Captures the first character of a dictionary entry (group 1) when it is a
// word character followed by another word character or the end of the entry
// (group 2). No "i" flag, so an uppercase first letter does not match.
const rePrepare = new RegExp(`^(${letters})(${letters}|$)`);
|
17 |
|
/**
 * User dictionary: a list of regular expressions describing words that
 * must not be reported as typos.
 */
class Dictionary {
    constructor() {
        // Compiled dictionary entries (RegExp objects).
        this.dict = [];
    }

    /**
     * Replace the current dictionary with the given entries.
     *
     * @param {string[]} words - Dictionary entries (RegExp source strings).
     */
    set(words) {
        this.dict = this.prepareDictionaryWords(words);
    }

    /**
     * Get the compiled dictionary.
     *
     * @returns {RegExp[]}
     */
    get() {
        return this.dict;
    }

    /**
     * Load one dictionary file.
     *
     * On failure the error is printed and the process exits with
     * `exitCodes.ERROR_DICTIONARY`.
     *
     * @param {string} file - Path to the dictionary file.
     * @returns {string[]} Parsed file content.
     */
    loadDictionary(file) {
        let data = [];

        consoleDebug(`Get/check dictionary: ${file}`);

        try {
            // NOTE(review): meaning of the second argument is defined by
            // loadFileAsJson in ./helpers/file — confirm there.
            data = loadFileAsJson(file, true);

            consoleDebug(`Use dictionary: ${file}`);
        } catch (e) {
            consoleError(e);
            process.exit(exitCodes.ERROR_DICTIONARY);
        }

        return data;
    }

    /**
     * Load every dictionary source, warn about duplicates and mixed-alphabet
     * entries, and install the combined list as the current dictionary.
     *
     * @param {string[]} [files] - Paths of dictionary files.
     * @param {string[]} [configDictionary] - Words taken from the config;
     *     reported under the name ".yaspellerrc".
     */
    loadDictionaries(files, configDictionary) {
        let count = 0;
        let result = [];

        const prepare = (words, file) => {
            result = result.concat(uniq(words));
            this.checkDuplicateWords(words, `Dictionary duplicate words in "${file}":`);
            this.checkTyposInDictionary(words, file);

            count++;
        };

        if (configDictionary) {
            prepare(configDictionary, '.yaspellerrc');
        }

        if (files) {
            files.forEach(file => {
                prepare(this.loadDictionary(file), file);
            });
        }

        // The combined check only makes sense with two or more sources.
        if (count >= 2) {
            this.checkDuplicateWords(result, 'Duplicate words in dictionaries:');
        }

        this.set(result);
    }

    /**
     * Print a warning listing the words reported by `notUniq`.
     *
     * @param {string[]} words
     * @param {string} title - First line of the warning.
     * @returns {boolean} `true` if a warning was printed.
     */
    checkDuplicateWords(words, title) {
        const duplicates = notUniq(words);
        if (duplicates.length === 0) {
            return false;
        }

        consoleWarn(`${title}\n${duplicates.join('\n')}\n`);

        return true;
    }

    /**
     * Warn about entries for which `hasEngRusLetters` is true. For each such
     * entry two variants are printed: one with Russian letters replaced by
     * asterisks and one with English letters replaced by asterisks.
     *
     * @param {string[]} words
     * @param {string} file - Source name used in the warning.
     * @returns {boolean} `true` if at least one such entry was found.
     */
    checkTyposInDictionary(words, file) {
        const typos = words.filter(item => hasEngRusLetters(item));
        const hasTypos = typos.length > 0;

        if (hasTypos) {
            consoleWarn(`Has typos in "${file}":`);
            typos.forEach(item => {
                const en = replaceRusLettersWithAsterisk(item);
                const ru = replaceEngLettersWithAsterisk(item);

                consoleWarn(`${item} - en: ${en}, ru: ${ru}`);
            });
            consoleLog('');
        }

        return hasTypos;
    }

    /**
     * Drop the typos whose word is covered by the dictionary.
     * Entries with the code `ERROR_TOO_MANY_ERRORS` are always kept.
     *
     * @param {Object[]} data - Items with `code` and `word` properties.
     * @returns {Object[]} New array with the remaining items.
     */
    removeDictionaryWordsFromData(data) {
        const dictionary = this.get();

        return data.filter(typo =>
            typo.code === ERROR_TOO_MANY_ERRORS || this.isTypo(typo.word, dictionary)
        );
    }

    /**
     * Check that a word is not covered by the dictionary.
     *
     * @param {string} word
     * @param {RegExp[]} dictionary
     * @returns {boolean} `true` if no dictionary entry matches the word.
     */
    isTypo(word, dictionary) {
        return !dictionary.some(item => item.test(word));
    }

    /**
     * Compile dictionary entries into anchored regular expressions.
     *
     * An entry starting with a lowercase word character also accepts the
     * uppercase form of that character ("word" matches "word" and "Word").
     * Every entry is anchored with "^" and "$" unless it already is.
     * Entries that are not valid RegExp sources are reported and skipped.
     *
     * @param {string[]} dictionaryWords
     * @returns {RegExp[]}
     */
    prepareDictionaryWords(dictionaryWords) {
        const result = [];

        // A trailing "$" is an anchor only when preceded by an even number of
        // backslashes; "\$" is a literal dollar sign and still needs the anchor.
        const reEndAnchor = /(?:^|[^\\])(?:\\\\)*\$$/;

        dictionaryWords.forEach(word => {
            if (this.isNotOptimizedRegExp(word)) {
                consoleWarn(`Not optimized dictionary RegExp in "${word}"`);
            }

            // The first letter in upper case:
            // "unknownword" => "[uU]nknownword"
            let preparedWord = word.replace(
                rePrepare,
                (match, first, rest) => `[${first}${first.toUpperCase()}]${rest}`
            );

            if (!preparedWord.startsWith('^')) {
                preparedWord = `^${preparedWord}`;
            }

            if (!reEndAnchor.test(preparedWord)) {
                preparedWord += '$';
            }

            try {
                result.push(new RegExp(preparedWord));
            } catch (e) {
                consoleError(`Incorrect dictionary RegExp in "${word}", ${e}`);
            }
        });

        return result;
    }

    /**
     * Detect entries that can be written more simply: empty "()" / "[]", or a
     * single character wrapped in parentheses or square brackets.
     *
     * @param {string} text - Dictionary entry.
     * @returns {boolean}
     */
    isNotOptimizedRegExp(text) {
        return /(\(\)|\[\])/.test(text) || reNotOptimized.test(text);
    }
}
|
229 |
|
// The module exports one Dictionary instance created at load time
// (not the class itself).
module.exports = new Dictionary();
|