UNPKG

yaspeller/lib/dictionary.js

Version:

5.83 kB · JavaScript · View Raw

1'use strict';
2
3const { ERROR_TOO_MANY_ERRORS } = require('yandex-speller');
4
5const exitCodes = require('./exit-codes');
6
7const { replaceRusLettersWithAsterisk } = require('./helpers/string');
8const { hasEngRusLetters, replaceEngLettersWithAsterisk } = require('./helpers/string');
9const { loadFileAsJson } = require('./helpers/file');
10const { uniq, notUniq } = require('./helpers/array');
11const { consoleError, consoleWarn, consoleLog, consoleDebug } = require('./helpers/console');
12
// Character class for a single "word" character: a lowercase Latin or Russian
// letter, a digit or a hyphen. Uppercase letters are only covered when the
// pattern built from it uses the `i` flag.
const letters = '[a-zа-яё\\d-]';

// Finds exactly one word character wrapped in (...) or [...] that sits at the
// start of the text or after a word character, and is followed by a word
// character, a trailing "/" or the end of the text, e.g. "Unknow(n)wo[r]d".
// Case-insensitive.
const reNotOptimized = new RegExp(
    `(^|${letters})(\\(|\\[)${letters}(\\)|\\])(${letters}|\\/$|$)`,
    'i'
);

// Captures the first character of the text ($1) when it is a word character
// followed by another word character or the end of the text ($2).
// Case-sensitive, so a text starting with an uppercase letter does not match.
const rePrepare = new RegExp(`^(${letters})(${letters}|$)`);
17
class Dictionary {
    /**
     * Create a dictionary with no words loaded.
     */
    constructor() {
        this.dict = [];
    }

    /**
     * Set dictionary.
     *
     * The words are compiled to regular expressions by
     * `prepareDictionaryWords()` and replace the current dictionary.
     *
     * @param {string[]} words
     */
    set(words) {
        this.dict = this.prepareDictionaryWords(words);
    }

    /**
     * Get dictionary.
     *
     * @returns {RegExp[]} Compiled dictionary (empty until `set()` is called).
     */
    get() {
        return this.dict;
    }

    /**
     * Load dictionary.
     *
     * Any error raised while loading is printed with `consoleError()` and the
     * process exits with `exitCodes.ERROR_DICTIONARY`.
     *
     * @param {string} file - JSON file.
     * @returns {string[]}
     */
    loadDictionary(file) {
        let data = [];

        consoleDebug(`Get/check dictionary: ${file}`);

        try {
            // NOTE(review): the meaning of the second argument is defined by
            // `loadFileAsJson` in ./helpers/file — confirm there.
            data = loadFileAsJson(file, true);

            consoleDebug(`Use dictionary: ${file}`);
        } catch (e) {
            consoleError(e);
            process.exit(exitCodes.ERROR_DICTIONARY);
        }

        return data;
    }

    /**
     * Load dictionaries.
     *
     * Merges the words from the config and from every file, reports duplicate
     * words and typos for each source, reports duplicates across sources when
     * at least two sources were processed, and stores the merged result with
     * `set()`.
     *
     * @param {string[]} files
     * @param {string[]} configDictionary - Dictionary from .yaspellerrc
     */
    loadDictionaries(files, configDictionary) {
        let count = 0;
        let result = [];

        const prepare = (words, file) => {
            // Per-source duplicates are dropped here and reported separately below
            // (assuming `uniq` returns the list without repeats — see ./helpers/array).
            result = result.concat(uniq(words));
            this.checkDuplicateWords(words, `Dictionary duplicate words in "${file}":`);
            this.checkTyposInDictionary(words, file);

            count++;
        };

        if (configDictionary) {
            prepare(configDictionary, '.yaspellerrc');
        }

        if (files) {
            files.forEach(file => {
                prepare(this.loadDictionary(file), file);
            });
        }

        // With a single source the cross-source check would find nothing new.
        if (count >= 2) {
            this.checkDuplicateWords(result, 'Duplicate words in dictionaries:');
        }

        this.set(result);
    }

    /**
     * Check duplicate words in dictionary.
     *
     * Prints a warning with the given title followed by the words returned by
     * `notUniq()`, one per line.
     *
     * @param {string[]} words
     * @param {string} title - First line of the warning.
     * @returns {boolean} `true` if a warning was printed.
     */
    checkDuplicateWords(words, title) {
        const duplicates = notUniq(words);
        if (!duplicates.length) {
            return false;
        }

        consoleWarn(`${title}\n${duplicates.join('\n')}\n`);

        return true;
    }

    /**
     * Check typos in dictionary.
     *
     * Warns about every word flagged by `hasEngRusLetters()` (presumably words
     * that mix Latin and Cyrillic letters — see ./helpers/string) and prints
     * the word next to its two masked variants.
     *
     * @param {string[]} words
     * @param {string} file - Name of the source, used in the warning.
     * @returns {boolean} `true` if at least one word was flagged.
     */
    checkTyposInDictionary(words, file) {
        const typos = words.filter(item => hasEngRusLetters(item));

        const hasTypos = Boolean(typos.length);
        if (hasTypos) {
            consoleWarn(`Has typos in "${file}":`);
            typos.forEach(item => {
                const en = replaceRusLettersWithAsterisk(item);
                const ru = replaceEngLettersWithAsterisk(item);

                consoleWarn(`${item} - en: ${en}, ru: ${ru}`);
            });
            consoleLog('');
        }

        return hasTypos;
    }

    /**
     * Remove typos that are in the dictionary.
     *
     * Entries whose code is `ERROR_TOO_MANY_ERRORS` are always kept; every
     * other entry is kept only if its word matches no dictionary pattern.
     *
     * @param {Object[]} data - Array of typos.
     * @returns {Object[]} New array; `data` itself is not modified.
     */
    removeDictionaryWordsFromData(data) {
        const dictionary = this.get();

        return data.filter(typo =>
            typo.code === ERROR_TOO_MANY_ERRORS || this.isTypo(typo.word, dictionary)
        );
    }

    /**
     * It's a typo?
     *
     * @param {string} word
     * @param {RegExp[]} dictionary
     * @returns {boolean} `true` if no pattern of the dictionary matches the word.
     */
    isTypo(word, dictionary) {
        return !dictionary.some(item => item.test(word));
    }

    /**
     * Prepare dictionary words.
     *
     * Compiles every word to a RegExp anchored with `^` and `$`. Words that
     * look like a non-optimized RegExp produce a warning; words that are not
     * a valid RegExp produce an error message and are skipped.
     *
     * @param {string[]} dictionaryWords
     * @returns {RegExp[]}
     */
    prepareDictionaryWords(dictionaryWords) {
        const result = [];

        dictionaryWords.forEach(word => {
            if (this.isNotOptimizedRegExp(word)) {
                consoleWarn(`Not optimized dictionary RegExp in "${word}"`);
            }

            // A lowercase first letter also accepts its uppercase form:
            // unknownWord(s)? = unknownWord(s)? and UnknownWord(s)?
            // UnknownWord(s)? = UnknownWord(s)?
            let preparedWord = word.replace(
                rePrepare,
                ($, $1, $2) => '[' + $1 + $1.toUpperCase() + ']' + $2
            );

            // Anchor the pattern on both sides so it has to match the whole word.
            if (!preparedWord.startsWith('^')) {
                preparedWord = '^' + preparedWord;
            }

            if (!preparedWord.endsWith('$')) {
                preparedWord += '$';
            }

            try {
                result.push(new RegExp(preparedWord));
            } catch (e) {
                consoleError(`Incorrect dictionary RegExp in "${word}", ${e}`);
            }
        });

        return result;
    }

    /**
     * Is not optimized RegExp?
     *
     * @param {string} text
     * @returns {boolean} `true` if the text contains an empty `()` or `[]`,
     *   or matches `reNotOptimized`.
     */
    isNotOptimizedRegExp(text) {
        const hasEmptyGroup = text.search(/(\(\)|\[\])/) !== -1; // /[]Unknownword()/

        return hasEmptyGroup ||
            text.search(reNotOptimized) !== -1; // /Unknow(n)wo[r]d/
    }
}
229
// The module exports one Dictionary instance rather than the class itself.
module.exports = new Dictionary();

1	`'use strict';`
2
3	`const { ERROR_TOO_MANY_ERRORS } = require('yandex-speller');`
4
5	`const exitCodes = require('./exit-codes');`
6
7	`const { replaceRusLettersWithAsterisk } = require('./helpers/string');`
8	`const { hasEngRusLetters, replaceEngLettersWithAsterisk } = require('./helpers/string');`
9	`const { loadFileAsJson } = require('./helpers/file');`
10	`const { uniq, notUniq } = require('./helpers/array');`
11	`const { consoleError, consoleWarn, consoleLog, consoleDebug } = require('./helpers/console');`
12
13	`const letters = '[a-zа-яё\\d-]';`
14	`const reNotOptimized = new RegExp('(^|' + letters + ')' +`
15	`'(\\(|\\[)' + letters + '(\\)|\\])(' + letters + '|\\/$|$)', 'i');`
16	`const rePrepare = new RegExp('^(' + letters + ')(' + letters + '|$)');`
17
18	`class Dictionary {`
19	`constructor() {`
20	`this.dict = [];`
21	`}`
22	`/**`
23	`* Set dictionary.`
24	`*`
25	`* @param {string[]} words`
26	`*/`
27	`set(words) {`
28	`this.dict = this.prepareDictionaryWords(words);`
29	`}`
30
31	`/**`
32	`* Get dictionary.`
33	`*`
34	`* @returns {RegExp[]}`
35	`*/`
36	`get() {`
37	`return this.dict;`
38	`}`
39
40	`/**`
41	`* Load dictionary.`
42	`*`
43	`* @param {string} file - JSON file.`
44	`* @returns {string[]}`
45	`*/`
46	`loadDictionary(file) {`
47	`let data = [];`
48
49	consoleDebug(`Get/check dictionary: ${file}`);
50
51	`try {`
52	`data = loadFileAsJson(file, true);`
53
54	consoleDebug(`Use dictionary: ${file}`);
55	`} catch (e) {`
56	`consoleError(e);`
57	`process.exit(exitCodes.ERROR_DICTIONARY);`
58	`}`
59
60	`return data;`
61	`}`
62
63	`/**`
64	`* Load dictionaries.`
65	`*`
66	`* @param {string[]} files`
67	`* @param {string[]} configDictionary - Dictionary from .yaspellerrc`
68	`*/`
69	`loadDictionaries(files, configDictionary) {`
70	`let count = 0;`
71	`let result = [];`
72
73	`const prepare = (words, file) => {`
74	`result = result.concat(uniq(words));`
75	this.checkDuplicateWords(words, `Dictionary duplicate words in "${file}":`);
76	`this.checkTyposInDictionary(words, file);`
77
78	`count++;`
79	`};`
80
81	`if (configDictionary) {`
82	`prepare(configDictionary, '.yaspellerrc');`
83	`}`
84
85	`files && files.forEach(file => {`
86	`prepare(this.loadDictionary(file), file);`
87	`});`
88
89	`if (count >= 2) {`
90	`this.checkDuplicateWords(result, 'Duplicate words in dictionaries:');`
91	`}`
92
93	`this.set(result);`
94	`}`
95
96	`/**`
97	`* Check duplicate words in dictionary.`
98	`*`
99	`* @param {string[]} words`
100	`* @param {string} title`
101	`* @returns {boolean}`
102	`*/`
103	`checkDuplicateWords(words, title) {`
104	`const duplicates = notUniq(words);`
105	`if (duplicates.length) {`
106	`consoleWarn(title + '\n' + duplicates.join('\n') + '\n');`
107
108	`return true;`
109	`}`
110
111	`return false;`
112	`}`
113
114	`/**`
115	`* Check typos in dictionary.`
116	`*`
117	`* @param {string[]} words`
118	`* @param {string} file`
119	`* @returns {boolean}`
120	`*/`
121	`checkTyposInDictionary(words, file) {`
122	`const typos = [];`
123	`words.forEach(item => {`
124	`if (hasEngRusLetters(item)) {`
125	`typos.push(item);`
126	`}`
127	`});`
128
129	`const hasTypos = Boolean(typos.length);`
130	`if (hasTypos) {`
131	consoleWarn(`Has typos in "${file}":`);
132	`typos.forEach(item => {`
133	`consoleWarn(item +`
134	`' - en: ' + replaceRusLettersWithAsterisk(item) +`
135	`', ru: ' + replaceEngLettersWithAsterisk(item)`
136	`);`
137	`});`
138	`consoleLog('');`
139	`}`
140
141	`return hasTypos;`
142	`}`
143
144	`/**`
145	`* Remove typos that is in the dictionary.`
146	`*`
147	`* @param {Object[]} data - Array of typos.`
148	`* @returns {Object[]}`
149	`*/`
150	`removeDictionaryWordsFromData(data) {`
151	`const result = [];`
152	`const dictionary = this.get();`
153
154	`data.forEach(typo => {`
155	`if (typo.code === ERROR_TOO_MANY_ERRORS || this.isTypo(typo.word, dictionary)) {`
156	`result.push(typo);`
157	`}`
158	`});`
159
160	`return result;`
161	`}`
162
163	`/**`
164	`* It's a typo?`
165	`*`
166	`* @param {string} word`
167	`* @param {RegExp[]} dictionary`
168	`* @returns {boolean}`
169	`*/`
170	`isTypo(word, dictionary) {`
171	`return !dictionary.some(item => item.test(word));`
172	`}`
173
174	`/**`
175	`* Prepare dictionary words.`
176	`*`
177	`* @param {string[]} dictionaryWords`
178	`* @returns {RegExp[]}`
179	`*/`
180	`prepareDictionaryWords(dictionaryWords) {`
181	`const result = [];`
182
183	`dictionaryWords.forEach(word => {`
184	`if (this.isNotOptimizedRegExp(word)) {`
185	consoleWarn(`Not optimized dictionary RegExp in "${word}"`);
186	`}`
187
188	`// unknownWord(s)? = unknownWord(s)? and UnknownWord(s)?`
189	`// UnknownWord(s)? = UnknownWord(s)?`
190
191	`let preparedWord = word.replace(rePrepare, ($, $1, $2) => '[' + $1 + $1.toUpperCase() + ']' + $2);`
192
193	`if (preparedWord.search(/\^/) !== 0) {`
194	`preparedWord = '^' + preparedWord;`
195	`}`
196
197	`if (preparedWord.search(/\$/) !== preparedWord.length - 1) {`
198	`preparedWord += '$';`
199	`}`
200
201	`try {`
202	`result.push(new RegExp(preparedWord));`
203	`} catch (e) {`
204	consoleError(`Incorrect dictionary RegExp in "${word}", ${e}`);
205	`}`
206	`});`
207
208	`return result;`
209	`}`
210
211	`/**`
212	`* Is not optimized RegExp?`
213	`*`
214	`* @param {string} text`
215	`* @returns {boolean}`
216	`*/`
217	`isNotOptimizedRegExp(text) {`
218	`if (text.search(/(\(\)|\[\])/) !== -1) { // /[]Unknownword()/`
219	`return true;`
220	`}`
221
222	`if (text.search(reNotOptimized) !== -1) { // /Unknow(n)wo[r]d/`
223	`return true;`
224	`}`
225
226	`return false;`
227	`}`
228	`}`
229
230	`module.exports = new Dictionary();`