1 | const async = require('async');
|
2 | const Translate = require('@google-cloud/translate');
|
3 | const locales = require('scratch-l10n');
|
// Client used for all Google Translate requests in this file. Its credentials are
// read from the GOOGLE_PRIVATE_KEY and GOOGLE_CLIENT_EMAIL environment variables.
const client = new Translate({
    credentials: {
        private_key: process.env.GOOGLE_PRIVATE_KEY,
        client_email: process.env.GOOGLE_CLIENT_EMAIL
    }
});
|
10 |
|
// Languages the translate extension menu used to offer. The extension launched with
// an outdated list of Scratch languages; these codes are kept so that projects whose
// blocks still have one of them selected keep working after their removal from the menu.
const PREVIOUSLY_SUPPORTED_LIST = [
    'ab', 'ms', 'be', 'eo', 'hy', 'hi', 'kn', 'ht', 'ku', 'la',
    'mk', 'ml', 'mt', 'mr', 'mn', 'my', 'nn', 'sq', 'te', 'uz'
];

// Every language code we look up: the current Scratch locales followed by the
// previously supported ones.
const SUPPORTED_LOCALES = [
    ...Object.keys(locales.default),
    ...PREVIOUSLY_SUPPORTED_LIST
];
|
19 |
|
// English names of spoken languages, keyed by language code, for use by the
// Text to Speech extension.
const SPOKEN_LANGUAGES = {
    'zh-cn': 'Chinese (Mandarin)', // distinct from the written "chinese (simplified)" and "chinese (traditional)"
    'hi': 'Hindi', // available in text to speech but not yet in supported_locales
    'pt-br': 'Portuguese (Brazilian)', // not a separate entry in the translate extension menu
    'es-419': 'Spanish (Latin American)' // not a separate entry in the translate extension menu
};

// Parallel arrays: spokenLanguageNamesEn[i] is the English name for spokenLanguageKeys[i].
const spokenLanguageKeys = [];
const spokenLanguageNamesEn = [];
Object.entries(SPOKEN_LANGUAGES).forEach(([code, englishName]) => {
    spokenLanguageKeys.push(code);
    spokenLanguageNamesEn.push(englishName);
});

// The name of the spoken version of Chinese we provide needs a custom translation
// into Chinese, keyed here by the Chinese locale it is shown in.
const CUSTOM_NAMES_FOR_MANDARIN = {
    'zh-cn': '中文',
    'zh-tw': '中文'
};
|
36 |
|
// Scratch and Google Translate use different codes for a handful of languages.
// The two maps below convert between them.
const scratchToGoogleMap = {
    'zh-cn': 'zh',
    'nb': 'no',
    'he': 'iw',
    'es-419': 'es',
    'pt-br': 'pt',
    'ja-hira': 'ja'
};

// Invert scratchToGoogleMap so that a Google code can be mapped back to its Scratch code.
const googleToScratchMap = {};
for (const scratchCode of Object.keys(scratchToGoogleMap)) {
    const googleCode = scratchToGoogleMap[scratchCode];
    googleToScratchMap[googleCode] = scratchCode;
}
|
53 |
|
/**
 * Flattens the per-language menu lists into one lookup from translated language
 * name to language code, e.g.
 * {espanol: es, spanish: es, japanese: ja, aleman: de, ... etc.}
 * The language menu in the translate block uses this lookup to decide whether to
 * accept a language name dropped on top of the menu.
 * @param {object} languageMap Maps a language code to a list of {code, name} entries
 *     for the languages we can translate to.
 * @return {object} Maps each lowercased language name to its lowercased language code.
 */
const buildNameToCodeMap = function (languageMap) {
    const nameToCode = {};
    for (const entries of Object.values(languageMap)) {
        for (const entry of entries) {
            // Names and codes are both lowercased for ease of comparison later.
            const lowerName = entry.name.toLowerCase();
            nameToCode[lowerName] = entry.code.toLowerCase();
        }
    }
    // Google Translate only gives us the kanji spelling of "Japanese" in Japanese,
    // so the hiragana spelling (nihongo) is added by hand.
    nameToCode['にほんご'] = 'ja';
    return nameToCode;
};
|
76 |
|
/**
 * Gets an individual language's language list from Google Translate and adds the
 * result to the accumulator object, keyed by the lowercased language code.
 * Only languages that are in SUPPORTED_LOCALES, or whose Google code maps to a
 * Scratch code, are kept; the latter are stored under their Scratch code.
 * @param {object} acc Accumulates results from the set of transform calls to get supported languages.
 * @param {string} langCode The language code to look up.
 * @param {number} index The index into the list of languages we're looking up (unused).
 * @param {function} callback Called once this lookup is done: with no arguments on
 *     success or when the language is not known to Google Translate, with the error otherwise.
 */
const getLanguageList = function (acc, langCode, index, callback) {
    client.getLanguages(langCode, (err, translateObj) => {
        if (err) {
            // Invalid languages happen since Scratch supports some that Google
            // Translate does not. Languages whose codes merely differ between the
            // two (e.g. es-419 vs. es) are added under the Scratch code below.
            // The message must actually contain the marker text: the raw indexOf()
            // result is -1 (truthy) when the text is absent and 0 (falsy) when it is
            // at the start, so it cannot be used as a boolean.
            const isInvalidLanguage = err.code === 400 &&
                typeof err.message === 'string' &&
                err.message.includes('language is invalid');
            if (isInvalidLanguage) {
                return callback();
            }
            // Avoid unhandled rejection, and allow exiting with error status
            return async.nextTick(callback, err);
        }

        // Build up the list of languages (code and name) that we can translate to.
        const result = [];
        for (const language of translateObj) {
            // Lowercase all the language codes for ease of comparison later.
            const code = language.code.toLowerCase();
            if (SUPPORTED_LOCALES.includes(code)) {
                language.code = code;
                result.push(language);
            } else if (googleToScratchMap[code]) {
                // This is a Google Translate code that Scratch spells differently:
                // store a copy carrying the Scratch code instead. The lookup uses the
                // lowercased code so it agrees with the check above.
                result.push(Object.assign({}, language, {
                    code: googleToScratchMap[code].toLowerCase()
                }));
            }
        }

        const requestedCode = langCode.toLowerCase();
        acc[requestedCode] = result;
        // If there's a language code that differs, e.g. Scratch has es-419 but
        // Google Translate has es, add that to the map as well (unless already present).
        const scratchCode = googleToScratchMap[requestedCode];
        if (scratchCode && !acc[scratchCode.toLowerCase()]) {
            acc[scratchCode.toLowerCase()] = result;
        }
        return callback();
    });
};
|
122 |
|
/**
 * Strips the previously supported languages out of the menu map, in place, so they
 * don't show up in the translate menu's list: they are removed from every
 * language's list, and their own top-level entries are deleted.
 * @param {object} menuMap Maps a language code to the list of {code, name} entries
 *     to show in the translate menu for that language.
 */
const removePreviouslySupported = function (menuMap) {
    const isStillSupported = langInfo => !PREVIOUSLY_SUPPORTED_LIST.includes(langInfo.code);

    // First pass: drop the retired languages from each language's list.
    Object.keys(menuMap).forEach(code => {
        menuMap[code] = menuMap[code].filter(isStillSupported);
    });

    // Second pass: drop the lists that belong to the retired languages themselves.
    for (const retiredCode of PREVIOUSLY_SUPPORTED_LIST) {
        if (menuMap[retiredCode]) {
            delete menuMap[retiredCode];
        }
    }
};
|
144 |
|
/**
 * Repairs translated language names whose opening parenthesis has gone missing.
 * When the string ends with a close paren, contains no open paren, and has at
 * least one space, an open paren is inserted right after the first space.
 * Any other string is returned unchanged.
 * @param {string} item The string to fix
 * @return {string} the fixed string
 */
const fixParens = function (item) {
    if (!item.endsWith(')') || item.indexOf('(') !== -1) {
        return item;
    }
    const firstSpace = item.indexOf(' ');
    if (firstSpace === -1) {
        // A single word: there is no second word to open the paren on.
        return item;
    }
    const head = item.slice(0, firstSpace + 1);
    const tail = item.slice(firstSpace + 1);
    return `${head}(${tail}`;
};
|
164 |
|
/**
 * Gets the translations into a particular language of the names of the set of spoken
 * languages, and adds these to an accumulator object keyed by the lowercased
 * language code. Each translated name is passed through fixParens and paired with
 * the spoken language code at the same position.
 * @param {object} acc Accumulates results from the set of transform calls.
 * @param {string} langCode The language code to translate into.
 * @param {number} index The index into the list of languages we're looking up (unused).
 * @param {function} callback Called once this translation is done: with no arguments on
 *     success or when the language is not known to Google Translate, with the error otherwise.
 */
const translateSpokenLanguageNames = function (acc, langCode, index, callback) {
    const options = {
        from: 'en',
        to: langCode
    };
    client.translate(spokenLanguageNamesEn, options, (err, translation) => {
        if (err) {
            // Invalid languages happen since Scratch supports some that Google
            // Translate does not. Entries for Scratch-specific codes (e.g. es-419)
            // are filled in separately, after all translations have finished.
            // The message must actually contain the marker text: the raw indexOf()
            // result is -1 (truthy) when the text is absent and 0 (falsy) when it is
            // at the start, so it cannot be used as a boolean.
            const isInvalidLanguage = err.code === 400 &&
                typeof err.message === 'string' &&
                err.message.includes('language is invalid');
            if (isInvalidLanguage) {
                return callback();
            }
            // Avoid unhandled rejection, and allow exiting with error status
            return async.nextTick(callback, err);
        }
        // translation[i] is the translated name of the language spokenLanguageKeys[i].
        acc[langCode.toLowerCase()] = translation.map((translatedName, i) => ({
            code: spokenLanguageKeys[i],
            name: fixParens(translatedName)
        }));
        return callback();
    });
};
|
201 |
|
/**
 * Fills in spoken language entries for the Scratch-specific language codes.
 * For every Scratch code that has no entry yet, the entry stored under the
 * corresponding Google code is reused (the same list object, not a copy).
 * @param {object} spokenLanguages An object containing names of spoken languages
 *     translated into other languages. Modified in place.
 */
const addScratchEntriesToSpokenLanguages = function (spokenLanguages) {
    for (const [scratchCode, googleCode] of Object.entries(scratchToGoogleMap)) {
        const alreadyPresent = Boolean(spokenLanguages[scratchCode]);
        if (alreadyPresent || !googleCode) {
            continue;
        }
        spokenLanguages[scratchCode] = spokenLanguages[googleCode];
    }
};
|
217 |
|
/**
 * Replaces the Google Translate name for spoken Chinese with our custom name in
 * the Chinese-language entries of the spoken languages map. Only the 'zh-cn'
 * item of each listed entry is renamed; entries or items that are missing are skipped.
 * @param {object} spokenLanguages An object containing names of spoken languages
 *     translated into other languages. Modified in place.
 */
const useCustomChineseNames = function (spokenLanguages) {
    for (const [locale, customName] of Object.entries(CUSTOM_NAMES_FOR_MANDARIN)) {
        const namesForLocale = spokenLanguages[locale];
        if (!namesForLocale) {
            continue;
        }
        const mandarin = namesForLocale.find(lang => lang.code === 'zh-cn');
        if (mandarin) {
            mandarin.name = customName;
        }
    }
};
|
236 |
|
/**
 * Builds up an object containing information about language codes and language names:
 *  - menuMap maps a Scratch language code to the list of languages to show in the
 *    Google Translate menu.
 *  - nameMap maps language names (translated into lots of languages) to language codes.
 *  - scratchToGoogleMap maps Scratch language codes to Google language codes.
 *  - previouslySupported lists the language codes that we used to put in the language
 *    list for the translate block but no longer do.
 *  - spokenLanguages maps a Scratch language code to a list of spoken language names
 *    that are distinct from written language names, for use in the Text to Speech
 *    extension's language menu.
 * A failure in either lookup phase is re-thrown outside the promise chain so that it
 * surfaces as an uncaught exception; the callback is not invoked in that case.
 * @param {function} callback Function called with the result object when building all the maps finishes.
 */
const generateMapping = module.exports = function (callback) {
    // The spoken language name map is generated by translation requests, but we need
    // to seed the English data, because the translation from English to English does
    // not provide any results. Only this one name is needed, because it is the only
    // name in English that differs from the names of written languages in the menuMap.
    const spokenLanguageNameMap = {
        en: [
            {
                code: 'zh-cn',
                name: 'Chinese (Mandarin)'
            }
        ]
    };

    let spokenLanguages;

    // First, translate the spoken language names into each language.
    async.transform(SUPPORTED_LOCALES, spokenLanguageNameMap, translateSpokenLanguageNames)
        .then(translatedNames => {
            spokenLanguages = translatedNames;
            addScratchEntriesToSpokenLanguages(spokenLanguages);
            useCustomChineseNames(spokenLanguages);

            // Then, generate the full menuMap.
            return async.transform(SUPPORTED_LOCALES, {}, getLanguageList);
        })
        .then(menuMap => {
            // menuMap maps a language code to the code/name pairs we can translate to.
            const nameToLanguageCode = buildNameToCodeMap(menuMap);
            // The name map is built before the previously supported languages are
            // removed: those languages must stay in the name map so that a block whose
            // value is, say, "esperanto" still works when dropped onto the language
            // menu even though Esperanto isn't in the list anymore.
            removePreviouslySupported(menuMap);
            callback({
                menuMap: menuMap,
                nameMap: nameToLanguageCode,
                scratchToGoogleMap: scratchToGoogleMap,
                previouslySupported: PREVIOUSLY_SUPPORTED_LIST,
                spokenLanguages: spokenLanguages
            });
        })
        .catch(err => {
            // Re-throw the original error (keeping its stack and code) from a fresh
            // macrotask so it escapes the promise chain as an uncaught exception
            // instead of being left as a floating rejection.
            setImmediate(() => {
                throw err;
            });
        });
};
|
296 |
|
// When this file is run directly (rather than required), generate the mapping
// and print it to stdout as JSON.
if (require.main === module) {
    const printAsJson = function (mapping) {
        const serialized = JSON.stringify(mapping);
        process.stdout.write(serialized);
    };
    generateMapping(printAsJson);
}
|