// Retrieved from the UNPKG file viewer (15.2 kB, JavaScript).
// Sect 6.2 Language Tags
// ======================
3
4import {
5 expBCP47Syntax,
6 expExtSequences,
7 expVariantDupes,
8 expSingletonDupes,
9} from './exp';
10
11import {
12 hop,
13 arrJoin,
14 arrSlice,
15} from "./util.js";
16
/**
 * Module-wide default locale.
 *
 * Per the original note, this is meant to hold the first locale whose data
 * was added. It is `undefined` until `setDefaultLocale` is called; nothing
 * else in this section assigns it.
 */
export let defaultLocale;

/**
 * Record `locale` as the module-wide default locale.
 *
 * The value is stored as given: no validation or canonicalization happens
 * here.
 *
 * @param {*} locale - value to expose through `defaultLocale`.
 */
export function setDefaultLocale(locale) {
    defaultLocale = locale;
}
22
// IANA Subtag Registry redundant tag and subtag maps

/**
 * Expand a compact `{ prefix: [ "code code ...", ... ] }` description into
 * the lookup shape used for extlang subtags: `{ code: [code, prefix] }`.
 *
 * @param {Object} groups - rows of space-separated extlang codes, keyed by
 *   the primary language prefix they belong to.
 * @returns {Object} one `[code, prefix]` pair per extlang code.
 */
function expandExtLangGroups(groups) {
    const table = {};

    Object.keys(groups).forEach((prefix) => {
        groups[prefix].join(" ").split(" ").forEach((code) => {
            table[code] = [code, prefix];
        });
    });

    return table;
}

const redundantTags = {
    // Complete tags mapped to the tag that replaces them.
    tags: {
        "art-lojban": "jbo",
        "i-ami": "ami",
        "i-bnn": "bnn",
        "i-hak": "hak",
        "i-klingon": "tlh",
        "i-lux": "lb",
        "i-navajo": "nv",
        "i-pwn": "pwn",
        "i-tao": "tao",
        "i-tay": "tay",
        "i-tsu": "tsu",
        "no-bok": "nb",
        "no-nyn": "nn",
        "sgn-BE-FR": "sfb",
        "sgn-BE-NL": "vgt",
        "sgn-CH-DE": "sgg",
        "zh-guoyu": "cmn",
        "zh-hakka": "hak",
        "zh-min-nan": "nan",
        "zh-xiang": "hsn",
        "sgn-BR": "bzs",
        "sgn-CO": "csn",
        "sgn-DE": "gsg",
        "sgn-DK": "dsl",
        "sgn-ES": "ssp",
        "sgn-FR": "fsl",
        "sgn-GB": "bfi",
        "sgn-GR": "gss",
        "sgn-IE": "isg",
        "sgn-IT": "ise",
        "sgn-JP": "jsl",
        "sgn-MX": "mfs",
        "sgn-NI": "ncs",
        "sgn-NL": "dse",
        "sgn-NO": "nsl",
        "sgn-PT": "psr",
        "sgn-SE": "swl",
        "sgn-US": "ase",
        "sgn-ZA": "sfs",
        "zh-cmn": "cmn",
        "zh-cmn-Hans": "cmn-Hans",
        "zh-cmn-Hant": "cmn-Hant",
        "zh-gan": "gan",
        "zh-wuu": "wuu",
        "zh-yue": "yue",
    },

    // Individual subtags mapped to the subtag that replaces them. Keys are
    // spelled in the case the subtag has after case regularization.
    subtags: {
        BU: "MM",
        DD: "DE",
        FX: "FR",
        TP: "TL",
        YD: "YE",
        ZR: "CD",
        heploc: "alalc97",
        "in": "id",
        iw: "he",
        ji: "yi",
        jw: "jv",
        mo: "ro",
        ayx: "nun",
        bjd: "drl",
        ccq: "rki",
        cjr: "mom",
        cka: "cmr",
        cmk: "xch",
        drh: "khk",
        drw: "prs",
        gav: "dev",
        hrr: "jal",
        ibi: "opa",
        kgh: "kml",
        lcq: "ppr",
        mst: "mry",
        myt: "mry",
        sca: "hle",
        tie: "ras",
        tkk: "twm",
        tlw: "weo",
        tnf: "prs",
        ybd: "rki",
        yma: "lrr",
    },

    // Extlang subtags: code -> [replacement subtag, prefix that becomes
    // redundant in front of it]. Listed per prefix and expanded at load time.
    extLang: expandExtLangGroups({
        ar: [
            "aao abh abv acm acq acw acx acy adf aeb aec afb ajp apc apd",
            "arb arq ars ary arz auz avl ayh ayl ayn ayp bbz pga shu ssh",
        ],
        kok: ["gom knn"],
        lv: ["ltg lvs"],
        ms: [
            "bjn btj bve bvu coa dup hji jak jax kvb kvr kxd",
            "lce lcf liw max meo mfa mfb min mqg msi mui",
            "orn ors pel pse tmw urk vkk vkt xmm zlm zmi zsm",
        ],
        sgn: [
            "ads aed aen afg ase asf asp asq asw",
            "bfi bfk bog bqn bqy bvl bzs",
            "cds csc csd cse csf csg csl csn csq csr",
            "doq dse dsl",
            "ecs esl esn eso eth",
            "fcs fse fsl fss",
            "gds gse gsg gsm gss gus",
            "hab haf hds hks hos hps hsh hsl",
            "icl ils inl ins ise isg isr",
            "jcs jhs jls jos jsl jus",
            "kgi kvk",
            "lbs lls lsg lsl lso lsp lst lsy",
            "mdl mfs mre msd msr mzc mzg mzy",
            "nbs ncs nsi nsl nsp nsr nzs",
            "okl",
            "pks prl prz psc psd psg psl pso psp psr pys",
            "rms rsi rsl",
            "sdl sfb sfs sgg sgx slf sls sqk sqs ssp ssr svk swl syy",
            "tse tsm tsq tss tsy tza",
            "ugn ugy ukl uks",
            "vgt vsi vsl vsv",
            "xki xml xms",
            "yds ysl",
            "zib zsl",
        ],
        sw: ["swc swh"],
        uz: ["uzn uzs"],
        zh: ["cdo cjy cmn cpx czh czo gan hak hsn lzh mnp nan wuu yue"],
    }),
};
338
/**
 * Convert only a-z to uppercase as per section 6.1 of the spec.
 *
 * Every character outside "a".."z" is left exactly as it is, so the result
 * never depends on non-ASCII case mappings.
 *
 * @param {string} str - text to convert.
 * @returns {string} `str` with each "a".."z" replaced by its "A".."Z"
 *   counterpart. A value that is not a primitive string is returned
 *   unchanged.
 */
export function toLatinUpperCase (str) {
    // Hand non-strings back untouched rather than failing on a missing
    // String method.
    if (typeof str !== 'string')
        return str;

    // One pass over the input; the character class restricts the mapping
    // to ASCII lowercase letters.
    return str.replace(/[a-z]/g, (ch) => ch.toUpperCase());
}
354
/**
 * The IsStructurallyValidLanguageTag abstract operation verifies that the
 * locale argument (which must be a String value)
 *
 * - represents a well-formed BCP 47 language tag as specified in RFC 5646
 *   section 2.1, or successor,
 * - does not include duplicate variant subtags, and
 * - does not include duplicate singleton subtags.
 *
 * Each condition is delegated to one of the imported patterns
 * (`expBCP47Syntax`, `expVariantDupes`, `expSingletonDupes`), tried in that
 * order; evaluation stops at the first condition that fails.
 *
 * @param {string} locale - the language tag to check.
 * @returns {boolean} true when the syntax pattern matches and neither
 *   duplicate pattern does; false otherwise.
 */
export function /* 6.2.2 */IsStructurallyValidLanguageTag(locale) {
    return expBCP47Syntax.test(locale)      // well-formed per RFC 5646
        && !expVariantDupes.test(locale)    // no repeated variant subtag
        && !expSingletonDupes.test(locale); // no repeated singleton subtag
}
385
/**
 * The CanonicalizeLanguageTag abstract operation returns the canonical and case-
 * regularized form of the locale argument (which must be a String value that is
 * a structurally valid BCP 47 language tag as verified by the
 * IsStructurallyValidLanguageTag abstract operation). It takes the steps
 * specified in RFC 5646 section 4.5, or successor, to bring the language tag
 * into canonical form, and to regularize the case of the subtags, but does not
 * take the steps to bring a language tag into "extlang form" and to reorder
 * variant subtags.
 *
 * The specifications for extensions to BCP 47 language tags, such as RFC 6067,
 * may include canonicalization rules for the extension subtag sequences they
 * define that go beyond the canonicalization rules of RFC 5646 section 4.5.
 * Implementations are allowed, but not required, to apply these additional rules.
 *
 * Extension and private-use subtags (everything from the first single-character
 * subtag on) are only lowercased and, for extensions, reordered; they are never
 * re-cased or replaced.
 *
 * @param {string} locale - a structurally valid BCP 47 language tag.
 * @returns {string} the canonicalized tag.
 */
export function /* 6.2.3 */CanonicalizeLanguageTag (locale) {
    let match, parts;

    // A language tag is in 'canonical form' when the tag is well-formed
    // according to the rules in Sections 2.1 and 2.2

    // Section 2.1 says all subtags use lowercase...
    locale = locale.toLowerCase();

    // ...with 2 exceptions: 'two-letter and four-letter subtags that neither
    // appear at the start of the tag nor occur after singletons. Such two-letter
    // subtags are all uppercase (as in the tags "en-CA-x-ca" or "sgn-BE-FR") and
    // four-letter subtags are titlecase (as in the tag "az-Latn-x-latn").
    parts = locale.split('-');
    for (let i = 0, max = parts.length; i < max; i++) {
        // Any single-character subtag is a singleton, including the private
        // use marker 'x' and a leading 'x' or 'i'. Everything after it keeps
        // the lowercase form.
        if (parts[i].length === 1)
            break;

        // The subtag at the start of the tag stays lowercase
        if (i === 0)
            continue;

        // Two-letter subtags are all uppercase
        if (parts[i].length === 2)
            parts[i] = parts[i].toUpperCase();

        // Four-letter subtags are titlecase
        else if (parts[i].length === 4)
            parts[i] = parts[i].charAt(0).toUpperCase() + parts[i].slice(1);
    }
    locale = arrJoin.call(parts, '-');

    // The steps laid out in RFC 5646 section 4.5 are as follows:

    // 1. Extension sequences are ordered into case-insensitive ASCII order
    //    by singleton subtag.
    //
    // Private use subtags are opaque, so split them off first; otherwise
    // sequences such as "-u-foo" inside "-x-u-foo" would be picked up as
    // extensions and copied in front of the private use section.
    const privateUseAt = locale.slice(0, 2) === 'x-' ? 0 : locale.indexOf('-x-');
    const head = privateUseAt < 0 ? locale : locale.slice(0, privateUseAt);
    const privateUse = privateUseAt < 0 ? '' : locale.slice(privateUseAt);

    if ((match = head.match(expExtSequences)) && match.length > 1) {
        // The built-in sort() sorts by ASCII order, so use that
        match.sort();

        // Replace all extensions with the joined, sorted array
        locale = head.replace(
            RegExp('(?:' + expExtSequences.source + ')+', 'i'),
            arrJoin.call(match, '')
        ) + privateUse;
    }

    // 2. Redundant or grandfathered tags are replaced by their 'Preferred-
    //    Value', if there is one.
    if (hop.call(redundantTags.tags, locale))
        locale = redundantTags.tags[locale];

    // 3. Subtags are replaced by their 'Preferred-Value', if there is one.
    //    For extlangs, the original primary language subtag is also
    //    replaced if there is a primary language subtag in the 'Preferred-
    //    Value'.
    parts = locale.split('-');

    // Start at the primary language subtag so deprecated languages such as
    // "in" or "iw" are replaced too.
    for (let i = 0, max = parts.length; i < max; i++) {
        // Extension and private use subtags are not registry subtags
        if (parts[i].length === 1)
            break;

        if (hop.call(redundantTags.subtags, parts[i]))
            parts[i] = redundantTags.subtags[parts[i]];

        else if (hop.call(redundantTags.extLang, parts[i])) {
            const extLang = redundantTags.extLang[parts[i]];
            parts[i] = extLang[0];

            // For extlang tags, the prefix needs to be removed if it is redundant
            if (i === 1 && extLang[1] === parts[0]) {
                parts = arrSlice.call(parts, 1);

                // Every remaining subtag moved down one slot: step back so
                // the next iteration looks at the subtag right after the
                // extlang instead of skipping ahead.
                i -= 1;
                max -= 1;
            }
        }
    }

    return arrJoin.call(parts, '-');
}
473
/**
 * The DefaultLocale abstract operation returns a String value representing the
 * structurally valid (6.2.2) and canonicalized (6.2.3) BCP 47 language tag for the
 * host environment's current locale.
 *
 * This implementation hands back the module-level `defaultLocale` binding
 * as-is; no validation or canonicalization is performed at this point.
 *
 * @returns {*} the current value of `defaultLocale`.
 */
export function /* 6.2.4 */DefaultLocale () {
    return defaultLocale;
}
482
// Sect 6.3 Currency Codes
// =======================
485
// Matches a string made of exactly three characters in "A".."Z".
const expCurrencyCode = /^[A-Z]{3}$/;

/**
 * The IsWellFormedCurrencyCode abstract operation verifies that the currency
 * argument (after conversion to a String value) represents a well-formed
 * 3-letter ISO currency code.
 *
 * @param {*} currency - value to check; converted with `String()` first.
 * @returns {boolean} true when the converted value consists of exactly three
 *   ASCII letters in either case, false otherwise.
 */
export function /* 6.3.1 */IsWellFormedCurrencyCode(currency) {
    // Steps 1-2: take ToString(currency) and map it to upper case as
    // described in 6.1.
    const candidate = toLatinUpperCase(String(currency));

    // Steps 3-5: a single pattern covers both the length check and the
    // "A" to "Z" (U+0041 to U+005A) check.
    return expCurrencyCode.test(candidate);
}