UNPKG

13.3 kBJavaScriptView Raw
1/*! https://mths.be/punycode v1.4.1 by @mathias */
2
/** Largest positive value of a signed 32-bit integer (0x7FFFFFFF, i.e. 2^31 - 1). */
const maxInt = 2147483647;

/** Bootstring parameters used by the encoder and decoder below. */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80: code points below this are treated as basic
const delimiter = '-'; // '\x2D'

/** Regular expressions */
// Matches labels that carry the lowercase `xn--` prefix.
const regexPunycode = /^xn--/;
// Matches anything outside printable ASCII (control characters and non-ASCII).
const regexNonASCII = /[^\x20-\x7E]/;
// Matches every label separator listed by RFC 3490 (global, for use with `replace`).
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g;

/** Error messages, keyed by the error type passed to `error()`. */
const errors = {
  'overflow': 'Overflow: input needs wider integers to process',
  'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
  'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const { floor } = Math;
const { fromCharCode: stringFromCharCode } = String;
32
33/*--------------------------------------------------------------------------*/
34
/**
 * Raises the error associated with a given error type.
 * @private
 * @param {String} type Key into the `errors` table that selects the message.
 * @throws {RangeError} Always; this function never returns normally.
 */
function error(type) {
  const message = errors[type];
  throw new RangeError(message);
}
44
/**
 * Minimal stand-in for `Array#map`.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn Called once per item with that item as its only
 * argument. Items are visited from the last index down to the first.
 * @returns {Array} A new array holding the callback results at the same
 * indices as their inputs.
 */
function map(array, fn) {
  const result = [];
  for (let index = array.length - 1; index >= 0; index -= 1) {
    result[index] = fn(array[index]);
  }
  return result;
}
61
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses. The domain is split into labels, `fn` is applied to each label,
 * and the results are re-joined with `.`.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} A new string built from the labels returned by the
 * callback, prefixed with the untouched local part (and `@`) if the input
 * contained one.
 */
function mapDomain(string, fn) {
  let prefix = '';
  let domain = string;
  // In email addresses, only the domain name should be punycoded. Split on
  // the LAST `@` so that a local part which itself contains `@` is kept
  // intact instead of the input being truncated after the second `@`.
  const atIndex = string.lastIndexOf('@');
  if (atIndex !== -1) {
    prefix = string.slice(0, atIndex + 1);
    domain = string.slice(atIndex + 1);
  }
  // Normalise every RFC 3490 separator to a plain dot first, so that a
  // simple string split is enough to obtain the labels.
  const labels = domain.replace(regexSeparators, '\x2E').split('.');
  return prefix + map(labels, fn).join('.');
}
87
/**
 * Turns a JavaScript string into an array of numeric Unicode code points.
 * A high surrogate immediately followed by a low surrogate is combined into
 * one code point above 0xFFFF; any other code unit (including an unpaired
 * surrogate) is emitted unchanged as its own entry.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
  const codePoints = [];
  const length = string.length;
  let position = 0;
  while (position < length) {
    // `codePointAt` pairs surrogates exactly when a high surrogate is
    // directly followed by a low one, and otherwise returns the code unit.
    const codePoint = string.codePointAt(position);
    codePoints.push(codePoint);
    // A combined pair consumed two code units; everything else consumed one.
    position += codePoint > 0xFFFF ? 2 : 1;
  }
  return codePoints;
}
126
/**
 * Builds a string from an array of numeric code points. Values above 0xFFFF
 * are written as a surrogate pair; all other values are written as a single
 * code unit.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 */
function ucs2encode(array) {
  let text = '';
  for (let index = 0; index < array.length; index += 1) {
    let codePoint = array[index];
    if (codePoint > 0xFFFF) {
      // Split the offset from 0x10000 into a 10-bit high and 10-bit low half.
      codePoint -= 0x10000;
      const highSurrogate = 0xD800 | ((codePoint >>> 10) & 0x3FF);
      text += stringFromCharCode(highSurrogate);
      codePoint = 0xDC00 | (codePoint & 0x3FF);
    }
    text += stringFromCharCode(codePoint);
  }
  return text;
}
147
/**
 * Converts a basic code point into a digit/integer.
 *
 * Each range is checked on both ends. A bare `codePoint - 48 < 10` test is
 * also true for every code point below `'0'`, which would let characters
 * such as `'-'` or `'!'` pass as digits.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
  // '0'..'9' map to 26..35
  if (codePoint >= 0x30 && codePoint <= 0x39) {
    return codePoint - 0x30 + 26;
  }
  // 'A'..'Z' map to 0..25
  if (codePoint >= 0x41 && codePoint <= 0x5A) {
    return codePoint - 0x41;
  }
  // 'a'..'z' map to 0..25
  if (codePoint >= 0x61 && codePoint <= 0x7A) {
    return codePoint - 0x61;
  }
  return base;
}
169
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected to
 * be in the range `0` to `base - 1`.
 * @param {*} flag Case selector. Any value that is loosely unequal to `0`
 * (this includes an omitted flag) selects the uppercase form; otherwise the
 * lowercase form is used. The result is not meaningful when uppercase is
 * requested for a digit that has no uppercase form.
 * @returns {Number} The basic code point whose value is `digit`.
 */
function digitToBasic(digit, flag) {
  //  0..25 map to ASCII a..z or A..Z
  // 26..35 map to ASCII 0..9
  const letterOffset = digit < 26 ? 75 : 0;
  // Loose comparison is intentional: it mirrors the `flag != 0` contract.
  const caseOffset = flag != 0 ? 32 : 0;
  return digit + 22 + letterOffset - caseOffset;
}
186
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor used to scale the delta up before the
 * reduction loop.
 * @param {*} firstTime When truthy, the delta is divided by `damp`;
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
  const threshold = (baseMinusTMin * tMax) >> 1;
  let scaled = firstTime ? floor(delta / damp) : delta >> 1;
  scaled += floor(scaled / numPoints);

  // Shrink the delta until it is at or below the threshold, counting how
  // much bias each division step contributes.
  let k = 0;
  while (scaled > threshold) {
    scaled = floor(scaled / baseMinusTMin);
    k += base;
  }
  return floor(k + ((baseMinusTMin + 1) * scaled) / (scaled + skew));
}
201
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} `not-basic` if a code unit >= 0x80 appears before the
 * last delimiter; `invalid-input` if the input ends in the middle of a
 * variable-length integer or contains a character that is not a valid
 * digit; `overflow` if a decoded value does not fit in `maxInt`.
 */
export function decode(input) {
  // Don't use UCS-2: `output` holds numeric code points until the very end.
  const output = [];
  const inputLength = input.length;
  let i = 0;
  let n = initialN;
  let bias = initialBias;

  // Handle the basic code points: let `basic` be the number of input code
  // points before the last delimiter, or `0` if there is none, then copy
  // the first basic code points to the output.
  let basic = input.lastIndexOf(delimiter);
  if (basic < 0) {
    basic = 0;
  }

  for (let j = 0; j < basic; ++j) {
    const codeUnit = input.charCodeAt(j);
    // if it's not a basic code point
    if (codeUnit >= 0x80) {
      error('not-basic');
    }
    output.push(codeUnit);
  }

  // Main decoding loop: start just after the last delimiter if any basic code
  // points were copied; start at the beginning otherwise.
  for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

    // `index` is the index of the next character to be consumed.
    // Decode a generalized variable-length integer into `delta`,
    // which gets added to `i`. The overflow checking is easier
    // if we increase `i` as we go, then subtract off its starting
    // value at the end to obtain `delta`.
    const oldi = i;
    for (let w = 1, k = base; /* no condition */ ; k += base) {

      if (index >= inputLength) {
        error('invalid-input');
      }

      const digit = basicToDigit(input.charCodeAt(index++));

      // A character that is not a digit is malformed input, not an
      // arithmetic overflow, so report it as such.
      if (digit >= base) {
        error('invalid-input');
      }
      if (digit > floor((maxInt - i) / w)) {
        error('overflow');
      }

      i += digit * w;
      const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

      // A digit below the threshold terminates the integer.
      if (digit < t) {
        break;
      }

      const baseMinusT = base - t;
      if (w > floor(maxInt / baseMinusT)) {
        error('overflow');
      }

      w *= baseMinusT;
    }

    const out = output.length + 1;
    bias = adapt(i - oldi, out, oldi == 0);

    // `i` was supposed to wrap around from `out` to `0`,
    // incrementing `n` each time, so we'll fix that now:
    const wraps = floor(i / out);
    if (wraps > maxInt - n) {
      error('overflow');
    }

    n += wraps;
    i %= out;

    // Insert `n` at position `i` of the output
    output.splice(i++, 0, n);
  }

  return ucs2encode(output);
}
302
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} `overflow` if an intermediate delta would exceed
 * `maxInt`.
 */
export function encode(input) {
  // Work on code points rather than UCS-2 code units.
  const codePoints = ucs2decode(input);
  const inputLength = codePoints.length;
  const output = [];

  // Copy the basic code points straight to the output, in order.
  for (const codePoint of codePoints) {
    if (codePoint < 0x80) {
      output.push(stringFromCharCode(codePoint));
    }
  }

  // `basicLength` is the number of basic code points;
  // `handledCPCount` is the number of code points handled so far.
  const basicLength = output.length;
  let handledCPCount = basicLength;

  // Finish the basic string - if it is not empty - with a delimiter
  if (basicLength) {
    output.push(delimiter);
  }

  let n = initialN;
  let delta = 0;
  let bias = initialBias;

  // Main encoding loop:
  while (handledCPCount < inputLength) {

    // All non-basic code points < n have been handled already. Find the
    // smallest code point that is >= n:
    let m = maxInt;
    for (const codePoint of codePoints) {
      if (codePoint >= n && codePoint < m) {
        m = codePoint;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
    // but guard against overflow. This value is computed once per pass and
    // deliberately not refreshed while the pass emits code points.
    const handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      error('overflow');
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (const codePoint of codePoints) {
      if (codePoint < n && ++delta > maxInt) {
        error('overflow');
      }

      if (codePoint !== n) {
        continue;
      }

      // Represent delta as a generalized variable-length integer
      let q = delta;
      for (let k = base; /* no condition */ ; k += base) {
        let t;
        if (k <= bias) {
          t = tMin;
        } else if (k >= bias + tMax) {
          t = tMax;
        } else {
          t = k - bias;
        }
        if (q < t) {
          break;
        }
        const qMinusT = q - t;
        const baseMinusT = base - t;
        const digit = t + (qMinusT % baseMinusT);
        output.push(stringFromCharCode(digitToBasic(digit, 0)));
        q = floor(qMinusT / baseMinusT);
      }

      // The final (most significant) digit is below the threshold.
      output.push(stringFromCharCode(digitToBasic(q, 0)));
      bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
      delta = 0;
      ++handledCPCount;
    }

    ++delta;
    ++n;
  }

  return output.join('');
}
416
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only labels that start with the lowercase `xn--` prefix are
 * decoded (after lowercasing the part that follows the prefix); every other
 * label is returned unchanged, so calling this on a string that is already
 * Unicode is harmless.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
export function toUnicode(input) {
  return mapDomain(input, (label) => {
    if (!regexPunycode.test(label)) {
      return label;
    }
    // Drop the four-character `xn--` prefix before decoding.
    const punycoded = label.slice(4).toLowerCase();
    return decode(punycoded);
  });
}
435
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels that match `regexNonASCII` are encoded and given the
 * `xn--` prefix; all other labels are returned unchanged, so calling this
 * with a domain that is already ASCII is harmless.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
export function toASCII(input) {
  return mapDomain(input, (label) => {
    if (!regexNonASCII.test(label)) {
      return label;
    }
    return 'xn--' + encode(label);
  });
}
/** Version string of this Punycode implementation. */
export const version = '1.4.1';

/**
 * An object of methods to convert from JavaScript's internal character
 * representation (UCS-2) to Unicode code points, and back.
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode
 * @type Object
 */
export const ucs2 = {
  decode: ucs2decode,
  encode: ucs2encode,
};

/** Default export: every public member gathered on a single object. */
export default {
  version,
  ucs2,
  toASCII,
  toUnicode,
  encode,
  decode,
};