/*! https://mths.be/punycode v1.4.1 by @mathias */

/** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
// Matches any code unit above U+007F. ASCII control characters and DEL are
// ASCII too, so they must not cause a label to be Punycode-encoded.
const regexNonASCII = /[^\0-\x7F]/;
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the error type passed to `error()` */
const errors = {
  'overflow': 'Overflow: input needs wider integers to process',
  'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
  'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;

/*--------------------------------------------------------------------------*/

/**
 * A generic error utility function.
 * @private
 * @param {String} type The error type; used as a key into the `errors` table.
 * @throws {RangeError} Always, with the message registered for `type`.
 */
function error(type) {
  throw new RangeError(errors[type]);
}

/**
 * A generic `Array#map` utility function.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function that gets called for every array item.
 * It receives the item as its only argument.
 * @returns {Array} A new array of values returned by `fn`, in the same order
 * as `array`.
 */
function map(array, fn) {
  const result = [];
  // Items are visited from the last index down to the first; each result is
  // stored at the index of its source item.
  for (let index = array.length - 1; index >= 0; index--) {
    result[index] = fn(array[index]);
  }
  return result;
}

/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label of the
 * domain name (the pieces between separators).
 * @returns {String} A new string made of the labels returned by `fn`, joined
 * with `.`, preceded by the untouched local part when `string` is an email
 * address.
 */
function mapDomain(string, fn) {
  let result = '';
  // In email addresses, only the domain name should be punycoded. Leave
  // the local part (i.e. everything up to and including the last `@`) intact.
  // Splitting at the last `@` keeps input with several `@` characters whole
  // instead of dropping everything after the second one.
  const at = string.lastIndexOf('@');
  if (at !== -1) {
    result = string.slice(0, at + 1);
    string = string.slice(at + 1);
  }
  // Avoid `split(regex)` for IE8 compatibility. See #17.
  string = string.replace(regexSeparators, '\x2E');
  const labels = string.split('.');
  const encoded = map(labels, fn).join('.');
  return result + encoded;
}

/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points. Unpaired surrogates are
 * kept as individual values.
 */
function ucs2decode(string) {
  const output = [];
  const length = string.length;
  let counter = 0;
  while (counter < length) {
    const value = string.charCodeAt(counter++);
    if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
      // high surrogate, and there is a next character
      const extra = string.charCodeAt(counter++);
      if ((extra & 0xFC00) === 0xDC00) { // low surrogate
        output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
      } else {
        // unmatched surrogate; only append this code unit, in case the next
        // code unit is the high surrogate of a surrogate pair
        output.push(value);
        counter--;
      }
    } else {
      output.push(value);
    }
  }
  return output;
}

/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2). Code points above
 * 0xFFFF are written as a surrogate pair.
 */
function ucs2encode(array) {
  return map(array, function(value) {
    if (value <= 0xFFFF) {
      return stringFromCharCode(value);
    }
    // Split a supplementary code point into its high and low surrogates.
    const offset = value - 0x10000;
    const high = offset >>> 10 & 0x3FF | 0xD800;
    const low = 0xDC00 | offset & 0x3FF;
    return stringFromCharCode(high) + stringFromCharCode(low);
  }).join('');
}

/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
function basicToDigit(codePoint) {
  // Each range is checked on both ends: a bare `codePoint - 48 < 10` test is
  // also true for every code point below `0`, which would yield bogus or
  // negative digits instead of `base`.
  if (codePoint >= 0x30 && codePoint < 0x3A) { // 0..9 map to 26..35
    return 26 + (codePoint - 0x30);
  }
  if (codePoint >= 0x41 && codePoint < 0x5B) { // A..Z map to 0..25
    return codePoint - 0x41;
  }
  if (codePoint >= 0x61 && codePoint < 0x7B) { // a..z map to 0..25
    return codePoint - 0x61;
  }
  return base;
}

/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point.
 * @param {Number} flag Selects the letter case: non-zero for uppercase,
 * zero for lowercase.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`, which needs to be in the range
 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
 * used; else, the lowercase form is used. The behavior is undefined
 * if `flag` is non-zero and `digit` has no uppercase form.
 */
function digitToBasic(digit, flag) {
  //  0..25 map to ASCII a..z or A..Z
  // 26..35 map to ASCII 0..9
  const offset = digit < 26 ? 97 : 22;
  // Loose comparison is intentional: it keeps `false` and `'0'` counting as
  // "zero" exactly as before. Subtracting 32 turns a..z into A..Z.
  const caseShift = flag != 0 ? 32 : 0;
  return digit + offset - caseShift;
}

/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints The number of code points handled so far,
 * including the one just processed.
 * @param {Boolean} firstTime Whether this is the first adaptation for the
 * current string; if so `delta` is scaled down by `damp` instead of halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
  let k = 0;
  delta = firstTime ? floor(delta / damp) : delta >> 1;
  delta += floor(delta / numPoints);
  // Repeatedly divide `delta` down until it fits below the threshold,
  // counting one `base` step per division.
  while (delta > baseMinusTMin * tMax >> 1) {
    delta = floor(delta / baseMinusTMin);
    k += base;
  }
  return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
}

/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} Via `error()`: `not-basic` when a character before the
 * last delimiter is >= 0x80, `invalid-input` when the encoded part ends early
 * or contains a character that is not a Punycode digit, and `overflow` when
 * a value would exceed `maxInt`.
 */
export function decode(input) {
  // Don't use UCS-2: `output` holds code points until the very end.
  const output = [];
  const inputLength = input.length;
  let i = 0;
  let n = initialN;
  let bias = initialBias;

  // Handle the basic code points: let `basic` be the number of input code
  // points before the last delimiter, or `0` if there is none, then copy
  // the first basic code points to the output.
  let basic = input.lastIndexOf(delimiter);
  if (basic < 0) {
    basic = 0;
  }

  for (let j = 0; j < basic; ++j) {
    // if it's not a basic code point
    if (input.charCodeAt(j) >= 0x80) {
      error('not-basic');
    }
    output.push(input.charCodeAt(j));
  }

  // Main decoding loop: start just after the last delimiter if any basic code
  // points were copied; start at the beginning otherwise.
  for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

    // `index` is the index of the next character to be consumed.
    // Decode a generalized variable-length integer into `delta`,
    // which gets added to `i`. The overflow checking is easier
    // if we increase `i` as we go, then subtract off its starting
    // value at the end to obtain `delta`.
    const oldi = i;
    for (let w = 1, k = base; /* no condition */; k += base) {

      if (index >= inputLength) {
        error('invalid-input');
      }

      const digit = basicToDigit(input.charCodeAt(index++));

      // A character that is not a Punycode digit is malformed input, not an
      // arithmetic overflow, so it gets its own error type.
      if (digit >= base) {
        error('invalid-input');
      }
      if (digit > floor((maxInt - i) / w)) {
        error('overflow');
      }

      i += digit * w;
      const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

      if (digit < t) {
        break;
      }

      const baseMinusT = base - t;
      if (w > floor(maxInt / baseMinusT)) {
        error('overflow');
      }

      w *= baseMinusT;
    }

    const out = output.length + 1;
    bias = adapt(i - oldi, out, oldi === 0);

    // `i` was supposed to wrap around from `out` to `0`,
    // incrementing `n` each time, so we'll fix that now:
    if (floor(i / out) > maxInt - n) {
      error('overflow');
    }

    n += floor(i / out);
    i %= out;

    // Insert `n` at position `i` of the output
    output.splice(i++, 0, n);
  }

  return ucs2encode(output);
}

/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} Via `error('overflow')` when a delta would exceed
 * `maxInt`.
 */
export function encode(input) {
  const output = [];

  // Convert the input in UCS-2 to Unicode code points
  const codePoints = ucs2decode(input);

  // `inputLength` holds the number of code points in `input`.
  const inputLength = codePoints.length;

  // Initialize the state
  let n = initialN;
  let delta = 0;
  let bias = initialBias;

  // Handle the basic code points
  for (const currentValue of codePoints) {
    if (currentValue < 0x80) {
      output.push(stringFromCharCode(currentValue));
    }
  }

  // `handledCPCount` is the number of code points that have been handled;
  // `basicLength` is the number of basic code points.
  const basicLength = output.length;
  let handledCPCount = basicLength;

  // Finish the basic string - if it is not empty - with a delimiter
  if (basicLength) {
    output.push(delimiter);
  }

  // Main encoding loop:
  while (handledCPCount < inputLength) {

    // All non-basic code points < n have been handled already. Find the next
    // larger one:
    let m = maxInt;
    for (const currentValue of codePoints) {
      if (currentValue >= n && currentValue < m) {
        m = currentValue;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
    // but guard against overflow
    const handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      error('overflow');
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (const currentValue of codePoints) {
      if (currentValue < n && ++delta > maxInt) {
        error('overflow');
      }

      if (currentValue === n) {
        // Represent delta as a generalized variable-length integer
        let q = delta;
        for (let k = base; /* no condition */; k += base) {
          const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
          if (q < t) {
            break;
          }
          const qMinusT = q - t;
          const baseMinusT = base - t;
          output.push(
            stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
          );
          q = floor(qMinusT / baseMinusT);
        }

        output.push(stringFromCharCode(digitToBasic(q, 0)));
        bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
        delta = 0;
        ++handledCPCount;
      }
    }

    ++delta;
    ++n;
  }

  return output.join('');
}

/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
 * it doesn't matter if you call it on a string that has already been
 * converted to Unicode.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
export function toUnicode(input) {
  return mapDomain(input, function(label) {
    // Labels without the `xn--` prefix are passed through untouched.
    if (!regexPunycode.test(label)) {
      return label;
    }
    // Drop the 4-character prefix and lowercase the rest before decoding.
    return decode(label.slice(4).toLowerCase());
  });
}

/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only the non-ASCII parts of the domain name will be converted,
 * i.e. it doesn't matter if you call it with a domain that's already in
 * ASCII.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
export function toASCII(input) {
  return mapDomain(input, function(label) {
    // Labels that `regexNonASCII` does not match are passed through untouched.
    if (!regexNonASCII.test(label)) {
      return label;
    }
    return 'xn--' + encode(label);
  });
}
/**
 * The version number of this Punycode implementation.
 * @memberOf punycode
 * @type String
 */
export const version = '1.4.1';

/**
 * An object of methods to convert from JavaScript's internal character
 * representation (UCS-2) to Unicode code points, and back.
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode
 * @type Object
 */
export const ucs2 = {
  decode: ucs2decode,
  encode: ucs2encode
};

/** The complete public API bundled as a single object for default imports. */
export default {
  version: version,
  ucs2: ucs2,
  toASCII: toASCII,
  toUnicode: toUnicode,
  encode: encode,
  decode: decode
};