UNPKG

12.6 kBJavaScriptView Raw
'use strict';

/** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
// Matches any code unit above 0x7F. The upper bound must be 0x7F (DEL), not
// 0x7E: DEL is still ASCII, and the encoder treats everything below 0x80 as
// a basic code point.
const regexNonASCII = /[^\0-\x7F]/; // non-ASCII chars
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the type passed to `error()` */
const errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;
32
33/*--------------------------------------------------------------------------*/
34
/**
 * Raises the error registered under the given type.
 * @private
 * @param {String} type The key of the message to look up in `errors`.
 * @throws {RangeError} Always; its message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
44
/**
 * A minimal `Array#map`-style helper.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function called with each array item (and no
 * other argument). Items are visited from the last index down to the first.
 * @returns {Array} A new array holding, at each index, the value `fn`
 * returned for the item at that index.
 */
function map(array, fn) {
	const result = [];
	for (let index = array.length - 1; index >= 0; --index) {
		result[index] = fn(array[index]);
	}
	return result;
}
61
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label of
 * the domain name.
 * @returns {String} The input with every domain label replaced by the value
 * `fn` returned for it, labels joined with `.`.
 */
function mapDomain(string, fn) {
	let localPart = '';
	let domain = string;
	// In email addresses, only the domain name should be punycoded. Leave
	// the local part (i.e. everything up to and including the last `@`)
	// intact. Splitting on the *last* `@` keeps inputs that contain more than
	// one `@` (e.g. a quoted local part) from losing everything after the
	// second `@`.
	const atIndex = string.lastIndexOf('@');
	if (atIndex !== -1) {
		localPart = string.slice(0, atIndex + 1);
		domain = string.slice(atIndex + 1);
	}
	// Avoid `split(regex)` for IE8 compatibility. See #17.
	// Normalize every RFC 3490 separator to a plain full stop first.
	domain = domain.replace(regexSeparators, '\x2E');
	const labels = domain.split('.');
	const encoded = map(labels, fn).join('.');
	return localPart + encoded;
}
87
/**
 * Builds an array with the numeric code point of every Unicode character in
 * the string. A high surrogate immediately followed by a low surrogate is
 * merged into one code point (UTF-16 semantics); any other code unit,
 * including an unpaired surrogate, is emitted unchanged.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const codePoints = [];
	const length = string.length;
	for (let index = 0; index < length; ++index) {
		const unit = string.charCodeAt(index);
		const isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
		if (isHighSurrogate && index + 1 < length) {
			const next = string.charCodeAt(index + 1);
			if ((next & 0xFC00) === 0xDC00) {
				// Valid pair: combine both halves and skip the low surrogate.
				codePoints.push(((unit & 0x3FF) << 10) + (next & 0x3FF) + 0x10000);
				++index;
				continue;
			}
			// Unmatched high surrogate: fall through and emit it alone, so the
			// following unit is examined on its own in the next iteration (it
			// may itself start a surrogate pair).
		}
		codePoints.push(unit);
	}
	return codePoints;
}
124
/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array (or other iterable) of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 * @throws {RangeError} If a value is not a valid code point (raised by
 * `String.fromCodePoint`).
 */
const ucs2encode = array => {
	// Every code point becomes one call argument, and engines cap the number
	// of arguments a single call may receive. Converting in fixed-size chunks
	// keeps large inputs from failing on that limit.
	const chunkSize = 0x4000;
	const codePoints = Array.isArray(array) ? array : Array.from(array);
	if (codePoints.length <= chunkSize) {
		return String.fromCodePoint(...codePoints);
	}
	let result = '';
	for (let start = 0; start < codePoints.length; start += chunkSize) {
		result += String.fromCodePoint(...codePoints.slice(start, start + chunkSize));
	}
	return result;
};
134
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
	// Both bounds of every range are checked explicitly. A one-sided test such
	// as `codePoint - 0x30 < 0x0A` also accepts every code point below the
	// range and yields a bogus (even negative) digit for it.
	if (codePoint >= 0x30 && codePoint < 0x3A) {
		// '0'..'9' map to 26..35
		return 26 + (codePoint - 0x30);
	}
	if (codePoint >= 0x41 && codePoint < 0x5B) {
		// 'A'..'Z' map to 0..25
		return codePoint - 0x41;
	}
	if (codePoint >= 0x61 && codePoint < 0x7B) {
		// 'a'..'z' map to 0..25
		return codePoint - 0x61;
	}
	return base;
};
156
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected
 * to be in the range `0` to `base - 1`.
 * @param {*} flag Selects the case: if `flag != 0` (loose comparison) the
 * uppercase form is used; else, the lowercase form is used. The behavior
 * is undefined if the uppercase form is requested for a `digit` that has
 * none.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`.
 */
const digitToBasic = function(digit, flag) {
	// 0..25 map to ASCII a..z or A..Z
	// 26..35 map to ASCII 0..9
	const letterOffset = digit < 26 ? 75 : 0;
	// Subtracting 32 turns a lowercase ASCII letter into its uppercase form.
	// The comparison is intentionally loose, matching the documented contract.
	const caseOffset = flag != 0 ? 32 : 0;
	return digit + 22 + letterOffset - caseOffset;
};
173
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints The divisor used to scale the delta; callers
 * pass the number of code points handled so far, plus one.
 * @param {Boolean} firstTime Truthy for the first adaptation, in which case
 * the delta is divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
	const threshold = (baseMinusTMin * tMax) >> 1;
	let scaled = firstTime ? floor(delta / damp) : delta >> 1;
	scaled += floor(scaled / numPoints);
	let k = 0;
	while (scaled > threshold) {
		scaled = floor(scaled / baseMinusTMin);
		k += base;
	}
	return floor(k + ((baseMinusTMin + 1) * scaled) / (scaled + skew));
};
188
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} With the `not-basic` message if a code unit before
 * the last delimiter is >= 0x80; with the `invalid-input` message if the
 * encoded part contains a character that is not a digit or ends in the
 * middle of an integer; with the `overflow` message if a value would exceed
 * `maxInt`.
 */
const decode = function(input) {
	// Don't use UCS-2: `output` collects numeric code points.
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codeUnit = input.charCodeAt(j);
		// if it's not a basic code point
		if (codeUnit >= 0x80) {
			error('not-basic');
		}
		output.push(codeUnit);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.

	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			// A character that is not a digit is malformed input, not an
			// arithmetic overflow, so it gets the matching message.
			if (digit >= base) {
				error('invalid-input');
			}

			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			// A digit below the threshold terminates the integer.
			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output.
		output.splice(i++, 0, n);

	}

	return String.fromCodePoint(...output);
};
279
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} With the `overflow` message if a delta would exceed
 * `maxInt`.
 */
const encode = function(input) {
	// Work on Unicode code points rather than UCS-2 code units.
	const codePoints = ucs2decode(input);
	const totalCount = codePoints.length;

	// The basic code points are copied to the output first, in input order.
	const output = [];
	for (const codePoint of codePoints) {
		if (codePoint < 0x80) {
			output.push(stringFromCharCode(codePoint));
		}
	}

	// `basicCount` is the number of basic code points;
	// `handledCount` is the number of code points handled so far.
	const basicCount = output.length;
	let handledCount = basicCount;

	// The basic part is closed with a delimiter unless it is empty.
	if (basicCount > 0) {
		output.push(delimiter);
	}

	// Encoder state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	// Each round handles every occurrence of one non-basic code point value,
	// in increasing order of value.
	while (handledCount < totalCount) {

		// Every non-basic code point < n is already handled; pick the smallest
		// remaining one.
		let nextCodePoint = maxInt;
		for (const codePoint of codePoints) {
			if (codePoint >= n && codePoint < nextCodePoint) {
				nextCodePoint = codePoint;
			}
		}

		// Grow `delta` so the decoder's <n,i> state advances to
		// <nextCodePoint,0>, refusing to go past `maxInt`.
		const handledCountPlusOne = handledCount + 1;
		const step = nextCodePoint - n;
		if (step > floor((maxInt - delta) / handledCountPlusOne)) {
			error('overflow');
		}
		delta += step * handledCountPlusOne;
		n = nextCodePoint;

		for (const codePoint of codePoints) {
			if (codePoint < n) {
				++delta;
				if (delta > maxInt) {
					error('overflow');
				}
			} else if (codePoint === n) {
				// Emit `delta` as a generalized variable-length integer.
				let q = delta;
				for (let k = base; /* no condition */; k += base) {
					let t;
					if (k <= bias) {
						t = tMin;
					} else if (k >= bias + tMax) {
						t = tMax;
					} else {
						t = k - bias;
					}
					if (q < t) {
						break;
					}
					const qMinusT = q - t;
					const baseMinusT = base - t;
					const digit = t + qMinusT % baseMinusT;
					output.push(stringFromCharCode(digitToBasic(digit, 0)));
					q = floor(qMinusT / baseMinusT);
				}

				// The final digit, which is below its threshold.
				output.push(stringFromCharCode(digitToBasic(q, 0)));
				bias = adapt(delta, handledCountPlusOne, handledCount === basicCount);
				delta = 0;
				++handledCount;
			}
		}

		++delta;
		++n;

	}
	return output.join('');
};
374
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only labels that start with `xn--` are converted, so calling
 * it on a string that has already been converted to Unicode leaves that
 * string as it is.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
	const decodeLabel = label => {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix; the rest is lowercased before
		// it is decoded.
		return decode(label.slice(4).toLowerCase());
	};
	return mapDomain(input, decodeLabel);
};
393
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels matched by `regexNonASCII` are converted, so calling
 * it with a domain that's already in ASCII leaves that domain as it is.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
	const encodeLabel = label => {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		// Converted labels are marked with the `xn--` prefix.
		return 'xn--' + encode(label);
	};
	return mapDomain(input, encodeLabel);
};
412
413/*--------------------------------------------------------------------------*/
414
/**
 * An object of methods to convert from JavaScript's internal character
 * representation (UCS-2) to Unicode code points, and back.
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode
 * @type Object
 */
const ucs2 = {
	decode: ucs2decode,
	encode: ucs2encode
};

/** Define the public API */
const punycode = {
	/**
	 * A string representing the current Punycode.js version number.
	 * @memberOf punycode
	 * @type String
	 */
	version: '2.1.0',
	ucs2,
	decode,
	encode,
	toASCII,
	toUnicode
};

module.exports = punycode;