// UNPKG file view — 5.57 kB, JavaScript (raw listing follows).
'use strict';
// based on https://github.com/bestiejs/punycode.js/blob/master/punycode.js
var uncurryThis = require('../internals/function-uncurry-this');
4
// Bootstring parameter values for Punycode (RFC 3492, section 5).
var maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1
var base = 36;
var tMin = 1;
var tMax = 26;
var skew = 38;
var damp = 700;
var initialBias = 72;
var initialN = 128; // 0x80
var delimiter = '-'; // '\x2D'
// Matches any code unit above U+007F. The upper bound must be U+007F (DEL),
// not U+007E: the encoder treats every code point below 0x80 as basic, so a
// label consisting only of such code points must not be flagged as non-ASCII.
var regexNonASCII = /[^\0-\u007F]/; // non-ASCII chars
var regexSeparators = /[.\u3002\uFF0E\uFF61]/g; // RFC 3490 separators
var OVERFLOW_ERROR = 'Overflow: input needs wider integers to process';
var baseMinusTMin = base - tMin;
18
// Built-ins captured once at module load. The `uncurryThis` results are
// called throughout this file with the receiver passed as the first argument
// (e.g. `push(array, value)`, `charCodeAt(string, index)`).
var $RangeError = RangeError;
var exec = uncurryThis(regexSeparators.exec);
var floor = Math.floor;
var fromCharCode = String.fromCharCode;
var charCodeAt = uncurryThis(''.charCodeAt);
var join = uncurryThis([].join);
var push = uncurryThis([].push);
var replace = uncurryThis(''.replace);
var split = uncurryThis(''.split);
var toLowerCase = uncurryThis(''.toLowerCase);
29
/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. A high surrogate (0xD800..0xDBFF) immediately
 * followed by a low surrogate (0xDC00..0xDFFF) is combined into a single
 * supplementary code point; every other code unit, including an unmatched
 * surrogate, is emitted as-is.
 *
 * @param {string} string - The string to decode.
 * @returns {number[]} The code points, in input order.
 */
var ucs2decode = function (string) {
  var output = [];
  var counter = 0;
  var length = string.length;
  while (counter < length) {
    var value = charCodeAt(string, counter++);
    if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
      // It's a high surrogate, and there is a next character.
      var extra = charCodeAt(string, counter++);
      if ((extra & 0xFC00) === 0xDC00) { // Low surrogate.
        push(output, ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
      } else {
        // It's an unmatched surrogate; only append this code unit, and step
        // back so the next code unit is examined on its own (it may itself
        // be the high surrogate of a surrogate pair).
        push(output, value);
        counter--;
      }
    } else {
      push(output, value);
    }
  }
  return output;
};
60
/**
 * Converts a digit/integer into a basic code point.
 *
 * @param {number} digit - A digit value in the range 0..35.
 * @returns {number} The code point of the lowercase letter (for 0..25) or
 *   of the decimal digit (for 26..35) representing `digit`.
 */
var digitToBasic = function (digit) {
  //  0..25 map to ASCII a..z (97..122): digit + 97
  // 26..35 map to ASCII 0..9 (48..57):  digit + 22
  return digit + 22 + (digit < 26 ? 75 : 0);
};
69
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 *
 * @param {number} delta - The delta that was just encoded.
 * @param {number} numPoints - Divisor used to scale the delta up; the encoder
 *   passes the number of code points handled so far plus one.
 * @param {boolean} firstTime - Whether this is the first adaptation; if so
 *   the delta is divided by `damp` instead of being halved.
 * @returns {number} The new bias.
 */
var adapt = function (delta, numPoints, firstTime) {
  var k = 0;
  delta = firstTime ? floor(delta / damp) : delta >> 1;
  delta += floor(delta / numPoints);
  // Repeatedly divide delta down until it is at most ((base - tMin) * tMax) / 2,
  // accumulating `base` into k for every division performed.
  while (delta > baseMinusTMin * tMax >> 1) {
    delta = floor(delta / baseMinusTMin);
    k += base;
  }
  return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
};
84
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 *
 * @param {string} input - The label to encode.
 * @returns {string} The basic (< 0x80) code points of `input` in order,
 *   followed by the delimiter if there were any, followed by the
 *   variable-length-integer encoding of the remaining code points.
 * @throws {RangeError} If `delta` would exceed `maxInt` while encoding.
 */
var encode = function (input) {
  var output = [];

  // Convert the input in UCS-2 to an array of Unicode code points.
  input = ucs2decode(input);

  // Cache the length.
  var inputLength = input.length;

  // Initialize the state.
  var n = initialN;
  var delta = 0;
  var bias = initialBias;
  var i, currentValue;

  // Handle the basic code points.
  for (i = 0; i < inputLength; i++) {
    currentValue = input[i];
    if (currentValue < 0x80) {
      push(output, fromCharCode(currentValue));
    }
  }

  var basicLength = output.length; // number of basic code points.
  var handledCPCount = basicLength; // number of code points that have been handled;

  // Finish the basic string with a delimiter unless it's empty.
  if (basicLength) {
    push(output, delimiter);
  }

  // Main encoding loop:
  while (handledCPCount < inputLength) {
    // All non-basic code points < n have been handled already. Find the next larger one:
    var m = maxInt;
    for (i = 0; i < inputLength; i++) {
      currentValue = input[i];
      if (currentValue >= n && currentValue < m) {
        m = currentValue;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, but guard against overflow.
    var handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      throw new $RangeError(OVERFLOW_ERROR);
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (i = 0; i < inputLength; i++) {
      currentValue = input[i];
      // Every already-handled (smaller) code point the decoder would step
      // over adds one to delta.
      if (currentValue < n && ++delta > maxInt) {
        throw new $RangeError(OVERFLOW_ERROR);
      }
      if (currentValue === n) {
        // Represent delta as a generalized variable-length integer.
        var q = delta;
        var k = base;
        while (true) {
          // Threshold for this digit position: k - bias clamped to [tMin, tMax].
          var t = k <= bias ? tMin : k >= bias + tMax ? tMax : k - bias;
          if (q < t) break;
          var qMinusT = q - t;
          var baseMinusT = base - t;
          push(output, fromCharCode(digitToBasic(t + qMinusT % baseMinusT)));
          q = floor(qMinusT / baseMinusT);
          k += base;
        }

        push(output, fromCharCode(digitToBasic(q)));
        bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
        delta = 0;
        handledCPCount++;
      }
    }

    delta++;
    n++;
  }
  return join(output, '');
};
171
172module.exports = function (input) {
173 var encoded = [];
174 var labels = split(replace(toLowerCase(input), regexSeparators, '\u002E'), '.');
175 var i, label;
176 for (i = 0; i < labels.length; i++) {
177 label = labels[i];
178 push(encoded, exec(regexNonASCII, label) ? 'xn--' + encode(label) : label);
179 }
180 return join(encoded, '.');
181};