1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 | #ifndef NAN_STRING_BYTES_H_
|
23 | #define NAN_STRING_BYTES_H_
|
24 |
|
25 |
|
26 |
|
27 | namespace imp {
|
28 |
|
29 | using v8::Local;
|
30 | using v8::Object;
|
31 | using v8::String;
|
32 | using v8::Value;
|
33 |
|
34 |
|
35 |
|
36 |
|
// Number of base64 characters (including '=' padding) needed to encode
// `size` input bytes: four output characters for every started group of
// three input bytes.
//
// Every use of the argument is parenthesized so that expressions such as
// `n << 1` or `a | b` expand with the intended precedence.  The argument is
// still evaluated twice, so it must not have side effects.
#define base64_encoded_size(size) \
  (((size) + 2 - (((size) + 2) % 3)) / 3 * 4)
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
// Byte-at-a-time scan: returns true if any of the first `len` bytes of `buf`
// has its high bit (0x80) set, false otherwise (including when len == 0).
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80) return true;
  }
  return false;
}


// Returns true if any of the `len` bytes starting at `src` has its high bit
// set.  Short inputs (< 16 bytes) are scanned byte by byte; longer inputs
// are scanned one machine word at a time once `src` has been advanced to a
// word boundary, with the unaligned head and the sub-word tail handled by
// contains_non_ascii_slow().
static bool contains_non_ascii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // Scan the bytes in front of the first word boundary.  len >= 16 here,
    // so subtracting at most bytes_per_word - 1 cannot underflow.
    const unsigned n = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, n)) return true;
    src += n;
    len -= n;
  }

  // 0x80 replicated into every byte of a word.  Deriving it from the width
  // of uintptr_t (all-ones / 0xff == 0x01 in every byte) keeps it correct on
  // every 64-bit target, not only the ones a preprocessor check happens to
  // list; a 32-bit constant on a 64-bit target would leave the upper four
  // bytes of each word unchecked.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xff * 0x80;

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    if (srcw[i] & mask) return true;
  }

  // Bytes left over after the last whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    if (contains_non_ascii_slow(src + offset, remainder)) return true;
  }

  return false;
}
|
88 |
|
89 |
|
// Byte-at-a-time copy of `len` bytes from `src` to `dst` with the high bit
// of every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies `len` bytes from `src` to `dst`, clearing the high bit of every
// byte.  Exactly `len` bytes of `dst` are written.
//
// Inputs shorter than 16 bytes, and inputs where `src` and `dst` do not
// share the same offset from a word boundary, are handled byte by byte.
// Otherwise the unaligned head is copied byte-wise, the bulk is processed a
// machine word at a time, and the sub-word tail is copied byte-wise.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  // Word-sized loads and stores need both pointers on a word boundary at the
  // same time, which is only reachable when they start at the same offset.
  // This check also covers an aligned `src` paired with a misaligned `dst`.
  if (src_unalign != dst_unalign) {
    force_ascii_slow(src, dst, len);
    return;
  }

  if (src_unalign > 0) {
    // Number of bytes in front of the first word boundary.  The remaining
    // length must shrink by this same amount (not by the misalignment
    // itself), otherwise the word loop and tail would cover the wrong range.
    const unsigned head = bytes_per_word - src_unalign;
    force_ascii_slow(src, dst, head);
    src += head;
    dst += head;
    len -= head;
  }

  // 0x7f replicated into every byte of a word, derived from the width of
  // uintptr_t so that it is correct on every 32- and 64-bit target.
  const uintptr_t mask = ~(~static_cast<uintptr_t>(0) / 0xff * 0x80);

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Bytes left over after the last whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
|
140 |
|
141 |
|
142 | static size_t base64_encode(const char* src,
|
143 | size_t slen,
|
144 | char* dst,
|
145 | size_t dlen) {
|
146 |
|
147 | assert(dlen >= base64_encoded_size(slen) &&
|
148 | "not enough space provided for base64 encode");
|
149 |
|
150 | dlen = base64_encoded_size(slen);
|
151 |
|
152 | unsigned a;
|
153 | unsigned b;
|
154 | unsigned c;
|
155 | unsigned i;
|
156 | unsigned k;
|
157 | unsigned n;
|
158 |
|
159 | static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
160 | "abcdefghijklmnopqrstuvwxyz"
|
161 | "0123456789+/";
|
162 |
|
163 | i = 0;
|
164 | k = 0;
|
165 | n = slen / 3 * 3;
|
166 |
|
167 | while (i < n) {
|
168 | a = src[i + 0] & 0xff;
|
169 | b = src[i + 1] & 0xff;
|
170 | c = src[i + 2] & 0xff;
|
171 |
|
172 | dst[k + 0] = table[a >> 2];
|
173 | dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
|
174 | dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
|
175 | dst[k + 3] = table[c & 0x3f];
|
176 |
|
177 | i += 3;
|
178 | k += 4;
|
179 | }
|
180 |
|
181 | if (n != slen) {
|
182 | switch (slen - n) {
|
183 | case 1:
|
184 | a = src[i + 0] & 0xff;
|
185 | dst[k + 0] = table[a >> 2];
|
186 | dst[k + 1] = table[(a & 3) << 4];
|
187 | dst[k + 2] = '=';
|
188 | dst[k + 3] = '=';
|
189 | break;
|
190 |
|
191 | case 2:
|
192 | a = src[i + 0] & 0xff;
|
193 | b = src[i + 1] & 0xff;
|
194 | dst[k + 0] = table[a >> 2];
|
195 | dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
|
196 | dst[k + 2] = table[(b & 0x0f) << 2];
|
197 | dst[k + 3] = '=';
|
198 | break;
|
199 | }
|
200 | }
|
201 |
|
202 | return dlen;
|
203 | }
|
204 |
|
205 |
|
// Writes the lowercase hexadecimal representation of `slen` bytes from `src`
// into `dst`, two characters per input byte.  `dst` must have room for at
// least slen * 2 characters (`dlen` is its capacity); no terminating NUL is
// written.  Returns the number of characters stored (slen * 2).
static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
  // The output size is known up front; just make sure it fits.
  assert(dlen >= slen * 2 &&
         "not enough space provided for hex encode");

  static const char hex[] = "0123456789abcdef";

  dlen = slen * 2;
  // Counters are size_t like the bound they are compared against: a 32-bit
  // counter would wrap, and the loop would never terminate, once the output
  // reaches 4 GiB.
  for (size_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
    const uint8_t val = static_cast<uint8_t>(src[i]);
    dst[k + 0] = hex[val >> 4];
    dst[k + 1] = hex[val & 15];
  }

  return dlen;
}
|
221 |
|
222 |
|
223 |
|
224 | static Local<Value> Encode(const char* buf,
|
225 | size_t buflen,
|
226 | enum Encoding encoding) {
|
227 | assert(buflen <= node::Buffer::kMaxLength);
|
228 | if (!buflen && encoding != BUFFER)
|
229 | return New("").ToLocalChecked();
|
230 |
|
231 | Local<String> val;
|
232 | switch (encoding) {
|
233 | case BUFFER:
|
234 | return CopyBuffer(buf, buflen).ToLocalChecked();
|
235 |
|
236 | case ASCII:
|
237 | if (contains_non_ascii(buf, buflen)) {
|
238 | char* out = new char[buflen];
|
239 | force_ascii(buf, out, buflen);
|
240 | val = New<String>(out, buflen).ToLocalChecked();
|
241 | delete[] out;
|
242 | } else {
|
243 | val = New<String>(buf, buflen).ToLocalChecked();
|
244 | }
|
245 | break;
|
246 |
|
247 | case UTF8:
|
248 | val = New<String>(buf, buflen).ToLocalChecked();
|
249 | break;
|
250 |
|
251 | case BINARY: {
|
252 |
|
253 | const unsigned char *cbuf = reinterpret_cast<const unsigned char*>(buf);
|
254 | uint16_t * twobytebuf = new uint16_t[buflen];
|
255 | for (size_t i = 0; i < buflen; i++) {
|
256 |
|
257 | twobytebuf[i] = cbuf[i];
|
258 | }
|
259 | val = New<String>(twobytebuf, buflen).ToLocalChecked();
|
260 | delete[] twobytebuf;
|
261 | break;
|
262 | }
|
263 |
|
264 | case BASE64: {
|
265 | size_t dlen = base64_encoded_size(buflen);
|
266 | char* dst = new char[dlen];
|
267 |
|
268 | size_t written = base64_encode(buf, buflen, dst, dlen);
|
269 | assert(written == dlen);
|
270 |
|
271 | val = New<String>(dst, dlen).ToLocalChecked();
|
272 | delete[] dst;
|
273 | break;
|
274 | }
|
275 |
|
276 | case UCS2: {
|
277 | const uint16_t* data = reinterpret_cast<const uint16_t*>(buf);
|
278 | val = New<String>(data, buflen / 2).ToLocalChecked();
|
279 | break;
|
280 | }
|
281 |
|
282 | case HEX: {
|
283 | size_t dlen = buflen * 2;
|
284 | char* dst = new char[dlen];
|
285 | size_t written = hex_encode(buf, buflen, dst, dlen);
|
286 | assert(written == dlen);
|
287 |
|
288 | val = New<String>(dst, dlen).ToLocalChecked();
|
289 | delete[] dst;
|
290 | break;
|
291 | }
|
292 |
|
293 | default:
|
294 | assert(0 && "unknown encoding");
|
295 | break;
|
296 | }
|
297 |
|
298 | return val;
|
299 | }
|
300 |
|
301 | #undef base64_encoded_size
|
302 |
|
303 | }
|
304 |
|
305 | #endif
|