UNPKG

8.1 kBtext/x-cView Raw
1// Copyright Joyent, Inc. and other Node contributors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a
4// copy of this software and associated documentation files (the
5// "Software"), to deal in the Software without restriction, including
6// without limitation the rights to use, copy, modify, merge, publish,
7// distribute, sublicense, and/or sell copies of the Software, and to permit
8// persons to whom the Software is furnished to do so, subject to the
9// following conditions:
10//
11// The above copyright notice and this permission notice shall be included
12// in all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22#ifndef NAN_STRING_BYTES_H_
23#define NAN_STRING_BYTES_H_
24
// Encodes a raw char* buffer as a v8::Local<v8::String> or a Buffer
26
27namespace imp {
28
29using v8::Local;
30using v8::Object;
31using v8::String;
32using v8::Value;
33
34
35//// Base 64 ////
36
// Number of base64 characters (including '=' padding) produced when encoding
// `size` input bytes: every started group of 3 bytes becomes 4 characters.
// The argument is parenthesized at each use so that expression arguments
// (e.g. `a + b`, `n << 1`) expand with the intended precedence.  `size` is
// still evaluated twice, so it must not have side effects.
#define base64_encoded_size(size) \
  ((((size) + 2) - (((size) + 2) % 3)) / 3 * 4)
38
39
40
//// ASCII ////
42
// Byte-at-a-time scan: returns true if any of the first `len` bytes of `buf`
// has its high bit (0x80) set.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80) return true;
  }
  return false;
}


// Returns true if any of the first `len` bytes of `src` has its high bit set.
//
// Inputs shorter than 16 bytes are scanned byte by byte.  Longer inputs are
// scanned in three steps: the bytes before the first word-aligned address are
// checked one at a time, whole words are then tested against a mask holding
// 0x80 in every byte, and the tail that does not fill a word is checked one
// byte at a time again.
static bool contains_non_ascii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const size_t bytes_per_word = sizeof(uintptr_t);
  const size_t align_mask = bytes_per_word - 1;
  const size_t unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // Consume bytes up to the next word boundary.
    const size_t head = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, head)) return true;
    src += head;
    len -= head;
  }

  // 0x80 replicated into every byte of a uintptr_t.  Deriving it from the
  // type (all-ones / 0xff == 0x0101...01) keeps the mask as wide as the word
  // on every platform, instead of relying on architecture macros.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xff * 0x80;

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  const size_t words = len / bytes_per_word;

  for (size_t i = 0; i < words; ++i) {
    if (srcw[i] & mask) return true;
  }

  // Bytes left over after the last whole word.
  const size_t tail = len & align_mask;
  if (tail > 0) {
    if (contains_non_ascii_slow(src + (len - tail), tail)) return true;
  }

  return false;
}
88
89
// Byte-at-a-time copy of `len` bytes from `src` to `dst` with the high bit
// (0x80) of every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies `len` bytes from `src` to `dst`, clearing the high bit of each byte.
//
// Inputs shorter than 16 bytes, and inputs whose source and destination do
// not share the same offset within a machine word, are processed byte by
// byte.  Otherwise the bytes before the first word boundary are handled one
// at a time, whole words are masked in a single operation each, and the tail
// that does not fill a word is handled one byte at a time.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const size_t bytes_per_word = sizeof(uintptr_t);
  const size_t align_mask = bytes_per_word - 1;
  const size_t src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const size_t dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  // Word-wise processing needs both pointers on a word boundary at the same
  // time, which is only reachable when they start at the same offset.
  if (src_unalign != dst_unalign) {
    force_ascii_slow(src, dst, len);
    return;
  }

  if (src_unalign > 0) {
    const size_t head = bytes_per_word - src_unalign;
    force_ascii_slow(src, dst, head);
    src += head;
    dst += head;
    // Shrink by the number of bytes actually consumed; subtracting the
    // misalignment instead would run past the end of both buffers.
    len -= head;
  }

  // 0x7f replicated into every byte of a uintptr_t, derived from the type so
  // the mask is as wide as the word on every platform.
  const uintptr_t mask = ~(~static_cast<uintptr_t>(0) / 0xff * 0x80);

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);
  const size_t words = len / bytes_per_word;

  for (size_t i = 0; i < words; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Bytes left over after the last whole word.
  const size_t tail = len & align_mask;
  if (tail > 0) {
    const size_t offset = len - tail;
    force_ascii_slow(src + offset, dst + offset, tail);
  }
}
140
141
142static size_t base64_encode(const char* src,
143 size_t slen,
144 char* dst,
145 size_t dlen) {
146 // We know how much we'll write, just make sure that there's space.
147 assert(dlen >= base64_encoded_size(slen) &&
148 "not enough space provided for base64 encode");
149
150 dlen = base64_encoded_size(slen);
151
152 unsigned a;
153 unsigned b;
154 unsigned c;
155 unsigned i;
156 unsigned k;
157 unsigned n;
158
159 static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
160 "abcdefghijklmnopqrstuvwxyz"
161 "0123456789+/";
162
163 i = 0;
164 k = 0;
165 n = slen / 3 * 3;
166
167 while (i < n) {
168 a = src[i + 0] & 0xff;
169 b = src[i + 1] & 0xff;
170 c = src[i + 2] & 0xff;
171
172 dst[k + 0] = table[a >> 2];
173 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
174 dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
175 dst[k + 3] = table[c & 0x3f];
176
177 i += 3;
178 k += 4;
179 }
180
181 if (n != slen) {
182 switch (slen - n) {
183 case 1:
184 a = src[i + 0] & 0xff;
185 dst[k + 0] = table[a >> 2];
186 dst[k + 1] = table[(a & 3) << 4];
187 dst[k + 2] = '=';
188 dst[k + 3] = '=';
189 break;
190
191 case 2:
192 a = src[i + 0] & 0xff;
193 b = src[i + 1] & 0xff;
194 dst[k + 0] = table[a >> 2];
195 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
196 dst[k + 2] = table[(b & 0x0f) << 2];
197 dst[k + 3] = '=';
198 break;
199 }
200 }
201
202 return dlen;
203}
204
205
// Writes the lowercase hexadecimal form of the `slen` bytes at `src` into
// `dst`, two characters per input byte.
//
// `dlen` is the capacity of `dst` and must be at least `slen * 2`; this is
// checked with assert().  No terminating NUL is written.  Returns the number
// of characters written (`slen * 2`).
static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
  // We know how much we'll write, just make sure that there's space.
  assert(dlen >= slen * 2 &&
      "not enough space provided for hex encode");

  static const char hex[] = "0123456789abcdef";

  dlen = slen * 2;
  // Counters are size_t: a 32-bit counter compared against a size_t bound
  // would wrap, and the loop would never finish, once dlen reaches 2^32.
  for (size_t i = 0, k = 0; i < slen; ++i, k += 2) {
    const uint8_t val = static_cast<uint8_t>(src[i]);
    dst[k + 0] = hex[val >> 4];
    dst[k + 1] = hex[val & 15];
  }

  return dlen;
}
221
222
223
224static Local<Value> Encode(const char* buf,
225 size_t buflen,
226 enum Encoding encoding) {
227 assert(buflen <= node::Buffer::kMaxLength);
228 if (!buflen && encoding != BUFFER)
229 return New("").ToLocalChecked();
230
231 Local<String> val;
232 switch (encoding) {
233 case BUFFER:
234 return CopyBuffer(buf, buflen).ToLocalChecked();
235
236 case ASCII:
237 if (contains_non_ascii(buf, buflen)) {
238 char* out = new char[buflen];
239 force_ascii(buf, out, buflen);
240 val = New<String>(out, buflen).ToLocalChecked();
241 delete[] out;
242 } else {
243 val = New<String>(buf, buflen).ToLocalChecked();
244 }
245 break;
246
247 case UTF8:
248 val = New<String>(buf, buflen).ToLocalChecked();
249 break;
250
251 case BINARY: {
252 // TODO(isaacs) use ExternalTwoByteString?
253 const unsigned char *cbuf = reinterpret_cast<const unsigned char*>(buf);
254 uint16_t * twobytebuf = new uint16_t[buflen];
255 for (size_t i = 0; i < buflen; i++) {
256 // XXX is the following line platform independent?
257 twobytebuf[i] = cbuf[i];
258 }
259 val = New<String>(twobytebuf, buflen).ToLocalChecked();
260 delete[] twobytebuf;
261 break;
262 }
263
264 case BASE64: {
265 size_t dlen = base64_encoded_size(buflen);
266 char* dst = new char[dlen];
267
268 size_t written = base64_encode(buf, buflen, dst, dlen);
269 assert(written == dlen);
270
271 val = New<String>(dst, dlen).ToLocalChecked();
272 delete[] dst;
273 break;
274 }
275
276 case UCS2: {
277 const uint16_t* data = reinterpret_cast<const uint16_t*>(buf);
278 val = New<String>(data, buflen / 2).ToLocalChecked();
279 break;
280 }
281
282 case HEX: {
283 size_t dlen = buflen * 2;
284 char* dst = new char[dlen];
285 size_t written = hex_encode(buf, buflen, dst, dlen);
286 assert(written == dlen);
287
288 val = New<String>(dst, dlen).ToLocalChecked();
289 delete[] dst;
290 break;
291 }
292
293 default:
294 assert(0 && "unknown encoding");
295 break;
296 }
297
298 return val;
299}
300
301#undef base64_encoded_size
302
303} // end of namespace imp
304
305#endif // NAN_STRING_BYTES_H_