1 | /*
|
2 | Copyright (c) 2003-2013, Troy D. Hanson http://troydhanson.github.com/uthash/
|
3 | All rights reserved.
|
4 |
|
5 | Redistribution and use in source and binary forms, with or without
|
6 | modification, are permitted provided that the following conditions are met:
|
7 |
|
8 | * Redistributions of source code must retain the above copyright
|
9 | notice, this list of conditions and the following disclaimer.
|
10 |
|
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
22 | */
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 | /* These macros use decltype or the earlier __typeof GNU extension.
|
32 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+
|
33 | when compiling c++ source) this code uses whatever method is needed
|
34 | or, for VS2008 where neither is available, uses casting workarounds. */
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 | do { \
|
49 | char **_da_dst = (char**)(&(dst)); \
|
50 | *_da_dst = (char*)(src); \
|
51 | } while(0)
|
52 |
|
53 |
|
54 | do { \
|
55 | (dst) = DECLTYPE(dst)(src); \
|
56 | } while(0)
|
57 |
|
58 |
|
59 | /* a number of the hash functions use uint32_t, which isn't defined on win32 */
|
60 |
|
61 | typedef unsigned int uint32_t;  /* win32 fallback; assumes 'unsigned int' is 32 bits -- TODO confirm for targeted compilers */
|
62 | typedef unsigned char uint8_t;
|
63 |
|
64 |
|
65 |
|
66 |
|
67 |
|
68 |
|
69 |
|
70 |
|
71 |
|
72 |
|
73 |
|
74 |
|
75 |
|
76 |
|
77 |
|
78 |
|
79 |
|
80 |
|
81 |
|
82 |
|
83 |
|
84 |
|
85 |
|
86 | /* initial number of buckets */
|
87 |
|
88 |
|
89 |
|
90 |
|
91 | /* calculate the element whose hash handle address is hhe */
|
92 |
|
93 |
|
94 |
|
95 | do { \
|
96 | unsigned _hf_bkt,_hf_hashv; \
|
97 | out=NULL; \
|
98 | if (head) { \
|
99 | HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
|
100 | if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
|
101 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
|
102 | keyptr,keylen,out); \
|
103 | } \
|
104 | } \
|
105 | } while (0)
|
106 |
|
107 |
|
108 |
|
109 |
|
110 |
|
111 | do { \
|
112 | (tbl)->bloom_nbits = HASH_BLOOM; \
|
113 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
|
114 | if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
|
115 | memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
|
116 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
|
117 | } while (0)
|
118 |
|
119 |
|
120 | do { \
|
121 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
|
122 | } while (0)
|
123 |
|
124 |
|
125 |
|
126 |
|
127 |
|
128 | HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
|
129 |
|
130 |
|
131 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
|
132 |
|
133 |
|
134 |
|
135 |
|
136 |
|
137 |
|
138 |
|
139 |
|
140 |
|
141 |
|
142 | do { \
|
143 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
|
144 | sizeof(UT_hash_table)); \
|
145 | if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
|
146 | memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
|
147 | (head)->hh.tbl->tail = &((head)->hh); \
|
148 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
|
149 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
|
150 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
|
151 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
|
152 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
|
153 | if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
|
154 | memset((head)->hh.tbl->buckets, 0, \
|
155 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
|
156 | HASH_BLOOM_MAKE((head)->hh.tbl); \
|
157 | (head)->hh.tbl->signature = HASH_SIGNATURE; \
|
158 | } while(0)
|
159 |
|
160 |
|
161 | HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
|
162 |
|
163 |
|
164 | do { \
|
165 | replaced=NULL; \
|
166 | HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
|
167 | if (replaced!=NULL) { \
|
168 | HASH_DELETE(hh,head,replaced); \
|
169 | }; \
|
170 | HASH_ADD(hh,head,fieldname,keylen_in,add); \
|
171 | } while(0)
|
172 |
|
173 |
|
174 | do { \
|
175 | unsigned _ha_bkt; \
|
176 | (add)->hh.next = NULL; \
|
177 | (add)->hh.key = (char*)keyptr; \
|
178 | (add)->hh.keylen = (unsigned)keylen_in; \
|
179 | if (!(head)) { \
|
180 | head = (add); \
|
181 | (head)->hh.prev = NULL; \
|
182 | HASH_MAKE_TABLE(hh,head); \
|
183 | } else { \
|
184 | (head)->hh.tbl->tail->next = (add); \
|
185 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
|
186 | (head)->hh.tbl->tail = &((add)->hh); \
|
187 | } \
|
188 | (head)->hh.tbl->num_items++; \
|
189 | (add)->hh.tbl = (head)->hh.tbl; \
|
190 | HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
|
191 | (add)->hh.hashv, _ha_bkt); \
|
192 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
|
193 | HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
|
194 | HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
|
195 | HASH_FSCK(hh,head); \
|
196 | } while(0)
|
197 |
|
198 |
|
199 | do { \
|
200 | bkt = ((hashv) & ((num_bkts) - 1)); \
|
201 | } while(0)
|
202 |
|
203 | /* delete "delptr" from the hash table.
|
204 | * "the usual" patch-up process for the app-order doubly-linked-list.
|
205 | * The use of _hd_hh_del below deserves special explanation.
|
206 | * These used to be expressed using (delptr) but that led to a bug
|
207 | * if someone used the same symbol for the head and deletee, like
|
208 | * HASH_DELETE(hh,users,users);
|
209 | * We want that to work, but by changing the head (users) below
|
210 | * we were forfeiting our ability to further refer to the deletee (users)
|
211 | * in the patch-up process. Solution: use scratch space to
|
212 | * copy the deletee pointer, then the latter references are via that
|
213 | * scratch pointer rather than through the repointed (users) symbol.
|
214 | */
|
215 |
|
216 | do { \
|
217 | unsigned _hd_bkt; \
|
218 | struct UT_hash_handle *_hd_hh_del; \
|
219 | if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
|
220 | uthash_free((head)->hh.tbl->buckets, \
|
221 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
|
222 | HASH_BLOOM_FREE((head)->hh.tbl); \
|
223 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
|
224 | head = NULL; \
|
225 | } else { \
|
226 | _hd_hh_del = &((delptr)->hh); \
|
227 | if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
|
228 | (head)->hh.tbl->tail = \
|
229 | (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
|
230 | (head)->hh.tbl->hho); \
|
231 | } \
|
232 | if ((delptr)->hh.prev) { \
|
233 | ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
|
234 | (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
|
235 | } else { \
|
236 | DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
|
237 | } \
|
238 | if (_hd_hh_del->next) { \
|
239 | ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
|
240 | (head)->hh.tbl->hho))->prev = \
|
241 | _hd_hh_del->prev; \
|
242 | } \
|
243 | HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
|
244 | HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
|
245 | (head)->hh.tbl->num_items--; \
|
246 | } \
|
247 | HASH_FSCK(hh,head); \
|
248 | } while (0)
|
249 |
|
250 |
|
251 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
|
252 |
|
253 | HASH_FIND(hh,head,findstr,strlen(findstr),out)
|
254 |
|
255 | HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
|
256 |
|
257 | HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced)
|
258 |
|
259 | HASH_FIND(hh,head,findint,sizeof(int),out)
|
260 |
|
261 | HASH_ADD(hh,head,intfield,sizeof(int),add)
|
262 |
|
263 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
|
264 |
|
265 | HASH_FIND(hh,head,findptr,sizeof(void *),out)
|
266 |
|
267 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
|
268 |
|
269 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
|
270 |
|
271 | HASH_DELETE(hh,head,delptr)
|
272 |
|
273 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
|
274 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
|
275 | */
|
276 |
|
277 |
|
278 |
|
279 | do { \
|
280 | unsigned _bkt_i; \
|
281 | unsigned _count, _bkt_count; \
|
282 | char *_prev; \
|
283 | struct UT_hash_handle *_thh; \
|
284 | if (head) { \
|
285 | _count = 0; \
|
286 | for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
|
287 | _bkt_count = 0; \
|
288 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
|
289 | _prev = NULL; \
|
290 | while (_thh) { \
|
291 | if (_prev != (char*)(_thh->hh_prev)) { \
|
292 | HASH_OOPS("invalid hh_prev %p, actual %p\n", \
|
293 | _thh->hh_prev, _prev ); \
|
294 | } \
|
295 | _bkt_count++; \
|
296 | _prev = (char*)(_thh); \
|
297 | _thh = _thh->hh_next; \
|
298 | } \
|
299 | _count += _bkt_count; \
|
300 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
|
301 | HASH_OOPS("invalid bucket count %d, actual %d\n", \
|
302 | (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
|
303 | } \
|
304 | } \
|
305 | if (_count != (head)->hh.tbl->num_items) { \
|
306 | HASH_OOPS("invalid hh item count %d, actual %d\n", \
|
307 | (head)->hh.tbl->num_items, _count ); \
|
308 | } \
|
309 | /* traverse hh in app order; check next/prev integrity, count */ \
|
310 | _count = 0; \
|
311 | _prev = NULL; \
|
312 | _thh = &(head)->hh; \
|
313 | while (_thh) { \
|
314 | _count++; \
|
315 | if (_prev !=(char*)(_thh->prev)) { \
|
316 | HASH_OOPS("invalid prev %p, actual %p\n", \
|
317 | _thh->prev, _prev ); \
|
318 | } \
|
319 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
|
320 | _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
|
321 | (head)->hh.tbl->hho) : NULL ); \
|
322 | } \
|
323 | if (_count != (head)->hh.tbl->num_items) { \
|
324 | HASH_OOPS("invalid app item count %d, actual %d\n", \
|
325 | (head)->hh.tbl->num_items, _count ); \
|
326 | } \
|
327 | } \
|
328 | } while (0)
|
329 |
|
330 |
|
331 |
|
332 |
|
333 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
|
334 | * the descriptor to which this macro is defined for tuning the hash function.
|
335 | * The app can #include <unistd.h> to get the prototype for write(2). */
|
336 |
|
337 |
|
338 | do { \
|
339 | unsigned _klen = fieldlen; \
|
340 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
|
341 | write(HASH_EMIT_KEYS, keyptr, fieldlen); \
|
342 | } while (0)
|
343 |
|
344 |
|
345 |
|
346 |
|
347 | /* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
|
348 |
|
349 |
|
350 |
|
351 |
|
352 |
|
353 |
|
354 | /* The Bernstein hash function, used in Perl prior to v5.6 */
|
355 |
|
356 | do { \
|
357 | unsigned _hb_keylen=keylen; \
|
358 | char *_hb_key=(char*)(key); \
|
359 | (hashv) = 0; \
|
360 | while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
|
361 | bkt = (hashv) & (num_bkts-1); \
|
362 | } while (0)
|
363 |
|
364 |
|
365 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
|
366 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
|
367 |
|
368 | do { \
|
369 | unsigned _sx_i; \
|
370 | char *_hs_key=(char*)(key); \
|
371 | hashv = 0; \
|
372 | for(_sx_i=0; _sx_i < keylen; _sx_i++) \
|
373 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
|
374 | bkt = hashv & (num_bkts-1); \
|
375 | } while (0)
|
376 |
|
377 |
|
378 | do { \
|
379 | unsigned _fn_i; \
|
380 | char *_hf_key=(char*)(key); \
|
381 | hashv = 2166136261UL; \
|
382 | for(_fn_i=0; _fn_i < keylen; _fn_i++) \
|
383 | hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
|
384 | bkt = hashv & (num_bkts-1); \
|
385 | } while(0)
|
386 |
|
387 |
|
388 | do { \
|
389 | unsigned _ho_i; \
|
390 | char *_ho_key=(char*)(key); \
|
391 | hashv = 0; \
|
392 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
|
393 | hashv += _ho_key[_ho_i]; \
|
394 | hashv += (hashv << 10); \
|
395 | hashv ^= (hashv >> 6); \
|
396 | } \
|
397 | hashv += (hashv << 3); \
|
398 | hashv ^= (hashv >> 11); \
|
399 | hashv += (hashv << 15); \
|
400 | bkt = hashv & (num_bkts-1); \
|
401 | } while(0)
|
402 |
|
403 |
|
404 | do { \
|
405 | a -= b; a -= c; a ^= ( c >> 13 ); \
|
406 | b -= c; b -= a; b ^= ( a << 8 ); \
|
407 | c -= a; c -= b; c ^= ( b >> 13 ); \
|
408 | a -= b; a -= c; a ^= ( c >> 12 ); \
|
409 | b -= c; b -= a; b ^= ( a << 16 ); \
|
410 | c -= a; c -= b; c ^= ( b >> 5 ); \
|
411 | a -= b; a -= c; a ^= ( c >> 3 ); \
|
412 | b -= c; b -= a; b ^= ( a << 10 ); \
|
413 | c -= a; c -= b; c ^= ( b >> 15 ); \
|
414 | } while (0)
|
415 |
|
416 |
|
417 | do { \
|
418 | unsigned _hj_i,_hj_j,_hj_k; \
|
419 | unsigned char *_hj_key=(unsigned char*)(key); \
|
420 | hashv = 0xfeedbeef; \
|
421 | _hj_i = _hj_j = 0x9e3779b9; \
|
422 | _hj_k = (unsigned)keylen; \
|
423 | while (_hj_k >= 12) { \
|
424 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
|
425 | + ( (unsigned)_hj_key[2] << 16 ) \
|
426 | + ( (unsigned)_hj_key[3] << 24 ) ); \
|
427 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
|
428 | + ( (unsigned)_hj_key[6] << 16 ) \
|
429 | + ( (unsigned)_hj_key[7] << 24 ) ); \
|
430 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
|
431 | + ( (unsigned)_hj_key[10] << 16 ) \
|
432 | + ( (unsigned)_hj_key[11] << 24 ) ); \
|
433 | \
|
434 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
|
435 | \
|
436 | _hj_key += 12; \
|
437 | _hj_k -= 12; \
|
438 | } \
|
439 | hashv += keylen; \
|
440 | switch ( _hj_k ) { \
|
441 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
|
442 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
|
443 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
|
444 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
|
445 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
|
446 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
|
447 | case 5: _hj_j += _hj_key[4]; \
|
448 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
|
449 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
|
450 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
|
451 | case 1: _hj_i += _hj_key[0]; \
|
452 | } \
|
453 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
|
454 | bkt = hashv & (num_bkts-1); \
|
455 | } while(0)
|
456 |
|
457 | /* The Paul Hsieh hash function */
|
458 |
|
459 |
|
460 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
|
461 |
|
462 |
|
463 |
|
464 |
|
465 |
|
466 | +(uint32_t)(((const uint8_t *)(d))[0]) )
|
467 |
|
468 |
|
469 | do { \
|
470 | unsigned char *_sfh_key=(unsigned char*)(key); \
|
471 | uint32_t _sfh_tmp, _sfh_len = keylen; \
|
472 | \
|
473 | int _sfh_rem = _sfh_len & 3; \
|
474 | _sfh_len >>= 2; \
|
475 | hashv = 0xcafebabe; \
|
476 | \
|
477 | /* Main loop */ \
|
478 | for (;_sfh_len > 0; _sfh_len--) { \
|
479 | hashv += get16bits (_sfh_key); \
|
480 | _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \
|
481 | hashv = (hashv << 16) ^ _sfh_tmp; \
|
482 | _sfh_key += 2*sizeof (uint16_t); \
|
483 | hashv += hashv >> 11; \
|
484 | } \
|
485 | \
|
486 | /* Handle end cases */ \
|
487 | switch (_sfh_rem) { \
|
488 | case 3: hashv += get16bits (_sfh_key); \
|
489 | hashv ^= hashv << 16; \
|
490 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \
|
491 | hashv += hashv >> 11; \
|
492 | break; \
|
493 | case 2: hashv += get16bits (_sfh_key); \
|
494 | hashv ^= hashv << 11; \
|
495 | hashv += hashv >> 17; \
|
496 | break; \
|
497 | case 1: hashv += *_sfh_key; \
|
498 | hashv ^= hashv << 10; \
|
499 | hashv += hashv >> 1; \
|
500 | } \
|
501 | \
|
502 | /* Force "avalanching" of final 127 bits */ \
|
503 | hashv ^= hashv << 3; \
|
504 | hashv += hashv >> 5; \
|
505 | hashv ^= hashv << 4; \
|
506 | hashv += hashv >> 17; \
|
507 | hashv ^= hashv << 25; \
|
508 | hashv += hashv >> 6; \
|
509 | bkt = hashv & (num_bkts-1); \
|
510 | } while(0)
|
511 |
|
512 |
|
513 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
|
514 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
|
515 | * MurmurHash uses the faster approach only on CPU's where we know it's safe.
|
516 | *
|
517 | * Note the preprocessor built-in defines can be emitted using:
|
518 | *
|
519 | * gcc -m64 -dM -E - < /dev/null (on gcc)
|
520 | * cc -## a.c (where a.c is a simple test file) (Sun Studio)
|
521 | */
|
522 |
|
523 |
|
524 |
|
525 |
|
526 |
|
527 |
|
528 |
|
529 |
|
530 |
|
531 |
|
532 |
|
533 |
|
534 |
|
535 |
|
536 |
|
537 |
|
538 |
|
539 |
|
540 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
|
541 | (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
|
542 | MUR_ONE_THREE(p))))
|
543 |
|
544 |
|
545 |
|
546 | do { \
|
547 | _h ^= _h >> 16; \
|
548 | _h *= 0x85ebca6b; \
|
549 | _h ^= _h >> 13; \
|
550 | _h *= 0xc2b2ae35l; \
|
551 | _h ^= _h >> 16; \
|
552 | } while(0)
|
553 |
|
554 |
|
555 | do { \
|
556 | const uint8_t *_mur_data = (const uint8_t*)(key); \
|
557 | const int _mur_nblocks = (keylen) / 4; \
|
558 | uint32_t _mur_h1 = 0xf88D5353; \
|
559 | uint32_t _mur_c1 = 0xcc9e2d51; \
|
560 | uint32_t _mur_c2 = 0x1b873593; \
|
561 | uint32_t _mur_k1 = 0; \
|
562 | const uint8_t *_mur_tail; \
|
563 | const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
|
564 | int _mur_i; \
|
565 | for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
|
566 | _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
|
567 | _mur_k1 *= _mur_c1; \
|
568 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \
|
569 | _mur_k1 *= _mur_c2; \
|
570 | \
|
571 | _mur_h1 ^= _mur_k1; \
|
572 | _mur_h1 = MUR_ROTL32(_mur_h1,13); \
|
573 | _mur_h1 = _mur_h1*5+0xe6546b64; \
|
574 | } \
|
575 | _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
|
576 | _mur_k1=0; \
|
577 | switch((keylen) & 3) { \
|
578 | case 3: _mur_k1 ^= _mur_tail[2] << 16; \
|
579 | case 2: _mur_k1 ^= _mur_tail[1] << 8; \
|
580 | case 1: _mur_k1 ^= _mur_tail[0]; \
|
581 | _mur_k1 *= _mur_c1; \
|
582 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \
|
583 | _mur_k1 *= _mur_c2; \
|
584 | _mur_h1 ^= _mur_k1; \
|
585 | } \
|
586 | _mur_h1 ^= (keylen); \
|
587 | MUR_FMIX(_mur_h1); \
|
588 | hashv = _mur_h1; \
|
589 | bkt = hashv & (num_bkts-1); \
|
590 | } while(0)
|
591 |
|
592 |
|
593 | /* key comparison function; return 0 if keys equal */
|
594 |
|
595 |
|
596 | /* iterate over items in a known bucket to find desired item */
|
597 |
|
598 | do { \
|
599 | if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
|
600 | else out=NULL; \
|
601 | while (out) { \
|
602 | if ((out)->hh.keylen == keylen_in) { \
|
603 | if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
|
604 | } \
|
605 | if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
|
606 | else out = NULL; \
|
607 | } \
|
608 | } while(0)
|
609 |
|
610 | /* add an item to a bucket */
|
611 |
|
612 | do { \
|
613 | head.count++; \
|
614 | (addhh)->hh_next = head.hh_head; \
|
615 | (addhh)->hh_prev = NULL; \
|
616 | if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
|
617 | (head).hh_head=addhh; \
|
618 | if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
|
619 | && (addhh)->tbl->noexpand != 1) { \
|
620 | HASH_EXPAND_BUCKETS((addhh)->tbl); \
|
621 | } \
|
622 | } while(0)
|
623 |
|
624 | /* remove an item from a given bucket */
|
625 |
|
626 | (head).count--; \
|
627 | if ((head).hh_head == hh_del) { \
|
628 | (head).hh_head = hh_del->hh_next; \
|
629 | } \
|
630 | if (hh_del->hh_prev) { \
|
631 | hh_del->hh_prev->hh_next = hh_del->hh_next; \
|
632 | } \
|
633 | if (hh_del->hh_next) { \
|
634 | hh_del->hh_next->hh_prev = hh_del->hh_prev; \
|
635 | }
|
636 |
|
637 | /* Bucket expansion has the effect of doubling the number of buckets
|
638 | * and redistributing the items into the new buckets. Ideally the
|
639 | * items will distribute more or less evenly into the new buckets
|
640 | * (the extent to which this is true is a measure of the quality of
|
641 | * the hash function as it applies to the key domain).
|
642 | *
|
643 | * With the items distributed into more buckets, the chain length
|
644 | * (item count) in each bucket is reduced. Thus by expanding buckets
|
645 | * the hash keeps a bound on the chain length. This bounded chain
|
646 | * length is the essence of how a hash provides constant time lookup.
|
647 | *
|
648 | * The calculation of tbl->ideal_chain_maxlen below deserves some
|
649 | * explanation. First, keep in mind that we're calculating the ideal
|
650 | * maximum chain length based on the *new* (doubled) bucket count.
|
651 | * In fractions this is just n/b (n=number of items,b=new num buckets).
|
652 | * Since the ideal chain length is an integer, we want to calculate
|
653 | * ceil(n/b). We don't depend on floating point arithmetic in this
|
654 | * hash, so to calculate ceil(n/b) with integers we could write
|
655 | *
|
656 | * ceil(n/b) = (n/b) + ((n%b)?1:0)
|
657 | *
|
658 | * and in fact a previous version of this hash did just that.
|
659 | * But now we have improved things a bit by recognizing that b is
|
660 | * always a power of two. We keep its base 2 log handy (call it lb),
|
661 | * so now we can write this with a bit shift and logical AND:
|
662 | *
|
663 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
|
664 | *
|
665 | */
|
666 |
|
667 | do { \
|
668 | unsigned _he_bkt; \
|
669 | unsigned _he_bkt_i; \
|
670 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
|
671 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
|
672 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
|
673 | 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
|
674 | if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
|
675 | memset(_he_new_buckets, 0, \
|
676 | 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
|
677 | tbl->ideal_chain_maxlen = \
|
678 | (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
|
679 | ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
|
680 | tbl->nonideal_items = 0; \
|
681 | for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
|
682 | { \
|
683 | _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
|
684 | while (_he_thh) { \
|
685 | _he_hh_nxt = _he_thh->hh_next; \
|
686 | HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
|
687 | _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
|
688 | if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
|
689 | tbl->nonideal_items++; \
|
690 | _he_newbkt->expand_mult = _he_newbkt->count / \
|
691 | tbl->ideal_chain_maxlen; \
|
692 | } \
|
693 | _he_thh->hh_prev = NULL; \
|
694 | _he_thh->hh_next = _he_newbkt->hh_head; \
|
695 | if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
|
696 | _he_thh; \
|
697 | _he_newbkt->hh_head = _he_thh; \
|
698 | _he_thh = _he_hh_nxt; \
|
699 | } \
|
700 | } \
|
701 | uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
|
702 | tbl->num_buckets *= 2; \
|
703 | tbl->log2_num_buckets++; \
|
704 | tbl->buckets = _he_new_buckets; \
|
705 | tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
|
706 | (tbl->ineff_expands+1) : 0; \
|
707 | if (tbl->ineff_expands > 1) { \
|
708 | tbl->noexpand=1; \
|
709 | uthash_noexpand_fyi(tbl); \
|
710 | } \
|
711 | uthash_expand_fyi(tbl); \
|
712 | } while(0)
|
713 |
|
714 |
|
715 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
|
716 | /* Note that HASH_SORT assumes the hash handle name to be hh.
|
717 | * HASH_SRT was added to allow the hash handle name to be passed in. */
|
718 |
|
719 |
|
720 | do { \
|
721 | unsigned _hs_i; \
|
722 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
|
723 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
|
724 | if (head) { \
|
725 | _hs_insize = 1; \
|
726 | _hs_looping = 1; \
|
727 | _hs_list = &((head)->hh); \
|
728 | while (_hs_looping) { \
|
729 | _hs_p = _hs_list; \
|
730 | _hs_list = NULL; \
|
731 | _hs_tail = NULL; \
|
732 | _hs_nmerges = 0; \
|
733 | while (_hs_p) { \
|
734 | _hs_nmerges++; \
|
735 | _hs_q = _hs_p; \
|
736 | _hs_psize = 0; \
|
737 | for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
|
738 | _hs_psize++; \
|
739 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
|
740 | ((void*)((char*)(_hs_q->next) + \
|
741 | (head)->hh.tbl->hho)) : NULL); \
|
742 | if (! (_hs_q) ) break; \
|
743 | } \
|
744 | _hs_qsize = _hs_insize; \
|
745 | while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
|
746 | if (_hs_psize == 0) { \
|
747 | _hs_e = _hs_q; \
|
748 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
|
749 | ((void*)((char*)(_hs_q->next) + \
|
750 | (head)->hh.tbl->hho)) : NULL); \
|
751 | _hs_qsize--; \
|
752 | } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
|
753 | _hs_e = _hs_p; \
|
754 | if (_hs_p){ \
|
755 | _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
|
756 | ((void*)((char*)(_hs_p->next) + \
|
757 | (head)->hh.tbl->hho)) : NULL); \
|
758 | } \
|
759 | _hs_psize--; \
|
760 | } else if (( \
|
761 | cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
|
762 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
|
763 | ) <= 0) { \
|
764 | _hs_e = _hs_p; \
|
765 | if (_hs_p){ \
|
766 | _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
|
767 | ((void*)((char*)(_hs_p->next) + \
|
768 | (head)->hh.tbl->hho)) : NULL); \
|
769 | } \
|
770 | _hs_psize--; \
|
771 | } else { \
|
772 | _hs_e = _hs_q; \
|
773 | _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
|
774 | ((void*)((char*)(_hs_q->next) + \
|
775 | (head)->hh.tbl->hho)) : NULL); \
|
776 | _hs_qsize--; \
|
777 | } \
|
778 | if ( _hs_tail ) { \
|
779 | _hs_tail->next = ((_hs_e) ? \
|
780 | ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
|
781 | } else { \
|
782 | _hs_list = _hs_e; \
|
783 | } \
|
784 | if (_hs_e) { \
|
785 | _hs_e->prev = ((_hs_tail) ? \
|
786 | ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
|
787 | } \
|
788 | _hs_tail = _hs_e; \
|
789 | } \
|
790 | _hs_p = _hs_q; \
|
791 | } \
|
792 | if (_hs_tail){ \
|
793 | _hs_tail->next = NULL; \
|
794 | } \
|
795 | if ( _hs_nmerges <= 1 ) { \
|
796 | _hs_looping=0; \
|
797 | (head)->hh.tbl->tail = _hs_tail; \
|
798 | DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
|
799 | } \
|
800 | _hs_insize *= 2; \
|
801 | } \
|
802 | HASH_FSCK(hh,head); \
|
803 | } \
|
804 | } while (0)
|
805 |
|
806 | /* This function selects items from one hash into another hash.
|
807 | * The end result is that the selected items have dual presence
|
808 | * in both hashes. There is no copy of the items made; rather
|
809 | * they are added into the new hash through a secondary hash
|
810 | * hash handle that must be present in the structure. */
|
811 |
|
812 | do { \
|
813 | unsigned _src_bkt, _dst_bkt; \
|
814 | void *_last_elt=NULL, *_elt; \
|
815 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
|
816 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
|
817 | if (src) { \
|
818 | for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
|
819 | for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
|
820 | _src_hh; \
|
821 | _src_hh = _src_hh->hh_next) { \
|
822 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
|
823 | if (cond(_elt)) { \
|
824 | _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
|
825 | _dst_hh->key = _src_hh->key; \
|
826 | _dst_hh->keylen = _src_hh->keylen; \
|
827 | _dst_hh->hashv = _src_hh->hashv; \
|
828 | _dst_hh->prev = _last_elt; \
|
829 | _dst_hh->next = NULL; \
|
830 | if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
|
831 | if (!dst) { \
|
832 | DECLTYPE_ASSIGN(dst,_elt); \
|
833 | HASH_MAKE_TABLE(hh_dst,dst); \
|
834 | } else { \
|
835 | _dst_hh->tbl = (dst)->hh_dst.tbl; \
|
836 | } \
|
837 | HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
|
838 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
|
839 | (dst)->hh_dst.tbl->num_items++; \
|
840 | _last_elt = _elt; \
|
841 | _last_elt_hh = _dst_hh; \
|
842 | } \
|
843 | } \
|
844 | } \
|
845 | } \
|
846 | HASH_FSCK(hh_dst,dst); \
|
847 | } while (0)
|
848 |
|
849 |
|
850 | do { \
|
851 | if (head) { \
|
852 | uthash_free((head)->hh.tbl->buckets, \
|
853 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
|
854 | HASH_BLOOM_FREE((head)->hh.tbl); \
|
855 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
|
856 | (head)=NULL; \
|
857 | } \
|
858 | } while(0)
|
859 |
|
860 |
|
861 | (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
|
862 | ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
|
863 | (sizeof(UT_hash_table)) + \
|
864 | (HASH_BLOOM_BYTELEN)))
|
865 |
|
866 |
|
867 |
|
868 | for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
|
869 | el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
|
870 |
|
871 |
|
872 | for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
|
873 | el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
|
874 |
|
875 |
|
876 | /* obtain a count of items in the hash */
|
877 |
|
878 |
|
879 |
|
880 | typedef struct UT_hash_bucket {
|
881 | struct UT_hash_handle *hh_head;  /* head of this bucket's doubly-linked chain of hash handles */
|
882 | unsigned count;                  /* number of items currently chained in this bucket */
|
883 |
|
884 | /* expand_mult is normally set to 0. In this situation, the max chain length
|
885 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
|
886 | * the bucket's chain exceeds this length, bucket expansion is triggered).
|
887 | * However, setting expand_mult to a non-zero value delays bucket expansion
|
888 | * (that would be triggered by additions to this particular bucket)
|
889 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
|
890 | * (The multiplier is simply expand_mult+1). The whole idea of this
|
891 | * multiplier is to reduce bucket expansions, since they are expensive, in
|
892 | * situations where we know that a particular bucket tends to be overused.
|
893 | * It is better to let its chain length grow to a longer yet-still-bounded
|
894 | * value, than to do an O(n) bucket expansion too often.
|
895 | */
|
896 | unsigned expand_mult;
|
897 |
|
898 | } UT_hash_bucket;
|
899 |
|
900 | /* random signature used only to find hash tables in external analysis */
|
901 |
|
902 |
|
903 |
|
904 | typedef struct UT_hash_table {
|
905 | UT_hash_bucket *buckets;                /* bucket array; length is num_buckets */
|
906 | unsigned num_buckets, log2_num_buckets; /* bucket count (a power of two) and its base-2 log */
|
907 | unsigned num_items;                     /* total number of items in the hash */
|
908 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
|
909 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
|
910 |
|
911 | /* in an ideal situation (all buckets used equally), no bucket would have
|
912 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */
|
913 | unsigned ideal_chain_maxlen;
|
914 |
|
915 | /* nonideal_items is the number of items in the hash whose chain position
|
916 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
|
917 | * hash distribution; reaching them in a chain traversal takes >ideal steps */
|
918 | unsigned nonideal_items;
|
919 |
|
920 | /* ineffective expands occur when a bucket doubling was performed, but
|
921 | * afterward, more than half the items in the hash had nonideal chain
|
922 | * positions. If this happens on two consecutive expansions we inhibit any
|
923 | * further expansion, as it's not helping; this happens when the hash
|
924 | * function isn't a good fit for the key domain. When expansion is inhibited
|
925 | * the hash will still work, albeit no longer in constant time. */
|
926 | unsigned ineff_expands, noexpand;
|
927 |
|
928 | uint32_t signature; /* used only to find hash tables in external analysis */
|
929 |
|
930 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
|
931 | uint8_t *bloom_bv;  /* bloom filter bit vector (HASH_BLOOM_BYTELEN bytes) */
|
932 | char bloom_nbits;   /* number of hash bits used to index the bloom filter (HASH_BLOOM) */
|
933 |
|
934 |
|
935 | } UT_hash_table;
|
936 |
|
937 | typedef struct UT_hash_handle {
|
938 | struct UT_hash_table *tbl;      /* table this element belongs to (shared by all its elements) */
|
939 | void *prev; /* prev element in app order */
|
940 | void *next; /* next element in app order */
|
941 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
|
942 | struct UT_hash_handle *hh_next; /* next hh in bucket order */
|
943 | void *key; /* ptr to enclosing struct's key */
|
944 | unsigned keylen; /* enclosing struct's key len */
|
945 | unsigned hashv; /* result of hash-fcn(key) */
|
946 | } UT_hash_handle;
|
947 |
|
948 |
|