/*
Copyright (c) 2003-2011, Troy D. Hanson   http://uthash.sourceforge.net
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef UTHASH_H
#define UTHASH_H

#include <string.h>   /* memcmp, strlen */
#include <stddef.h>   /* ptrdiff_t */
#include <stdlib.h>   /* exit() */

/* These macros use decltype or the earlier __typeof GNU extension.
   Since decltype is only available in newer compilers (VS2010, or gcc 4.3+
   when compiling C++ source), this code uses whichever method is available
   or, for VS2008 where neither is available, uses casting workarounds. */
#ifdef _MSC_VER                                 /* MS compiler */
#if _MSC_VER >= 1600 && defined(__cplusplus)    /* VS2010 or newer in C++ mode */
#define DECLTYPE(x) (decltype(x))
#else                                           /* VS2008 or older (or VS2010 in C mode) */
#define NO_DECLTYPE
#define DECLTYPE(x)
#endif
#else                                           /* GNU, Sun and other compilers */
#define DECLTYPE(x) (__typeof(x))
#endif

#ifdef NO_DECLTYPE
#define DECLTYPE_ASSIGN(dst,src)                                                 \
do {                                                                             \
  char **_da_dst = (char**)(&(dst));                                             \
  *_da_dst = (char*)(src);                                                       \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src)                                                 \
do {                                                                             \
  (dst) = DECLTYPE(dst)(src);                                                    \
} while(0)
#endif

/* a number of the hash functions use uint32_t, which isn't defined on win32 */
#ifdef _MSC_VER
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;
#else
#include <inttypes.h>   /* uint32_t */
#endif

#define UTHASH_VERSION 1.9.4

#ifndef UTHASH_MALLOC
#define UTHASH_MALLOC malloc
#endif

#ifndef UTHASH_FREE
#define UTHASH_FREE free
#endif

#define uthash_fatal(msg) exit(-1)           /* fatal error (out of memory, etc.) */
#define uthash_malloc(sz) UTHASH_MALLOC(sz)  /* malloc fcn */
#define uthash_free(ptr,sz) UTHASH_FREE(ptr) /* free fcn */

#define uthash_noexpand_fyi(tbl)  /* can be defined to log noexpand */
#define uthash_expand_fyi(tbl)    /* can be defined to log expands */
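/* A minimal sketch (not part of this header's API surface) of overriding the
 * allocation hooks above: because UTHASH_MALLOC and UTHASH_FREE are only
 * defined when not already set, an application can supply its own allocator
 * by defining them before including uthash.h. The my_malloc/my_free names
 * below are hypothetical placeholders for an application's allocator.
 *
 *   #define UTHASH_MALLOC my_malloc
 *   #define UTHASH_FREE   my_free
 *   #include "uthash.h"
 */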
#define HASH_INITIAL_NUM_BUCKETS 32       /* initial number of buckets         */
#define HASH_INITIAL_NUM_BUCKETS_LOG2 5   /* log2 of initial number of buckets */
#define HASH_BKT_CAPACITY_THRESH 10       /* expand when a bucket's chain reaches this length */

/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))

#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
do {                                                                             \
  unsigned _hf_bkt,_hf_hashv;                                                    \
  out=NULL;                                                                      \
  if (head) {                                                                    \
     HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt);   \
     if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) {                           \
       HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ],  \
                        keyptr,keylen,out);                                      \
     }                                                                           \
  }                                                                              \
} while (0)

#ifdef HASH_BLOOM
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1 : 0))
#define HASH_BLOOM_MAKE(tbl)                                                     \
do {                                                                             \
  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
  if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); }                    \
  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
} while (0)

#define HASH_BLOOM_FREE(tbl)                                                     \
do {                                                                             \
  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
} while (0)

#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))

#define HASH_BLOOM_ADD(tbl,hashv)                                                \
  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#define HASH_BLOOM_TEST(tbl,hashv)                                               \
  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif
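/* A hedged usage note on the filter above: compiling with HASH_BLOOM defined
 * (e.g. -DHASH_BLOOM=16) allocates a bloom bit-vector of 2^16 bits (8KB, per
 * HASH_BLOOM_BYTELEN). Since HASH_FIND consults HASH_BLOOM_TEST before the
 * bucket scan, lookups of absent keys can often return early; the value 16
 * here is just an illustrative size, not a recommendation. */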
#define HASH_MAKE_TABLE(hh,head)                                                 \
do {                                                                             \
  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
                  sizeof(UT_hash_table));                                        \
  if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); }                     \
  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
  (head)->hh.tbl->tail = &((head)->hh);                                          \
  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
  memset((head)->hh.tbl->buckets, 0,                                             \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
} while(0)

#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
        HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)

#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
do {                                                                             \
 unsigned _ha_bkt;                                                               \
 (add)->hh.next = NULL;                                                          \
 (add)->hh.key = (char*)keyptr;                                                  \
 (add)->hh.keylen = keylen_in;                                                   \
 if (!(head)) {                                                                  \
    head = (add);                                                                \
    (head)->hh.prev = NULL;                                                      \
    HASH_MAKE_TABLE(hh,head);                                                    \
 } else {                                                                        \
    (head)->hh.tbl->tail->next = (add);                                          \
    (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);         \
    (head)->hh.tbl->tail = &((add)->hh);                                         \
 }                                                                               \
 (head)->hh.tbl->num_items++;                                                    \
 (add)->hh.tbl = (head)->hh.tbl;                                                 \
 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets,                         \
         (add)->hh.hashv, _ha_bkt);                                              \
 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh);                   \
 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv);                                 \
 HASH_EMIT_KEY(hh,head,keyptr,keylen_in);                                        \
 HASH_FSCK(hh,head);                                                             \
} while(0)

#define HASH_TO_BKT( hashv, num_bkts, bkt )                                      \
do {                                                                             \
  bkt = ((hashv) & ((num_bkts) - 1));                                            \
} while(0)

/* delete "delptr" from the hash table.
 * "the usual" patch-up process for the app-order doubly-linked-list.
 * The use of _hd_hh_del below deserves special explanation.
 * These used to be expressed using (delptr) but that led to a bug
 * if someone used the same symbol for the head and deletee, like
 *   HASH_DELETE(hh,users,users);
 * We want that to work, but by changing the head (users) below
 * we were forfeiting our ability to further refer to the deletee (users)
 * in the patch-up process. Solution: use scratch space to
 * copy the deletee pointer, then the latter references are via that
 * scratch pointer rather than through the repointed (users) symbol.
 */
#define HASH_DELETE(hh,head,delptr)                                              \
do {                                                                             \
    unsigned _hd_bkt;                                                            \
    struct UT_hash_handle *_hd_hh_del;                                           \
    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) {          \
        uthash_free((head)->hh.tbl->buckets,                                     \
                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
        head = NULL;                                                             \
    } else {                                                                     \
        _hd_hh_del = &((delptr)->hh);                                            \
        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
            (head)->hh.tbl->tail =                                               \
                (UT_hash_handle*)((char*)((delptr)->hh.prev) +                   \
                (head)->hh.tbl->hho);                                            \
        }                                                                        \
        if ((delptr)->hh.prev) {                                                 \
            ((UT_hash_handle*)((char*)((delptr)->hh.prev) +                      \
                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
        } else {                                                                 \
            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
        }                                                                        \
        if (_hd_hh_del->next) {                                                  \
            ((UT_hash_handle*)((char*)_hd_hh_del->next +                         \
                    (head)->hh.tbl->hho))->prev =                                \
                    _hd_hh_del->prev;                                            \
        }                                                                        \
        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
        (head)->hh.tbl->num_items--;                                             \
    }                                                                            \
    HASH_FSCK(hh,head);                                                          \
} while (0)
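/* A brief illustration of the aliasing case described above (a sketch only,
 * assuming a struct whose hash handle is named hh). Deleting an element
 * through the same symbol that serves as the head is legal precisely because
 * the macro works through the _hd_hh_del scratch pointer:
 *
 *   my_item *users = NULL;          // hypothetical head pointer
 *   ...                             // items added elsewhere
 *   HASH_DELETE(hh, users, users);  // delete the head element itself;
 *                                   // "users" is repointed mid-macro, but
 *                                   // the deletee stays reachable via the
 *                                   // scratch pointer
 */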
/* use this macro to delete an item from a table only if it's in that table */
#define HASH_DELETE_IF(hh,group,ptr)                                             \
do {                                                                             \
  if(ptr && group && ptr->hh.tbl) {                                              \
    HASH_DELETE(hh,group,ptr);                                                   \
    ptr->hh.tbl = 0;                                                             \
  }                                                                              \
} while(0)

/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out)                                          \
    HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add)                                          \
    HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
#define HASH_FIND_INT(head,findint,out)                                          \
    HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add)                                          \
    HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out)                                          \
    HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add)                                          \
    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr)                                                    \
    HASH_DELETE(hh,head,delptr)
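/* A minimal end-to-end sketch of the convenience forms above. The struct name
 * and fields are hypothetical; the only requirements this header imposes are
 * the UT_hash_handle member and an initially-NULL head pointer.
 *
 *   #include "uthash.h"
 *
 *   typedef struct {
 *       int id;             // key
 *       char name[16];      // payload
 *       UT_hash_handle hh;  // makes this structure hashable
 *   } user_t;
 *
 *   user_t *users = NULL;   // the hash head; must start NULL
 *
 *   user_t *u = (user_t*)malloc(sizeof(user_t));
 *   u->id = 42;
 *   HASH_ADD_INT(users, id, u);         // add: key field is "id"
 *
 *   user_t *found; int key = 42;
 *   HASH_FIND_INT(users, &key, found);  // found == u, or NULL if absent
 *
 *   HASH_DEL(users, u);                 // remove (does not free u)
 */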
/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
 * This is for the uthash developer only; it compiles away if HASH_DEBUG isn't defined.
 */
#ifdef HASH_DEBUG
#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head)                                                       \
do {                                                                             \
    unsigned _bkt_i;                                                             \
    unsigned _count, _bkt_count;                                                 \
    char *_prev;                                                                 \
    struct UT_hash_handle *_thh;                                                 \
    if (head) {                                                                  \
        _count = 0;                                                              \
        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
            _bkt_count = 0;                                                      \
            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
            _prev = NULL;                                                        \
            while (_thh) {                                                       \
               if (_prev != (char*)(_thh->hh_prev)) {                            \
                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
                    _thh->hh_prev, _prev );                                      \
               }                                                                 \
               _bkt_count++;                                                     \
               _prev = (char*)(_thh);                                            \
               _thh = _thh->hh_next;                                             \
            }                                                                    \
            _count += _bkt_count;                                                \
            if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) {           \
               HASH_OOPS("invalid bucket count %d, actual %d\n",                 \
                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
            }                                                                    \
        }                                                                        \
        if (_count != (head)->hh.tbl->num_items) {                               \
            HASH_OOPS("invalid hh item count %d, actual %d\n",                   \
                (head)->hh.tbl->num_items, _count );                             \
        }                                                                        \
        /* traverse hh in app order; check next/prev integrity, count */         \
        _count = 0;                                                              \
        _prev = NULL;                                                            \
        _thh = &(head)->hh;                                                      \
        while (_thh) {                                                           \
           _count++;                                                             \
           if (_prev !=(char*)(_thh->prev)) {                                    \
              HASH_OOPS("invalid prev %p, actual %p\n",                          \
                    _thh->prev, _prev );                                         \
           }                                                                     \
           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
           _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) +         \
                                  (head)->hh.tbl->hho) : NULL );                 \
        }                                                                        \
        if (_count != (head)->hh.tbl->num_items) {                               \
            HASH_OOPS("invalid app item count %d, actual %d\n",                  \
                (head)->hh.tbl->num_items, _count );                             \
        }                                                                        \
    }                                                                            \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the file descriptor whose number HASH_EMIT_KEYS is defined to, for use in
 * tuning the hash function. The app can #include <unistd.h> to get the
 * prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
do {                                                                             \
    unsigned _klen = fieldlen;                                                   \
    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
    write(HASH_EMIT_KEYS, keyptr, fieldlen);                                     \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif

/* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif
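/* A hedged build-time sketch of the override above: the HASH_BER / HASH_SAX /
 * HASH_FNV / HASH_OAT / HASH_JEN / HASH_SFH macros defined below all share
 * the same (key,keylen,num_bkts,hashv,bkt) signature, so selecting one is
 * just a compile flag, for example:
 *
 *   cc -DHASH_FUNCTION=HASH_SAX -o app app.c
 */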
/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _hb_keylen=keylen;                                                    \
  char *_hb_key=(char*)(key);                                                    \
  (hashv) = 0;                                                                   \
  while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; }                \
  bkt = (hashv) & (num_bkts-1);                                                  \
} while (0)


/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _sx_i;                                                                \
  char *_hs_key=(char*)(key);                                                    \
  hashv = 0;                                                                     \
  for(_sx_i=0; _sx_i < keylen; _sx_i++)                                          \
      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
  bkt = hashv & (num_bkts-1);                                                    \
} while (0)

#define HASH_FNV(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _fn_i;                                                                \
  char *_hf_key=(char*)(key);                                                    \
  hashv = 2166136261UL;                                                          \
  for(_fn_i=0; _fn_i < keylen; _fn_i++)                                          \
      hashv = (hashv * 16777619) ^ _hf_key[_fn_i];                               \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#define HASH_OAT(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _ho_i;                                                                \
  char *_ho_key=(char*)(key);                                                    \
  hashv = 0;                                                                     \
  for(_ho_i=0; _ho_i < keylen; _ho_i++) {                                        \
      hashv += _ho_key[_ho_i];                                                   \
      hashv += (hashv << 10);                                                    \
      hashv ^= (hashv >> 6);                                                     \
  }                                                                              \
  hashv += (hashv << 3);                                                         \
  hashv ^= (hashv >> 11);                                                        \
  hashv += (hashv << 15);                                                        \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#define HASH_JEN_MIX(a,b,c)                                                      \
do {                                                                             \
  a -= b; a -= c; a ^= ( c >> 13 );                                              \
  b -= c; b -= a; b ^= ( a << 8 );                                               \
  c -= a; c -= b; c ^= ( b >> 13 );                                              \
  a -= b; a -= c; a ^= ( c >> 12 );                                              \
  b -= c; b -= a; b ^= ( a << 16 );                                              \
  c -= a; c -= b; c ^= ( b >> 5 );                                               \
  a -= b; a -= c; a ^= ( c >> 3 );                                               \
  b -= c; b -= a; b ^= ( a << 10 );                                              \
  c -= a; c -= b; c ^= ( b >> 15 );                                              \
} while (0)

#define HASH_JEN(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  unsigned _hj_i,_hj_j,_hj_k;                                                    \
  char *_hj_key=(char*)(key);                                                    \
  hashv = 0xfeedbeef;                                                            \
  _hj_i = _hj_j = 0x9e3779b9;                                                    \
  _hj_k = keylen;                                                                \
  while (_hj_k >= 12) {                                                          \
    _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                         \
        + ( (unsigned)_hj_key[2] << 16 )                                         \
        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
    _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                         \
        + ( (unsigned)_hj_key[6] << 16 )                                         \
        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
        + ( (unsigned)_hj_key[10] << 16 )                                        \
        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
                                                                                 \
     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
                                                                                 \
     _hj_key += 12;                                                              \
     _hj_k -= 12;                                                                \
  }                                                                              \
  hashv += keylen;                                                               \
  switch ( _hj_k ) {    /* mix in remaining bytes; all cases fall through */     \
     case 11: hashv += ( (unsigned)_hj_key[10] << 24 );                          \
     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );                           \
     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );                            \
     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );                           \
     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );                           \
     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );                            \
     case 5:  _hj_j += _hj_key[4];                                               \
     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );                           \
     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );                           \
     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );                            \
     case 1:  _hj_i += _hj_key[0];                                               \
  }                                                                              \
  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)
/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
                       +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  char *_sfh_key=(char*)(key);                                                   \
  uint32_t _sfh_tmp, _sfh_len = keylen;                                          \
                                                                                 \
  int _sfh_rem = _sfh_len & 3;                                                   \
  _sfh_len >>= 2;                                                                \
  hashv = 0xcafebabe;                                                            \
                                                                                 \
  /* Main loop */                                                                \
  for (;_sfh_len > 0; _sfh_len--) {                                              \
    hashv    += get16bits (_sfh_key);                                            \
    _sfh_tmp  = (get16bits (_sfh_key+2) << 11) ^ hashv;                          \
    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
    _sfh_key += 2*sizeof (uint16_t);                                             \
    hashv    += hashv >> 11;                                                     \
  }                                                                              \
                                                                                 \
  /* Handle end cases */                                                         \
  switch (_sfh_rem) {                                                            \
    case 3: hashv += get16bits (_sfh_key);                                       \
            hashv ^= hashv << 16;                                                \
            hashv ^= _sfh_key[sizeof (uint16_t)] << 18;                          \
            hashv += hashv >> 11;                                                \
            break;                                                               \
    case 2: hashv += get16bits (_sfh_key);                                       \
            hashv ^= hashv << 11;                                                \
            hashv += hashv >> 17;                                                \
            break;                                                               \
    case 1: hashv += *_sfh_key;                                                  \
            hashv ^= hashv << 10;                                                \
            hashv += hashv >> 1;                                                 \
  }                                                                              \
                                                                                 \
  /* Force "avalanching" of final 127 bits */                                    \
  hashv ^= hashv << 3;                                                           \
  hashv += hashv >> 5;                                                           \
  hashv ^= hashv << 4;                                                           \
  hashv += hashv >> 17;                                                          \
  hashv ^= hashv << 25;                                                          \
  hashv += hashv >> 6;                                                           \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)

#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPUs' (x86, x86_64) tolerance for unaligned reads.
 * For other types of CPU (e.g. Sparc) an unaligned read causes a bus error.
 * MurmurHash uses the faster approach only on CPUs where we know it's safe.
 *
 * Note the preprocessor built-in defines can be emitted using:
 *
 *   gcc -m64 -dM -E - < /dev/null                 (on gcc)
 *   cc -## a.c (where a.c is a simple test file)  (Sun Studio)
 */
#if (defined(__i386__) || defined(__x86_64__))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :                     \
                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) :           \
                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) :            \
                                                     MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h)                                                             \
do {                                                                             \
  _h ^= _h >> 16;                                                                \
  _h *= 0x85ebca6b;                                                              \
  _h ^= _h >> 13;                                                                \
  _h *= 0xc2b2ae35u;                                                             \
  _h ^= _h >> 16;                                                                \
} while(0)

#define HASH_MUR(key,keylen,num_bkts,hashv,bkt)                                  \
do {                                                                             \
  const uint8_t *_mur_data = (const uint8_t*)(key);                              \
  const int _mur_nblocks = (keylen) / 4;                                         \
  uint32_t _mur_h1 = 0xf88D5353;                                                 \
  uint32_t _mur_c1 = 0xcc9e2d51;                                                 \
  uint32_t _mur_c2 = 0x1b873593;                                                 \
  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4);     \
  int _mur_i;                                                                    \
  for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) {                                \
    uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                         \
    _mur_k1 *= _mur_c1;                                                          \
    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                            \
    _mur_k1 *= _mur_c2;                                                          \
                                                                                 \
    _mur_h1 ^= _mur_k1;                                                          \
    _mur_h1 = MUR_ROTL32(_mur_h1,13);                                            \
    _mur_h1 = _mur_h1*5+0xe6546b64;                                              \
  }                                                                              \
  const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4);       \
  uint32_t _mur_k1=0;                                                            \
  switch((keylen) & 3) {    /* tail bytes; cases fall through */                 \
    case 3: _mur_k1 ^= _mur_tail[2] << 16;                                       \
    case 2: _mur_k1 ^= _mur_tail[1] << 8;                                        \
    case 1: _mur_k1 ^= _mur_tail[0];                                             \
            _mur_k1 *= _mur_c1;                                                  \
            _mur_k1 = MUR_ROTL32(_mur_k1,15);                                    \
            _mur_k1 *= _mur_c2;                                                  \
            _mur_h1 ^= _mur_k1;                                                  \
  }                                                                              \
  _mur_h1 ^= (keylen);                                                           \
  MUR_FMIX(_mur_h1);                                                             \
  hashv = _mur_h1;                                                               \
  bkt = hashv & (num_bkts-1);                                                    \
} while(0)
#endif /* HASH_USING_NO_STRICT_ALIASING */
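/* A hedged build sketch for enabling the MurmurHash variant above: it is
 * guarded by HASH_USING_NO_STRICT_ALIASING and selected through the
 * HASH_FUNCTION hook, so both must be given at compile time. Since it plays
 * aliasing games, pairing it with -fno-strict-aliasing on gcc is the
 * cautious choice:
 *
 *   gcc -DHASH_USING_NO_STRICT_ALIASING -DHASH_FUNCTION=HASH_MUR \
 *       -fno-strict-aliasing -o app app.c
 */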
/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
do {                                                                             \
 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head));          \
 else out=NULL;                                                                  \
 while (out) {                                                                   \
    if (out->hh.keylen == keylen_in) {                                           \
        if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break;             \
    }                                                                            \
    if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \
    else out = NULL;                                                             \
 }                                                                               \
} while(0)

/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,addhh)                                              \
do {                                                                             \
 head.count++;                                                                   \
 (addhh)->hh_next = head.hh_head;                                                \
 (addhh)->hh_prev = NULL;                                                        \
 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); }                        \
 (head).hh_head=addhh;                                                           \
 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH)             \
     && (addhh)->tbl->noexpand != 1) {                                           \
       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
 }                                                                               \
} while(0)

/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
    (head).count--;                                                              \
    if ((head).hh_head == hh_del) {                                              \
      (head).hh_head = hh_del->hh_next;                                          \
    }                                                                            \
    if (hh_del->hh_prev) {                                                       \
        hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
    }                                                                            \
    if (hh_del->hh_next) {                                                       \
        hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
    }

/* Bucket expansion has the effect of doubling the number of buckets
 * and redistributing the items into the new buckets. Ideally the
 * items will distribute more or less evenly into the new buckets
 * (the extent to which this is true is a measure of the quality of
 * the hash function as it applies to the key domain).
 *
 * With the items distributed into more buckets, the chain length
 * (item count) in each bucket is reduced. Thus by expanding buckets
 * the hash keeps a bound on the chain length. This bounded chain
 * length is the essence of how a hash provides constant time lookup.
 *
 * The calculation of tbl->ideal_chain_maxlen below deserves some
 * explanation. First, keep in mind that we're calculating the ideal
 * maximum chain length based on the *new* (doubled) bucket count.
 * In fractions this is just n/b (n=number of items,b=new num buckets).
 * Since the ideal chain length is an integer, we want to calculate
 * ceil(n/b). We don't depend on floating point arithmetic in this
 * hash, so to calculate ceil(n/b) with integers we could write
 *
 *      ceil(n/b) = (n/b) + ((n%b)?1:0)
 *
 * and in fact a previous version of this hash did just that.
 * But now we have improved things a bit by recognizing that b is
 * always a power of two. We keep its base 2 log handy (call it lb),
 * so now we can write this with a bit shift and logical AND:
 *
 *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 *
 */
#define HASH_EXPAND_BUCKETS(tbl)                                                 \
do {                                                                             \
    unsigned _he_bkt;                                                            \
    unsigned _he_bkt_i;                                                          \
    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
             2 * tbl->num_buckets * sizeof(struct UT_hash_bucket));              \
    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
    memset(_he_new_buckets, 0,                                                   \
            2 * tbl->num_buckets * sizeof(struct UT_hash_bucket));               \
    tbl->ideal_chain_maxlen =                                                    \
       (tbl->num_items >> (tbl->log2_num_buckets+1)) +                           \
       ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0);                    \
    tbl->nonideal_items = 0;                                                     \
    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
    {                                                                            \
        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
        while (_he_thh) {                                                        \
           _he_hh_nxt = _he_thh->hh_next;                                        \
           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt);            \
           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
             tbl->nonideal_items++;                                              \
             _he_newbkt->expand_mult = _he_newbkt->count /                       \
                                        tbl->ideal_chain_maxlen;                 \
           }                                                                     \
           _he_thh->hh_prev = NULL;                                              \
           _he_thh->hh_next = _he_newbkt->hh_head;                               \
           if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev =               \
                _he_thh;                                                         \
           _he_newbkt->hh_head = _he_thh;                                        \
           _he_thh = _he_hh_nxt;                                                 \
        }                                                                        \
    }                                                                            \
    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
    tbl->num_buckets *= 2;                                                       \
    tbl->log2_num_buckets++;                                                     \
    tbl->buckets = _he_new_buckets;                                              \
    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
        (tbl->ineff_expands+1) : 0;                                              \
    if (tbl->ineff_expands > 1) {                                                \
        tbl->noexpand=1;                                                         \
        uthash_noexpand_fyi(tbl);                                                \
    }                                                                            \
    uthash_expand_fyi(tbl);                                                      \
} while(0)
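/* A worked instance of the shift-and-mask ceiling used above (numbers chosen
 * purely for illustration): with n = 53 items and a doubled bucket count
 * b = 32 (so lb = 5), ceil(53/32) = 2, and indeed
 *
 *      (53 >> 5) + ((53 & 31) ? 1 : 0)  =  1 + 1  =  2
 *
 * matching the (n/b) + ((n%b)?1:0) form it replaces, at the cost of one
 * shift and one AND instead of two divisions. */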
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn)                                                 \
do {                                                                             \
  unsigned _hs_i;                                                                \
  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
  if (head) {                                                                    \
      _hs_insize = 1;                                                            \
      _hs_looping = 1;                                                           \
      _hs_list = &((head)->hh);                                                  \
      while (_hs_looping) {                                                      \
          _hs_p = _hs_list;                                                      \
          _hs_list = NULL;                                                       \
          _hs_tail = NULL;                                                       \
          _hs_nmerges = 0;                                                       \
          while (_hs_p) {                                                        \
              _hs_nmerges++;                                                     \
              _hs_q = _hs_p;                                                     \
              _hs_psize = 0;                                                     \
              for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) {                   \
                  _hs_psize++;                                                   \
                  _hs_q = (UT_hash_handle*)((_hs_q->next) ?                      \
                          ((void*)((char*)(_hs_q->next) +                        \
                          (head)->hh.tbl->hho)) : NULL);                         \
                  if (! (_hs_q) ) break;                                         \
              }                                                                  \
              _hs_qsize = _hs_insize;                                            \
              while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) {           \
                  if (_hs_psize == 0) {                                          \
                      _hs_e = _hs_q;                                             \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
                              ((void*)((char*)(_hs_q->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_qsize--;                                               \
                  } else if ( (_hs_qsize == 0) || !(_hs_q) ) {                   \
                      _hs_e = _hs_p;                                             \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
                              ((void*)((char*)(_hs_p->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_psize--;                                               \
                  } else if ((                                                   \
                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
                             ) <= 0) {                                           \
                      _hs_e = _hs_p;                                             \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ?                  \
                              ((void*)((char*)(_hs_p->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_psize--;                                               \
                  } else {                                                       \
                      _hs_e = _hs_q;                                             \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ?                  \
                              ((void*)((char*)(_hs_q->next) +                    \
                              (head)->hh.tbl->hho)) : NULL);                     \
                      _hs_qsize--;                                               \
                  }                                                              \
                  if ( _hs_tail ) {                                              \
                      _hs_tail->next = ((_hs_e) ?                                \
                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
                  } else {                                                       \
                      _hs_list = _hs_e;                                          \
                  }                                                              \
                  _hs_e->prev = ((_hs_tail) ?                                    \
                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
                  _hs_tail = _hs_e;                                              \
              }                                                                  \
              _hs_p = _hs_q;                                                     \
          }                                                                      \
          _hs_tail->next = NULL;                                                 \
          if ( _hs_nmerges <= 1 ) {                                              \
              _hs_looping=0;                                                     \
              (head)->hh.tbl->tail = _hs_tail;                                   \
              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
          }                                                                      \
          _hs_insize *= 2;                                                       \
      }                                                                          \
      HASH_FSCK(hh,head);                                                        \
  }                                                                              \
} while (0)
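/* A minimal sort sketch (user_t is the hypothetical struct from the earlier
 * example). The comparator receives two element pointers and returns <0, 0,
 * or >0, strcmp-style:
 *
 *   int by_id(user_t *a, user_t *b) { return (a->id - b->id); }
 *   ...
 *   HASH_SORT(users, by_id);   // reorders the app-order list by id
 */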
/* This macro selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
do {                                                                             \
  unsigned _src_bkt, _dst_bkt;                                                   \
  void *_last_elt=NULL, *_elt;                                                   \
  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
  if (src) {                                                                     \
    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
          _src_hh;                                                               \
          _src_hh = _src_hh->hh_next) {                                          \
          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
          if (cond(_elt)) {                                                      \
            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
            _dst_hh->key = _src_hh->key;                                         \
            _dst_hh->keylen = _src_hh->keylen;                                   \
            _dst_hh->hashv = _src_hh->hashv;                                     \
            _dst_hh->prev = _last_elt;                                           \
            _dst_hh->next = NULL;                                                \
            if (_last_elt_hh) { _last_elt_hh->next = _elt; }                     \
            if (!dst) {                                                          \
              DECLTYPE_ASSIGN(dst,_elt);                                         \
              HASH_MAKE_TABLE(hh_dst,dst);                                       \
            } else {                                                             \
              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
            }                                                                    \
            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
            (dst)->hh_dst.tbl->num_items++;                                      \
            _last_elt = _elt;                                                    \
            _last_elt_hh = _dst_hh;                                              \
          }                                                                      \
      }                                                                          \
    }                                                                            \
  }                                                                              \
  HASH_FSCK(hh_dst,dst);                                                         \
} while (0)
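/* A hedged sketch of dual membership via HASH_SELECT. The struct carries two
 * hash handles (hh for the main hash, hh_vip for the selection); the field
 * names and the is_vip condition are hypothetical:
 *
 *   typedef struct {
 *       int id;
 *       int vip;
 *       UT_hash_handle hh;      // handle for the "users" hash
 *       UT_hash_handle hh_vip;  // secondary handle for the "vips" hash
 *   } user_t;
 *
 *   int is_vip(void *elt) { return ((user_t*)elt)->vip; }
 *   ...
 *   user_t *vips = NULL;
 *   HASH_SELECT(hh_vip, vips, hh, users, is_vip);  // items now in both hashes
 */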
#define HASH_CLEAR(hh,head)                                                      \
do {                                                                             \
  if (head) {                                                                    \
    uthash_free((head)->hh.tbl->buckets,                                         \
                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
    (head)=NULL;                                                                 \
  }                                                                              \
} while(0)

#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp)                                                \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL);       \
  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp)                                                \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);                 \
  el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
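/* A short iteration sketch (user_t as in the earlier examples). HASH_ITER
 * keeps a lookahead pointer in tmp, so it remains safe even if the current
 * element is deleted during the loop:
 *
 *   user_t *el, *tmp;
 *   printf("%u users\n", HASH_COUNT(users));
 *   HASH_ITER(hh, users, el, tmp) {
 *       HASH_DEL(users, el);   // safe: tmp already points at the next item
 *       free(el);
 *   }
 */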
typedef struct UT_hash_bucket {
   struct UT_hash_handle *hh_head;
   unsigned count;

   /* expand_mult is normally set to 0. In this situation, the max chain length
    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
    * the bucket's chain exceeds this length, bucket expansion is triggered).
    * However, setting expand_mult to a non-zero value delays bucket expansion
    * (that would be triggered by additions to this particular bucket)
    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
    * (The multiplier is simply expand_mult+1). The whole idea of this
    * multiplier is to reduce bucket expansions, since they are expensive, in
    * situations where we know that a particular bucket tends to be overused.
    * It is better to let its chain length grow to a longer yet-still-bounded
    * value, than to do an O(n) bucket expansion too often.
    */
   unsigned expand_mult;

} UT_hash_bucket;

/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1
#define HASH_BLOOM_SIGNATURE 0xb12220f2

typedef struct UT_hash_table {
   UT_hash_bucket *buckets;
   unsigned num_buckets, log2_num_buckets;
   unsigned num_items;
   struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */

   /* in an ideal situation (all buckets used equally), no bucket would have
    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
   unsigned ideal_chain_maxlen;

   /* nonideal_items is the number of items in the hash whose chain position
    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
    * hash distribution; reaching them in a chain traversal takes >ideal steps */
   unsigned nonideal_items;

   /* ineffective expands occur when a bucket doubling was performed, but
    * afterward, more than half the items in the hash had nonideal chain
    * positions. If this happens on two consecutive expansions we inhibit any
    * further expansion, as it's not helping; this happens when the hash
    * function isn't a good fit for the key domain. When expansion is inhibited
    * the hash will still work, albeit no longer in constant time. */
   unsigned ineff_expands, noexpand;

   uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
   uint8_t *bloom_bv;
   char bloom_nbits;
#endif

} UT_hash_table;

typedef struct UT_hash_handle {
   struct UT_hash_table *tbl;
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order      */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   void *key;                        /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;

#endif /* UTHASH_H */