1*418b791dSBob Badour /* 2*418b791dSBob Badour Copyright (c) 2003-2011, Troy D. Hanson http://uthash.sourceforge.net 3*418b791dSBob Badour All rights reserved. 4*418b791dSBob Badour 5*418b791dSBob Badour Redistribution and use in source and binary forms, with or without 6*418b791dSBob Badour modification, are permitted provided that the following conditions are met: 7*418b791dSBob Badour 8*418b791dSBob Badour * Redistributions of source code must retain the above copyright 9*418b791dSBob Badour notice, this list of conditions and the following disclaimer. 10*418b791dSBob Badour 11*418b791dSBob Badour THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12*418b791dSBob Badour IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13*418b791dSBob Badour TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14*418b791dSBob Badour PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15*418b791dSBob Badour OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16*418b791dSBob Badour EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17*418b791dSBob Badour PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18*418b791dSBob Badour PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19*418b791dSBob Badour LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20*418b791dSBob Badour NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21*418b791dSBob Badour SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22*418b791dSBob Badour */ 23*418b791dSBob Badour 24*418b791dSBob Badour #ifndef UTHASH_H 25*418b791dSBob Badour #define UTHASH_H 26*418b791dSBob Badour 27*418b791dSBob Badour #include <string.h> /* memcmp,strlen */ 28*418b791dSBob Badour #include <stddef.h> /* ptrdiff_t */ 29*418b791dSBob Badour #include <stdlib.h> /* exit() */ 30*418b791dSBob Badour 31*418b791dSBob Badour /* These macros use decltype or the earlier __typeof GNU extension. 32*418b791dSBob Badour As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33*418b791dSBob Badour when compiling c++ source) this code uses whatever method is needed 34*418b791dSBob Badour or, for VS2008 where neither is available, uses casting workarounds. */ 35*418b791dSBob Badour #ifdef _MSC_VER /* MS compiler */ 36*418b791dSBob Badour #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37*418b791dSBob Badour #define DECLTYPE(x) (decltype(x)) 38*418b791dSBob Badour #else /* VS2008 or older (or VS2010 in C mode) */ 39*418b791dSBob Badour #define NO_DECLTYPE 40*418b791dSBob Badour #define DECLTYPE(x) 41*418b791dSBob Badour #endif 42*418b791dSBob Badour #else /* GNU, Sun and other compilers */ 43*418b791dSBob Badour #define DECLTYPE(x) (__typeof(x)) 44*418b791dSBob Badour #endif 45*418b791dSBob Badour 46*418b791dSBob Badour #ifdef NO_DECLTYPE 47*418b791dSBob Badour #define DECLTYPE_ASSIGN(dst,src) \ 48*418b791dSBob Badour do { \ 49*418b791dSBob Badour char **_da_dst = (char**)(&(dst)); \ 50*418b791dSBob Badour *_da_dst = (char*)(src); \ 51*418b791dSBob Badour } while(0) 52*418b791dSBob Badour #else 53*418b791dSBob Badour #define DECLTYPE_ASSIGN(dst,src) \ 54*418b791dSBob Badour do { \ 55*418b791dSBob Badour (dst) = DECLTYPE(dst)(src); \ 56*418b791dSBob Badour } while(0) 57*418b791dSBob Badour #endif 58*418b791dSBob Badour 59*418b791dSBob Badour /* a number of the hash function use uint32_t which isn't defined on win32 */ 60*418b791dSBob Badour #ifdef _MSC_VER 61*418b791dSBob Badour typedef unsigned int uint32_t; 62*418b791dSBob Badour typedef unsigned char uint8_t; 63*418b791dSBob Badour #else 64*418b791dSBob Badour #include <inttypes.h> /* uint32_t */ 65*418b791dSBob Badour #endif 66*418b791dSBob Badour 67*418b791dSBob Badour #define UTHASH_VERSION 1.9.4 68*418b791dSBob Badour 69*418b791dSBob Badour #ifndef UTHASH_MALLOC 70*418b791dSBob Badour #define UTHASH_MALLOC malloc 71*418b791dSBob Badour #endif 72*418b791dSBob Badour 73*418b791dSBob Badour #ifndef UTHASH_FREE 74*418b791dSBob Badour #define UTHASH_FREE free 75*418b791dSBob Badour #endif 76*418b791dSBob Badour 77*418b791dSBob Badour #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 78*418b791dSBob Badour #define uthash_malloc(sz) UTHASH_MALLOC(sz) /* malloc fcn */ 79*418b791dSBob Badour #define uthash_free(ptr,sz) UTHASH_FREE(ptr) /* free fcn */ 80*418b791dSBob Badour 81*418b791dSBob Badour #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 82*418b791dSBob Badour #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 83*418b791dSBob Badour 84*418b791dSBob Badour /* initial number of buckets */ 85*418b791dSBob Badour #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ 86*418b791dSBob Badour #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ 87*418b791dSBob Badour #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ 88*418b791dSBob Badour 89*418b791dSBob Badour /* calculate the element whose hash handle address is hhe */ 90*418b791dSBob Badour #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 91*418b791dSBob Badour 92*418b791dSBob Badour #define HASH_FIND(hh,head,keyptr,keylen,out) \ 93*418b791dSBob Badour do { \ 94*418b791dSBob Badour unsigned _hf_bkt,_hf_hashv; \ 95*418b791dSBob Badour out=NULL; \ 96*418b791dSBob Badour if (head) { \ 97*418b791dSBob Badour HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 98*418b791dSBob Badour if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ 99*418b791dSBob Badour HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 100*418b791dSBob Badour keyptr,keylen,out); \ 101*418b791dSBob Badour } \ 102*418b791dSBob Badour } \ 103*418b791dSBob Badour } while (0) 104*418b791dSBob Badour 105*418b791dSBob Badour #ifdef HASH_BLOOM 106*418b791dSBob Badour #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) 107*418b791dSBob Badour #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) 108*418b791dSBob Badour #define HASH_BLOOM_MAKE(tbl) \ 109*418b791dSBob Badour do { \ 110*418b791dSBob Badour (tbl)->bloom_nbits = HASH_BLOOM; \ 111*418b791dSBob Badour (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 112*418b791dSBob Badour if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 113*418b791dSBob Badour memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 114*418b791dSBob Badour (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 115*418b791dSBob Badour } while (0); 116*418b791dSBob Badour 117*418b791dSBob Badour #define HASH_BLOOM_FREE(tbl) \ 118*418b791dSBob Badour do { \ 119*418b791dSBob Badour uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 120*418b791dSBob Badour } while (0); 121*418b791dSBob Badour 122*418b791dSBob Badour #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) 123*418b791dSBob Badour #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) 124*418b791dSBob Badour 125*418b791dSBob Badour #define HASH_BLOOM_ADD(tbl,hashv) \ 126*418b791dSBob Badour HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 127*418b791dSBob Badour 128*418b791dSBob Badour #define HASH_BLOOM_TEST(tbl,hashv) \ 129*418b791dSBob Badour HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) 130*418b791dSBob Badour 131*418b791dSBob Badour #else 132*418b791dSBob Badour #define HASH_BLOOM_MAKE(tbl) 133*418b791dSBob Badour #define HASH_BLOOM_FREE(tbl) 134*418b791dSBob Badour #define HASH_BLOOM_ADD(tbl,hashv) 135*418b791dSBob Badour #define HASH_BLOOM_TEST(tbl,hashv) (1) 136*418b791dSBob Badour #endif 137*418b791dSBob Badour 138*418b791dSBob Badour #define HASH_MAKE_TABLE(hh,head) \ 139*418b791dSBob Badour do { \ 140*418b791dSBob Badour (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 141*418b791dSBob Badour sizeof(UT_hash_table)); \ 142*418b791dSBob Badour if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 143*418b791dSBob Badour memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 144*418b791dSBob Badour (head)->hh.tbl->tail = &((head)->hh); \ 145*418b791dSBob Badour (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 146*418b791dSBob Badour (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 147*418b791dSBob Badour (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 148*418b791dSBob Badour (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 149*418b791dSBob Badour HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 150*418b791dSBob Badour if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 151*418b791dSBob Badour memset((head)->hh.tbl->buckets, 0, \ 152*418b791dSBob Badour HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 153*418b791dSBob Badour HASH_BLOOM_MAKE((head)->hh.tbl); \ 154*418b791dSBob Badour (head)->hh.tbl->signature = HASH_SIGNATURE; \ 155*418b791dSBob Badour } while(0) 156*418b791dSBob Badour 157*418b791dSBob Badour #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 158*418b791dSBob Badour HASH_ADD_KEYPTR(hh,head,&add->fieldname,keylen_in,add) 159*418b791dSBob Badour 160*418b791dSBob Badour #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 161*418b791dSBob Badour do { \ 162*418b791dSBob Badour unsigned _ha_bkt; \ 163*418b791dSBob Badour (add)->hh.next = NULL; \ 164*418b791dSBob Badour (add)->hh.key = (char*)keyptr; \ 165*418b791dSBob Badour (add)->hh.keylen = keylen_in; \ 166*418b791dSBob Badour if (!(head)) { \ 167*418b791dSBob Badour head = (add); \ 168*418b791dSBob Badour (head)->hh.prev = NULL; \ 169*418b791dSBob Badour HASH_MAKE_TABLE(hh,head); \ 170*418b791dSBob Badour } else { \ 171*418b791dSBob Badour (head)->hh.tbl->tail->next = (add); \ 172*418b791dSBob Badour (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 173*418b791dSBob Badour (head)->hh.tbl->tail = &((add)->hh); \ 174*418b791dSBob Badour } \ 175*418b791dSBob Badour (head)->hh.tbl->num_items++; \ 176*418b791dSBob Badour (add)->hh.tbl = (head)->hh.tbl; \ 177*418b791dSBob Badour HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 178*418b791dSBob Badour (add)->hh.hashv, _ha_bkt); \ 179*418b791dSBob Badour HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 180*418b791dSBob Badour HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 181*418b791dSBob Badour HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 182*418b791dSBob Badour HASH_FSCK(hh,head); \ 183*418b791dSBob Badour } while(0) 184*418b791dSBob Badour 185*418b791dSBob Badour #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 186*418b791dSBob Badour do { \ 187*418b791dSBob Badour bkt = ((hashv) & ((num_bkts) - 1)); \ 188*418b791dSBob Badour } while(0) 189*418b791dSBob Badour 190*418b791dSBob Badour /* delete "delptr" from the hash table. 191*418b791dSBob Badour * "the usual" patch-up process for the app-order doubly-linked-list. 192*418b791dSBob Badour * The use of _hd_hh_del below deserves special explanation. 193*418b791dSBob Badour * These used to be expressed using (delptr) but that led to a bug 194*418b791dSBob Badour * if someone used the same symbol for the head and deletee, like 195*418b791dSBob Badour * HASH_DELETE(hh,users,users); 196*418b791dSBob Badour * We want that to work, but by changing the head (users) below 197*418b791dSBob Badour * we were forfeiting our ability to further refer to the deletee (users) 198*418b791dSBob Badour * in the patch-up process. Solution: use scratch space to 199*418b791dSBob Badour * copy the deletee pointer, then the latter references are via that 200*418b791dSBob Badour * scratch pointer rather than through the repointed (users) symbol. 201*418b791dSBob Badour */ 202*418b791dSBob Badour #define HASH_DELETE(hh,head,delptr) \ 203*418b791dSBob Badour do { \ 204*418b791dSBob Badour unsigned _hd_bkt; \ 205*418b791dSBob Badour struct UT_hash_handle *_hd_hh_del; \ 206*418b791dSBob Badour if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 207*418b791dSBob Badour uthash_free((head)->hh.tbl->buckets, \ 208*418b791dSBob Badour (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 209*418b791dSBob Badour HASH_BLOOM_FREE((head)->hh.tbl); \ 210*418b791dSBob Badour uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 211*418b791dSBob Badour head = NULL; \ 212*418b791dSBob Badour } else { \ 213*418b791dSBob Badour _hd_hh_del = &((delptr)->hh); \ 214*418b791dSBob Badour if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 215*418b791dSBob Badour (head)->hh.tbl->tail = \ 216*418b791dSBob Badour (UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 217*418b791dSBob Badour (head)->hh.tbl->hho); \ 218*418b791dSBob Badour } \ 219*418b791dSBob Badour if ((delptr)->hh.prev) { \ 220*418b791dSBob Badour ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \ 221*418b791dSBob Badour (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 222*418b791dSBob Badour } else { \ 223*418b791dSBob Badour DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 224*418b791dSBob Badour } \ 225*418b791dSBob Badour if (_hd_hh_del->next) { \ 226*418b791dSBob Badour ((UT_hash_handle*)((char*)_hd_hh_del->next + \ 227*418b791dSBob Badour (head)->hh.tbl->hho))->prev = \ 228*418b791dSBob Badour _hd_hh_del->prev; \ 229*418b791dSBob Badour } \ 230*418b791dSBob Badour HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 231*418b791dSBob Badour HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 232*418b791dSBob Badour (head)->hh.tbl->num_items--; \ 233*418b791dSBob Badour } \ 234*418b791dSBob Badour HASH_FSCK(hh,head); \ 235*418b791dSBob Badour } while (0) 236*418b791dSBob Badour 237*418b791dSBob Badour /* use this function to delete an item from a table only if its in that table */ 238*418b791dSBob Badour #define HASH_DELETE_IF(hh,group,ptr) \ 239*418b791dSBob Badour do {\ 240*418b791dSBob Badour if(ptr && group && ptr->hh.tbl) { \ 241*418b791dSBob Badour HASH_DELETE(hh,group,ptr); \ 242*418b791dSBob Badour ptr->hh.tbl = 0;\ 243*418b791dSBob Badour } \ 244*418b791dSBob Badour } while(0) 245*418b791dSBob Badour 246*418b791dSBob Badour /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 247*418b791dSBob Badour #define HASH_FIND_STR(head,findstr,out) \ 248*418b791dSBob Badour HASH_FIND(hh,head,findstr,strlen(findstr),out) 249*418b791dSBob Badour #define HASH_ADD_STR(head,strfield,add) \ 250*418b791dSBob Badour HASH_ADD(hh,head,strfield,strlen(add->strfield),add) 251*418b791dSBob Badour #define HASH_FIND_INT(head,findint,out) \ 252*418b791dSBob Badour HASH_FIND(hh,head,findint,sizeof(int),out) 253*418b791dSBob Badour #define HASH_ADD_INT(head,intfield,add) \ 254*418b791dSBob Badour HASH_ADD(hh,head,intfield,sizeof(int),add) 255*418b791dSBob Badour #define HASH_FIND_PTR(head,findptr,out) \ 256*418b791dSBob Badour HASH_FIND(hh,head,findptr,sizeof(void *),out) 257*418b791dSBob Badour #define HASH_ADD_PTR(head,ptrfield,add) \ 258*418b791dSBob Badour HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 259*418b791dSBob Badour #define HASH_DEL(head,delptr) \ 260*418b791dSBob Badour HASH_DELETE(hh,head,delptr) 261*418b791dSBob Badour 262*418b791dSBob Badour /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 263*418b791dSBob Badour * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 264*418b791dSBob Badour */ 265*418b791dSBob Badour #ifdef HASH_DEBUG 266*418b791dSBob Badour #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 267*418b791dSBob Badour #define HASH_FSCK(hh,head) \ 268*418b791dSBob Badour do { \ 269*418b791dSBob Badour unsigned _bkt_i; \ 270*418b791dSBob Badour unsigned _count, _bkt_count; \ 271*418b791dSBob Badour char *_prev; \ 272*418b791dSBob Badour struct UT_hash_handle *_thh; \ 273*418b791dSBob Badour if (head) { \ 274*418b791dSBob Badour _count = 0; \ 275*418b791dSBob Badour for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 276*418b791dSBob Badour _bkt_count = 0; \ 277*418b791dSBob Badour _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 278*418b791dSBob Badour _prev = NULL; \ 279*418b791dSBob Badour while (_thh) { \ 280*418b791dSBob Badour if (_prev != (char*)(_thh->hh_prev)) { \ 281*418b791dSBob Badour HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 282*418b791dSBob Badour _thh->hh_prev, _prev ); \ 283*418b791dSBob Badour } \ 284*418b791dSBob Badour _bkt_count++; \ 285*418b791dSBob Badour _prev = (char*)(_thh); \ 286*418b791dSBob Badour _thh = _thh->hh_next; \ 287*418b791dSBob Badour } \ 288*418b791dSBob Badour _count += _bkt_count; \ 289*418b791dSBob Badour if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 290*418b791dSBob Badour HASH_OOPS("invalid bucket count %d, actual %d\n", \ 291*418b791dSBob Badour (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 292*418b791dSBob Badour } \ 293*418b791dSBob Badour } \ 294*418b791dSBob Badour if (_count != (head)->hh.tbl->num_items) { \ 295*418b791dSBob Badour HASH_OOPS("invalid hh item count %d, actual %d\n", \ 296*418b791dSBob Badour (head)->hh.tbl->num_items, _count ); \ 297*418b791dSBob Badour } \ 298*418b791dSBob Badour /* traverse hh in app order; check next/prev integrity, count */ \ 299*418b791dSBob Badour _count = 0; \ 300*418b791dSBob Badour _prev = NULL; \ 301*418b791dSBob Badour _thh = &(head)->hh; \ 302*418b791dSBob Badour while (_thh) { \ 303*418b791dSBob Badour _count++; \ 304*418b791dSBob Badour if (_prev !=(char*)(_thh->prev)) { \ 305*418b791dSBob Badour HASH_OOPS("invalid prev %p, actual %p\n", \ 306*418b791dSBob Badour _thh->prev, _prev ); \ 307*418b791dSBob Badour } \ 308*418b791dSBob Badour _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 309*418b791dSBob Badour _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 310*418b791dSBob Badour (head)->hh.tbl->hho) : NULL ); \ 311*418b791dSBob Badour } \ 312*418b791dSBob Badour if (_count != (head)->hh.tbl->num_items) { \ 313*418b791dSBob Badour HASH_OOPS("invalid app item count %d, actual %d\n", \ 314*418b791dSBob Badour (head)->hh.tbl->num_items, _count ); \ 315*418b791dSBob Badour } \ 316*418b791dSBob Badour } \ 317*418b791dSBob Badour } while (0) 318*418b791dSBob Badour #else 319*418b791dSBob Badour #define HASH_FSCK(hh,head) 320*418b791dSBob Badour #endif 321*418b791dSBob Badour 322*418b791dSBob Badour /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 323*418b791dSBob Badour * the descriptor to which this macro is defined for tuning the hash function. 324*418b791dSBob Badour * The app can #include <unistd.h> to get the prototype for write(2). */ 325*418b791dSBob Badour #ifdef HASH_EMIT_KEYS 326*418b791dSBob Badour #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 327*418b791dSBob Badour do { \ 328*418b791dSBob Badour unsigned _klen = fieldlen; \ 329*418b791dSBob Badour write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 330*418b791dSBob Badour write(HASH_EMIT_KEYS, keyptr, fieldlen); \ 331*418b791dSBob Badour } while (0) 332*418b791dSBob Badour #else 333*418b791dSBob Badour #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 334*418b791dSBob Badour #endif 335*418b791dSBob Badour 336*418b791dSBob Badour /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 337*418b791dSBob Badour #ifdef HASH_FUNCTION 338*418b791dSBob Badour #define HASH_FCN HASH_FUNCTION 339*418b791dSBob Badour #else 340*418b791dSBob Badour #define HASH_FCN HASH_JEN 341*418b791dSBob Badour #endif 342*418b791dSBob Badour 343*418b791dSBob Badour /* The Bernstein hash function, used in Perl prior to v5.6 */ 344*418b791dSBob Badour #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 345*418b791dSBob Badour do { \ 346*418b791dSBob Badour unsigned _hb_keylen=keylen; \ 347*418b791dSBob Badour char *_hb_key=(char*)(key); \ 348*418b791dSBob Badour (hashv) = 0; \ 349*418b791dSBob Badour while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ 350*418b791dSBob Badour bkt = (hashv) & (num_bkts-1); \ 351*418b791dSBob Badour } while (0) 352*418b791dSBob Badour 353*418b791dSBob Badour 354*418b791dSBob Badour /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 355*418b791dSBob Badour * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 356*418b791dSBob Badour #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 357*418b791dSBob Badour do { \ 358*418b791dSBob Badour unsigned _sx_i; \ 359*418b791dSBob Badour char *_hs_key=(char*)(key); \ 360*418b791dSBob Badour hashv = 0; \ 361*418b791dSBob Badour for(_sx_i=0; _sx_i < keylen; _sx_i++) \ 362*418b791dSBob Badour hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 363*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 364*418b791dSBob Badour } while (0) 365*418b791dSBob Badour 366*418b791dSBob Badour #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 367*418b791dSBob Badour do { \ 368*418b791dSBob Badour unsigned _fn_i; \ 369*418b791dSBob Badour char *_hf_key=(char*)(key); \ 370*418b791dSBob Badour hashv = 2166136261UL; \ 371*418b791dSBob Badour for(_fn_i=0; _fn_i < keylen; _fn_i++) \ 372*418b791dSBob Badour hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ 373*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 374*418b791dSBob Badour } while(0); 375*418b791dSBob Badour 376*418b791dSBob Badour #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 377*418b791dSBob Badour do { \ 378*418b791dSBob Badour unsigned _ho_i; \ 379*418b791dSBob Badour char *_ho_key=(char*)(key); \ 380*418b791dSBob Badour hashv = 0; \ 381*418b791dSBob Badour for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 382*418b791dSBob Badour hashv += _ho_key[_ho_i]; \ 383*418b791dSBob Badour hashv += (hashv << 10); \ 384*418b791dSBob Badour hashv ^= (hashv >> 6); \ 385*418b791dSBob Badour } \ 386*418b791dSBob Badour hashv += (hashv << 3); \ 387*418b791dSBob Badour hashv ^= (hashv >> 11); \ 388*418b791dSBob Badour hashv += (hashv << 15); \ 389*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 390*418b791dSBob Badour } while(0) 391*418b791dSBob Badour 392*418b791dSBob Badour #define HASH_JEN_MIX(a,b,c) \ 393*418b791dSBob Badour do { \ 394*418b791dSBob Badour a -= b; a -= c; a ^= ( c >> 13 ); \ 395*418b791dSBob Badour b -= c; b -= a; b ^= ( a << 8 ); \ 396*418b791dSBob Badour c -= a; c -= b; c ^= ( b >> 13 ); \ 397*418b791dSBob Badour a -= b; a -= c; a ^= ( c >> 12 ); \ 398*418b791dSBob Badour b -= c; b -= a; b ^= ( a << 16 ); \ 399*418b791dSBob Badour c -= a; c -= b; c ^= ( b >> 5 ); \ 400*418b791dSBob Badour a -= b; a -= c; a ^= ( c >> 3 ); \ 401*418b791dSBob Badour b -= c; b -= a; b ^= ( a << 10 ); \ 402*418b791dSBob Badour c -= a; c -= b; c ^= ( b >> 15 ); \ 403*418b791dSBob Badour } while (0) 404*418b791dSBob Badour 405*418b791dSBob Badour #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 406*418b791dSBob Badour do { \ 407*418b791dSBob Badour unsigned _hj_i,_hj_j,_hj_k; \ 408*418b791dSBob Badour char *_hj_key=(char*)(key); \ 409*418b791dSBob Badour hashv = 0xfeedbeef; \ 410*418b791dSBob Badour _hj_i = _hj_j = 0x9e3779b9; \ 411*418b791dSBob Badour _hj_k = keylen; \ 412*418b791dSBob Badour while (_hj_k >= 12) { \ 413*418b791dSBob Badour _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 414*418b791dSBob Badour + ( (unsigned)_hj_key[2] << 16 ) \ 415*418b791dSBob Badour + ( (unsigned)_hj_key[3] << 24 ) ); \ 416*418b791dSBob Badour _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 417*418b791dSBob Badour + ( (unsigned)_hj_key[6] << 16 ) \ 418*418b791dSBob Badour + ( (unsigned)_hj_key[7] << 24 ) ); \ 419*418b791dSBob Badour hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 420*418b791dSBob Badour + ( (unsigned)_hj_key[10] << 16 ) \ 421*418b791dSBob Badour + ( (unsigned)_hj_key[11] << 24 ) ); \ 422*418b791dSBob Badour \ 423*418b791dSBob Badour HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 424*418b791dSBob Badour \ 425*418b791dSBob Badour _hj_key += 12; \ 426*418b791dSBob Badour _hj_k -= 12; \ 427*418b791dSBob Badour } \ 428*418b791dSBob Badour hashv += keylen; \ 429*418b791dSBob Badour switch ( _hj_k ) { \ 430*418b791dSBob Badour case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ 431*418b791dSBob Badour case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ 432*418b791dSBob Badour case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ 433*418b791dSBob Badour case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ 434*418b791dSBob Badour case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ 435*418b791dSBob Badour case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ 436*418b791dSBob Badour case 5: _hj_j += _hj_key[4]; \ 437*418b791dSBob Badour case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ 438*418b791dSBob Badour case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ 439*418b791dSBob Badour case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ 440*418b791dSBob Badour case 1: _hj_i += _hj_key[0]; \ 441*418b791dSBob Badour } \ 442*418b791dSBob Badour HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 443*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 444*418b791dSBob Badour } while(0) 445*418b791dSBob Badour 446*418b791dSBob Badour /* The Paul Hsieh hash function */ 447*418b791dSBob Badour #undef get16bits 448*418b791dSBob Badour #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 449*418b791dSBob Badour || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 450*418b791dSBob Badour #define get16bits(d) (*((const uint16_t *) (d))) 451*418b791dSBob Badour #endif 452*418b791dSBob Badour 453*418b791dSBob Badour #if !defined (get16bits) 454*418b791dSBob Badour #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 455*418b791dSBob Badour +(uint32_t)(((const uint8_t *)(d))[0]) ) 456*418b791dSBob Badour #endif 457*418b791dSBob Badour #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 458*418b791dSBob Badour do { \ 459*418b791dSBob Badour char *_sfh_key=(char*)(key); \ 460*418b791dSBob Badour uint32_t _sfh_tmp, _sfh_len = keylen; \ 461*418b791dSBob Badour \ 462*418b791dSBob Badour int _sfh_rem = _sfh_len & 3; \ 463*418b791dSBob Badour _sfh_len >>= 2; \ 464*418b791dSBob Badour hashv = 0xcafebabe; \ 465*418b791dSBob Badour \ 466*418b791dSBob Badour /* Main loop */ \ 467*418b791dSBob Badour for (;_sfh_len > 0; _sfh_len--) { \ 468*418b791dSBob Badour hashv += get16bits (_sfh_key); \ 469*418b791dSBob Badour _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \ 470*418b791dSBob Badour hashv = (hashv << 16) ^ _sfh_tmp; \ 471*418b791dSBob Badour _sfh_key += 2*sizeof (uint16_t); \ 472*418b791dSBob Badour hashv += hashv >> 11; \ 473*418b791dSBob Badour } \ 474*418b791dSBob Badour \ 475*418b791dSBob Badour /* Handle end cases */ \ 476*418b791dSBob Badour switch (_sfh_rem) { \ 477*418b791dSBob Badour case 3: hashv += get16bits (_sfh_key); \ 478*418b791dSBob Badour hashv ^= hashv << 16; \ 479*418b791dSBob Badour hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \ 480*418b791dSBob Badour hashv += hashv >> 11; \ 481*418b791dSBob Badour break; \ 482*418b791dSBob Badour case 2: hashv += get16bits (_sfh_key); \ 483*418b791dSBob Badour hashv ^= hashv << 11; \ 484*418b791dSBob Badour hashv += hashv >> 17; \ 485*418b791dSBob Badour break; \ 486*418b791dSBob Badour case 1: hashv += *_sfh_key; \ 487*418b791dSBob Badour hashv ^= hashv << 10; \ 488*418b791dSBob Badour hashv += hashv >> 1; \ 489*418b791dSBob Badour } \ 490*418b791dSBob Badour \ 491*418b791dSBob Badour /* Force "avalanching" of final 127 bits */ \ 492*418b791dSBob Badour hashv ^= hashv << 3; \ 493*418b791dSBob Badour hashv += hashv >> 5; \ 494*418b791dSBob Badour hashv ^= hashv << 4; \ 495*418b791dSBob Badour hashv += hashv >> 17; \ 496*418b791dSBob Badour hashv ^= hashv << 25; \ 497*418b791dSBob Badour hashv += hashv >> 6; \ 498*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 499*418b791dSBob Badour } while(0); 500*418b791dSBob Badour 501*418b791dSBob Badour #ifdef HASH_USING_NO_STRICT_ALIASING 502*418b791dSBob Badour /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. 503*418b791dSBob Badour * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 504*418b791dSBob Badour * MurmurHash uses the faster approach only on CPU's where we know it's safe. 505*418b791dSBob Badour * 506*418b791dSBob Badour * Note the preprocessor built-in defines can be emitted using: 507*418b791dSBob Badour * 508*418b791dSBob Badour * gcc -m64 -dM -E - < /dev/null (on gcc) 509*418b791dSBob Badour * cc -## a.c (where a.c is a simple test file) (Sun Studio) 510*418b791dSBob Badour */ 511*418b791dSBob Badour #if (defined(__i386__) || defined(__x86_64__)) 512*418b791dSBob Badour #define MUR_GETBLOCK(p,i) p[i] 513*418b791dSBob Badour #else /* non intel */ 514*418b791dSBob Badour #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) 515*418b791dSBob Badour #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) 516*418b791dSBob Badour #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) 517*418b791dSBob Badour #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) 518*418b791dSBob Badour #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 519*418b791dSBob Badour #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 520*418b791dSBob Badour #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 521*418b791dSBob Badour #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 522*418b791dSBob Badour #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 523*418b791dSBob Badour #else /* assume little endian non-intel */ 524*418b791dSBob Badour #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 525*418b791dSBob Badour #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 526*418b791dSBob Badour #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 527*418b791dSBob Badour #endif 528*418b791dSBob Badour #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 529*418b791dSBob Badour (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 530*418b791dSBob Badour (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ 531*418b791dSBob Badour MUR_ONE_THREE(p)))) 532*418b791dSBob Badour #endif 533*418b791dSBob Badour #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 534*418b791dSBob Badour #define MUR_FMIX(_h) \ 535*418b791dSBob Badour do { \ 536*418b791dSBob Badour _h ^= _h >> 16; \ 537*418b791dSBob Badour _h *= 0x85ebca6b; \ 538*418b791dSBob Badour _h ^= _h >> 13; \ 539*418b791dSBob Badour _h *= 0xc2b2ae35l; \ 540*418b791dSBob Badour _h ^= _h >> 16; \ 541*418b791dSBob Badour } while(0) 542*418b791dSBob Badour 543*418b791dSBob Badour #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ 544*418b791dSBob Badour do { \ 545*418b791dSBob Badour const uint8_t *_mur_data = (const uint8_t*)(key); \ 546*418b791dSBob Badour const int _mur_nblocks = (keylen) / 4; \ 547*418b791dSBob Badour uint32_t _mur_h1 = 0xf88D5353; \ 548*418b791dSBob Badour uint32_t _mur_c1 = 0xcc9e2d51; \ 549*418b791dSBob Badour uint32_t _mur_c2 = 0x1b873593; \ 550*418b791dSBob Badour const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ 551*418b791dSBob Badour int _mur_i; \ 552*418b791dSBob Badour for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ 553*418b791dSBob Badour uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 554*418b791dSBob Badour _mur_k1 *= _mur_c1; \ 555*418b791dSBob Badour _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 556*418b791dSBob Badour _mur_k1 *= _mur_c2; \ 557*418b791dSBob Badour \ 558*418b791dSBob Badour _mur_h1 ^= _mur_k1; \ 559*418b791dSBob Badour _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 560*418b791dSBob Badour _mur_h1 = _mur_h1*5+0xe6546b64; \ 561*418b791dSBob Badour } \ 562*418b791dSBob Badour const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ 563*418b791dSBob Badour uint32_t _mur_k1=0; \ 564*418b791dSBob Badour switch((keylen) & 3) { \ 565*418b791dSBob Badour case 3: _mur_k1 ^= _mur_tail[2] << 16; \ 566*418b791dSBob Badour case 2: _mur_k1 ^= _mur_tail[1] << 8; \ 567*418b791dSBob Badour case 1: _mur_k1 ^= _mur_tail[0]; \ 568*418b791dSBob Badour _mur_k1 *= _mur_c1; \ 569*418b791dSBob Badour _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 570*418b791dSBob Badour _mur_k1 *= _mur_c2; \ 571*418b791dSBob Badour _mur_h1 ^= _mur_k1; \ 572*418b791dSBob Badour } \ 573*418b791dSBob Badour _mur_h1 ^= (keylen); \ 574*418b791dSBob Badour MUR_FMIX(_mur_h1); \ 575*418b791dSBob Badour hashv = _mur_h1; \ 576*418b791dSBob Badour bkt = hashv & (num_bkts-1); \ 577*418b791dSBob Badour } while(0) 578*418b791dSBob Badour #endif /* HASH_USING_NO_STRICT_ALIASING */ 579*418b791dSBob Badour 580*418b791dSBob Badour /* key comparison function; return 0 if keys equal */ 581*418b791dSBob Badour #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 582*418b791dSBob Badour 583*418b791dSBob Badour /* iterate over items in a known bucket to find desired item */ 584*418b791dSBob Badour #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ 585*418b791dSBob Badour do { \ 586*418b791dSBob Badour if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ 587*418b791dSBob Badour else out=NULL; \ 588*418b791dSBob Badour while (out) { \ 589*418b791dSBob Badour if (out->hh.keylen == keylen_in) { \ 590*418b791dSBob Badour if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \ 591*418b791dSBob Badour } \ 592*418b791dSBob Badour if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \ 593*418b791dSBob Badour else out = NULL; \ 594*418b791dSBob Badour } \ 595*418b791dSBob Badour } while(0) 596*418b791dSBob Badour 597*418b791dSBob Badour /* add an item to a bucket */ 598*418b791dSBob Badour #define HASH_ADD_TO_BKT(head,addhh) \ 599*418b791dSBob Badour do { \ 600*418b791dSBob Badour head.count++; \ 601*418b791dSBob Badour (addhh)->hh_next = head.hh_head; \ 602*418b791dSBob Badour (addhh)->hh_prev = NULL; \ 603*418b791dSBob Badour if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ 604*418b791dSBob Badour (head).hh_head=addhh; \ 605*418b791dSBob Badour if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ 606*418b791dSBob Badour && (addhh)->tbl->noexpand != 1) { \ 607*418b791dSBob Badour HASH_EXPAND_BUCKETS((addhh)->tbl); \ 608*418b791dSBob Badour } \ 609*418b791dSBob Badour } while(0) 610*418b791dSBob Badour 611*418b791dSBob Badour /* remove an item from a given bucket */ 612*418b791dSBob Badour #define HASH_DEL_IN_BKT(hh,head,hh_del) \ 613*418b791dSBob Badour (head).count--; \ 614*418b791dSBob Badour if ((head).hh_head == hh_del) { \ 615*418b791dSBob Badour (head).hh_head = hh_del->hh_next; \ 616*418b791dSBob Badour } \ 617*418b791dSBob Badour if (hh_del->hh_prev) { \ 618*418b791dSBob Badour hh_del->hh_prev->hh_next = hh_del->hh_next; \ 619*418b791dSBob Badour } \ 620*418b791dSBob Badour if (hh_del->hh_next) { \ 621*418b791dSBob Badour hh_del->hh_next->hh_prev = hh_del->hh_prev; \ 622*418b791dSBob Badour } 623*418b791dSBob Badour 624*418b791dSBob Badour /* Bucket expansion has the effect of doubling the number of buckets 625*418b791dSBob Badour * and redistributing the items into the new buckets. Ideally the 626*418b791dSBob Badour * items will distribute more or less evenly into the new buckets 627*418b791dSBob Badour * (the extent to which this is true is a measure of the quality of 628*418b791dSBob Badour * the hash function as it applies to the key domain). 629*418b791dSBob Badour * 630*418b791dSBob Badour * With the items distributed into more buckets, the chain length 631*418b791dSBob Badour * (item count) in each bucket is reduced. Thus by expanding buckets 632*418b791dSBob Badour * the hash keeps a bound on the chain length. This bounded chain 633*418b791dSBob Badour * length is the essence of how a hash provides constant time lookup. 634*418b791dSBob Badour * 635*418b791dSBob Badour * The calculation of tbl->ideal_chain_maxlen below deserves some 636*418b791dSBob Badour * explanation. First, keep in mind that we're calculating the ideal 637*418b791dSBob Badour * maximum chain length based on the *new* (doubled) bucket count. 638*418b791dSBob Badour * In fractions this is just n/b (n=number of items,b=new num buckets). 639*418b791dSBob Badour * Since the ideal chain length is an integer, we want to calculate 640*418b791dSBob Badour * ceil(n/b). We don't depend on floating point arithmetic in this 641*418b791dSBob Badour * hash, so to calculate ceil(n/b) with integers we could write 642*418b791dSBob Badour * 643*418b791dSBob Badour * ceil(n/b) = (n/b) + ((n%b)?1:0) 644*418b791dSBob Badour * 645*418b791dSBob Badour * and in fact a previous version of this hash did just that. 646*418b791dSBob Badour * But now we have improved things a bit by recognizing that b is 647*418b791dSBob Badour * always a power of two. We keep its base 2 log handy (call it lb), 648*418b791dSBob Badour * so now we can write this with a bit shift and logical AND: 649*418b791dSBob Badour * 650*418b791dSBob Badour * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 651*418b791dSBob Badour * 652*418b791dSBob Badour */ 653*418b791dSBob Badour #define HASH_EXPAND_BUCKETS(tbl) \ 654*418b791dSBob Badour do { \ 655*418b791dSBob Badour unsigned _he_bkt; \ 656*418b791dSBob Badour unsigned _he_bkt_i; \ 657*418b791dSBob Badour struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 658*418b791dSBob Badour UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 659*418b791dSBob Badour _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 660*418b791dSBob Badour 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 661*418b791dSBob Badour if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ 662*418b791dSBob Badour memset(_he_new_buckets, 0, \ 663*418b791dSBob Badour 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 664*418b791dSBob Badour tbl->ideal_chain_maxlen = \ 665*418b791dSBob Badour (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ 666*418b791dSBob Badour ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \ 667*418b791dSBob Badour tbl->nonideal_items = 0; \ 668*418b791dSBob Badour for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ 669*418b791dSBob Badour { \ 670*418b791dSBob Badour _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ 671*418b791dSBob Badour while (_he_thh) { \ 672*418b791dSBob Badour _he_hh_nxt = _he_thh->hh_next; \ 673*418b791dSBob Badour HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ 674*418b791dSBob Badour _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ 675*418b791dSBob Badour if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ 676*418b791dSBob Badour tbl->nonideal_items++; \ 677*418b791dSBob Badour _he_newbkt->expand_mult = _he_newbkt->count / \ 678*418b791dSBob Badour tbl->ideal_chain_maxlen; \ 679*418b791dSBob Badour } \ 680*418b791dSBob Badour _he_thh->hh_prev = NULL; \ 681*418b791dSBob Badour _he_thh->hh_next = _he_newbkt->hh_head; \ 682*418b791dSBob Badour if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ 683*418b791dSBob Badour _he_thh; \ 684*418b791dSBob Badour _he_newbkt->hh_head = _he_thh; \ 685*418b791dSBob Badour _he_thh = _he_hh_nxt; \ 686*418b791dSBob Badour } \ 687*418b791dSBob Badour } \ 688*418b791dSBob Badour uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 689*418b791dSBob Badour tbl->num_buckets *= 2; \ 690*418b791dSBob Badour tbl->log2_num_buckets++; \ 691*418b791dSBob Badour tbl->buckets = _he_new_buckets; \ 692*418b791dSBob Badour tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ 693*418b791dSBob Badour (tbl->ineff_expands+1) : 0; \ 694*418b791dSBob Badour if (tbl->ineff_expands > 1) { \ 695*418b791dSBob Badour tbl->noexpand=1; \ 696*418b791dSBob Badour uthash_noexpand_fyi(tbl); \ 697*418b791dSBob Badour } \ 698*418b791dSBob Badour uthash_expand_fyi(tbl); \ 699*418b791dSBob Badour } while(0) 700*418b791dSBob Badour 701*418b791dSBob Badour 702*418b791dSBob Badour /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 703*418b791dSBob Badour /* Note that HASH_SORT assumes the hash handle name to be hh. 704*418b791dSBob Badour * HASH_SRT was added to allow the hash handle name to be passed in. */ 705*418b791dSBob Badour #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 706*418b791dSBob Badour #define HASH_SRT(hh,head,cmpfcn) \ 707*418b791dSBob Badour do { \ 708*418b791dSBob Badour unsigned _hs_i; \ 709*418b791dSBob Badour unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 710*418b791dSBob Badour struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 711*418b791dSBob Badour if (head) { \ 712*418b791dSBob Badour _hs_insize = 1; \ 713*418b791dSBob Badour _hs_looping = 1; \ 714*418b791dSBob Badour _hs_list = &((head)->hh); \ 715*418b791dSBob Badour while (_hs_looping) { \ 716*418b791dSBob Badour _hs_p = _hs_list; \ 717*418b791dSBob Badour _hs_list = NULL; \ 718*418b791dSBob Badour _hs_tail = NULL; \ 719*418b791dSBob Badour _hs_nmerges = 0; \ 720*418b791dSBob Badour while (_hs_p) { \ 721*418b791dSBob Badour _hs_nmerges++; \ 722*418b791dSBob Badour _hs_q = _hs_p; \ 723*418b791dSBob Badour _hs_psize = 0; \ 724*418b791dSBob Badour for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ 725*418b791dSBob Badour _hs_psize++; \ 726*418b791dSBob Badour _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 727*418b791dSBob Badour ((void*)((char*)(_hs_q->next) + \ 728*418b791dSBob Badour (head)->hh.tbl->hho)) : NULL); \ 729*418b791dSBob Badour if (! (_hs_q) ) break; \ 730*418b791dSBob Badour } \ 731*418b791dSBob Badour _hs_qsize = _hs_insize; \ 732*418b791dSBob Badour while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ 733*418b791dSBob Badour if (_hs_psize == 0) { \ 734*418b791dSBob Badour _hs_e = _hs_q; \ 735*418b791dSBob Badour _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 736*418b791dSBob Badour ((void*)((char*)(_hs_q->next) + \ 737*418b791dSBob Badour (head)->hh.tbl->hho)) : NULL); \ 738*418b791dSBob Badour _hs_qsize--; \ 739*418b791dSBob Badour } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ 740*418b791dSBob Badour _hs_e = _hs_p; \ 741*418b791dSBob Badour _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 742*418b791dSBob Badour ((void*)((char*)(_hs_p->next) + \ 743*418b791dSBob Badour (head)->hh.tbl->hho)) : NULL); \ 744*418b791dSBob Badour _hs_psize--; \ 745*418b791dSBob Badour } else if (( \ 746*418b791dSBob Badour cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ 747*418b791dSBob Badour DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ 748*418b791dSBob Badour ) <= 0) { \ 749*418b791dSBob Badour _hs_e = _hs_p; \ 750*418b791dSBob Badour _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ 751*418b791dSBob Badour ((void*)((char*)(_hs_p->next) + \ 752*418b791dSBob Badour (head)->hh.tbl->hho)) : NULL); \ 753*418b791dSBob Badour _hs_psize--; \ 754*418b791dSBob Badour } else { \ 755*418b791dSBob Badour _hs_e = _hs_q; \ 756*418b791dSBob Badour _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ 757*418b791dSBob Badour ((void*)((char*)(_hs_q->next) + \ 758*418b791dSBob Badour (head)->hh.tbl->hho)) : NULL); \ 759*418b791dSBob Badour _hs_qsize--; \ 760*418b791dSBob Badour } \ 761*418b791dSBob Badour if ( _hs_tail ) { \ 762*418b791dSBob Badour _hs_tail->next = ((_hs_e) ? \ 763*418b791dSBob Badour ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ 764*418b791dSBob Badour } else { \ 765*418b791dSBob Badour _hs_list = _hs_e; \ 766*418b791dSBob Badour } \ 767*418b791dSBob Badour _hs_e->prev = ((_hs_tail) ? \ 768*418b791dSBob Badour ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ 769*418b791dSBob Badour _hs_tail = _hs_e; \ 770*418b791dSBob Badour } \ 771*418b791dSBob Badour _hs_p = _hs_q; \ 772*418b791dSBob Badour } \ 773*418b791dSBob Badour _hs_tail->next = NULL; \ 774*418b791dSBob Badour if ( _hs_nmerges <= 1 ) { \ 775*418b791dSBob Badour _hs_looping=0; \ 776*418b791dSBob Badour (head)->hh.tbl->tail = _hs_tail; \ 777*418b791dSBob Badour DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 778*418b791dSBob Badour } \ 779*418b791dSBob Badour _hs_insize *= 2; \ 780*418b791dSBob Badour } \ 781*418b791dSBob Badour HASH_FSCK(hh,head); \ 782*418b791dSBob Badour } \ 783*418b791dSBob Badour } while (0) 784*418b791dSBob Badour 785*418b791dSBob Badour /* This function selects items from one hash into another hash. 786*418b791dSBob Badour * The end result is that the selected items have dual presence 787*418b791dSBob Badour * in both hashes. There is no copy of the items made; rather 788*418b791dSBob Badour * they are added into the new hash through a secondary hash 789*418b791dSBob Badour * hash handle that must be present in the structure. */ 790*418b791dSBob Badour #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 791*418b791dSBob Badour do { \ 792*418b791dSBob Badour unsigned _src_bkt, _dst_bkt; \ 793*418b791dSBob Badour void *_last_elt=NULL, *_elt; \ 794*418b791dSBob Badour UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 795*418b791dSBob Badour ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 796*418b791dSBob Badour if (src) { \ 797*418b791dSBob Badour for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 798*418b791dSBob Badour for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 799*418b791dSBob Badour _src_hh; \ 800*418b791dSBob Badour _src_hh = _src_hh->hh_next) { \ 801*418b791dSBob Badour _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 802*418b791dSBob Badour if (cond(_elt)) { \ 803*418b791dSBob Badour _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 804*418b791dSBob Badour _dst_hh->key = _src_hh->key; \ 805*418b791dSBob Badour _dst_hh->keylen = _src_hh->keylen; \ 806*418b791dSBob Badour _dst_hh->hashv = _src_hh->hashv; \ 807*418b791dSBob Badour _dst_hh->prev = _last_elt; \ 808*418b791dSBob Badour _dst_hh->next = NULL; \ 809*418b791dSBob Badour if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ 810*418b791dSBob Badour if (!dst) { \ 811*418b791dSBob Badour DECLTYPE_ASSIGN(dst,_elt); \ 812*418b791dSBob Badour HASH_MAKE_TABLE(hh_dst,dst); \ 813*418b791dSBob Badour } else { \ 814*418b791dSBob Badour _dst_hh->tbl = (dst)->hh_dst.tbl; \ 815*418b791dSBob Badour } \ 816*418b791dSBob Badour HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 817*418b791dSBob Badour HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ 818*418b791dSBob Badour (dst)->hh_dst.tbl->num_items++; \ 819*418b791dSBob Badour _last_elt = _elt; \ 820*418b791dSBob Badour _last_elt_hh = _dst_hh; \ 821*418b791dSBob Badour } \ 822*418b791dSBob Badour } \ 823*418b791dSBob Badour } \ 824*418b791dSBob Badour } \ 825*418b791dSBob Badour HASH_FSCK(hh_dst,dst); \ 826*418b791dSBob Badour } while (0) 827*418b791dSBob Badour 828*418b791dSBob Badour #define HASH_CLEAR(hh,head) \ 829*418b791dSBob Badour do { \ 830*418b791dSBob Badour if (head) { \ 831*418b791dSBob Badour uthash_free((head)->hh.tbl->buckets, \ 832*418b791dSBob Badour (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 833*418b791dSBob Badour uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 834*418b791dSBob Badour (head)=NULL; \ 835*418b791dSBob Badour } \ 836*418b791dSBob Badour } while(0) 837*418b791dSBob Badour 838*418b791dSBob Badour #ifdef NO_DECLTYPE 839*418b791dSBob Badour #define HASH_ITER(hh,head,el,tmp) \ 840*418b791dSBob Badour for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ 841*418b791dSBob Badour el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) 842*418b791dSBob Badour #else 843*418b791dSBob Badour #define HASH_ITER(hh,head,el,tmp) \ 844*418b791dSBob Badour for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ 845*418b791dSBob Badour el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) 846*418b791dSBob Badour #endif 847*418b791dSBob Badour 848*418b791dSBob Badour /* obtain a count of items in the hash */ 849*418b791dSBob Badour #define HASH_COUNT(head) HASH_CNT(hh,head) 850*418b791dSBob Badour #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) 851*418b791dSBob Badour 852*418b791dSBob Badour typedef struct UT_hash_bucket { 853*418b791dSBob Badour struct UT_hash_handle *hh_head; 854*418b791dSBob Badour unsigned count; 855*418b791dSBob Badour 856*418b791dSBob Badour /* expand_mult is normally set to 0. In this situation, the max chain length 857*418b791dSBob Badour * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 858*418b791dSBob Badour * the bucket's chain exceeds this length, bucket expansion is triggered). 859*418b791dSBob Badour * However, setting expand_mult to a non-zero value delays bucket expansion 860*418b791dSBob Badour * (that would be triggered by additions to this particular bucket) 861*418b791dSBob Badour * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 862*418b791dSBob Badour * (The multiplier is simply expand_mult+1). The whole idea of this 863*418b791dSBob Badour * multiplier is to reduce bucket expansions, since they are expensive, in 864*418b791dSBob Badour * situations where we know that a particular bucket tends to be overused. 865*418b791dSBob Badour * It is better to let its chain length grow to a longer yet-still-bounded 866*418b791dSBob Badour * value, than to do an O(n) bucket expansion too often. 867*418b791dSBob Badour */ 868*418b791dSBob Badour unsigned expand_mult; 869*418b791dSBob Badour 870*418b791dSBob Badour } UT_hash_bucket; 871*418b791dSBob Badour 872*418b791dSBob Badour /* random signature used only to find hash tables in external analysis */ 873*418b791dSBob Badour #define HASH_SIGNATURE 0xa0111fe1 874*418b791dSBob Badour #define HASH_BLOOM_SIGNATURE 0xb12220f2 875*418b791dSBob Badour 876*418b791dSBob Badour typedef struct UT_hash_table { 877*418b791dSBob Badour UT_hash_bucket *buckets; 878*418b791dSBob Badour unsigned num_buckets, log2_num_buckets; 879*418b791dSBob Badour unsigned num_items; 880*418b791dSBob Badour struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 881*418b791dSBob Badour ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 882*418b791dSBob Badour 883*418b791dSBob Badour /* in an ideal situation (all buckets used equally), no bucket would have 884*418b791dSBob Badour * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 885*418b791dSBob Badour unsigned ideal_chain_maxlen; 886*418b791dSBob Badour 887*418b791dSBob Badour /* nonideal_items is the number of items in the hash whose chain position 888*418b791dSBob Badour * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 889*418b791dSBob Badour * hash distribution; reaching them in a chain traversal takes >ideal steps */ 890*418b791dSBob Badour unsigned nonideal_items; 891*418b791dSBob Badour 892*418b791dSBob Badour /* ineffective expands occur when a bucket doubling was performed, but 893*418b791dSBob Badour * afterward, more than half the items in the hash had nonideal chain 894*418b791dSBob Badour * positions. If this happens on two consecutive expansions we inhibit any 895*418b791dSBob Badour * further expansion, as it's not helping; this happens when the hash 896*418b791dSBob Badour * function isn't a good fit for the key domain. When expansion is inhibited 897*418b791dSBob Badour * the hash will still work, albeit no longer in constant time. */ 898*418b791dSBob Badour unsigned ineff_expands, noexpand; 899*418b791dSBob Badour 900*418b791dSBob Badour uint32_t signature; /* used only to find hash tables in external analysis */ 901*418b791dSBob Badour #ifdef HASH_BLOOM 902*418b791dSBob Badour uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 903*418b791dSBob Badour uint8_t *bloom_bv; 904*418b791dSBob Badour char bloom_nbits; 905*418b791dSBob Badour #endif 906*418b791dSBob Badour 907*418b791dSBob Badour } UT_hash_table; 908*418b791dSBob Badour 909*418b791dSBob Badour typedef struct UT_hash_handle { 910*418b791dSBob Badour struct UT_hash_table *tbl; 911*418b791dSBob Badour void *prev; /* prev element in app order */ 912*418b791dSBob Badour void *next; /* next element in app order */ 913*418b791dSBob Badour struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 914*418b791dSBob Badour struct UT_hash_handle *hh_next; /* next hh in bucket order */ 915*418b791dSBob Badour void *key; /* ptr to enclosing struct's key */ 916*418b791dSBob Badour unsigned keylen; /* enclosing struct's key len */ 917*418b791dSBob Badour unsigned hashv; /* result of hash-fcn(key) */ 918*418b791dSBob Badour } UT_hash_handle; 919*418b791dSBob Badour 920*418b791dSBob Badour #endif /* UTHASH_H */ 921