hash: use more recent lookup3 algorithm instead of lookup2

Bob Jenkins published a newer hash function in May 2006, it has
better distribution.

See http://burtleburtle.net/bob/hash/doobs.html for lengthy
comparisions.
This commit is contained in:
Florian Westphal 2011-06-27 00:16:37 +02:00
parent 7f8d0ea5a3
commit 1765f0ae0b
1 changed files with 44 additions and 38 deletions

View File

@ -26,8 +26,7 @@
#include "exp.h" #include "exp.h"
#include "hash.h" #include "hash.h"
static UINT32 jenkins_hash PARAMS((register UINT8 *k, register UINT32 length, static UINT32 jenkins_hash PARAMS((UINT8 *k, UINT32 length, UINT32 initval));
register UINT32 initval));
/** /**
* Calculate hash value for a given string. * Calculate hash value for a given string.
@ -46,66 +45,74 @@ Hash( const char *String )
} /* Hash */ } /* Hash */
/* /*
* This hash function originates from lookup2.c of Bob Jenkins * This hash function originates from lookup3.c of Bob Jenkins
* (URL: <http://burtleburtle.net/bob/c/lookup2.c>): * (URL: <http://burtleburtle.net/bob/c/lookup3.c>):
* -------------------------------------------------------------------- * --------------------------------------------------------------------
* lookup2.c, by Bob Jenkins, December 1996, Public Domain. * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
* hash(), hash2(), hash3, and mix() are externally useful functions. * These are functions for producing 32-bit hashes for hash table lookup.
* Routines to test the hash are included if SELF_TEST is defined. * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
* You can use this free for any purpose. It has no warranty. * are externally useful functions. Routines to test the hash are included
* if SELF_TEST is defined. You can use this free for any purpose. It's in
* the public domain. It has no warranty.
* -------------------------------------------------------------------- * --------------------------------------------------------------------
* Not all of his functions are used here. * Not all of his functions are used here.
*/ */
#define hashsize(n) ((UINT32)1<<(n)) #define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1) #define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
#define mix(a,b,c) \ #define mix(a,b,c) \
{ \ { \
a -= b; a -= c; a ^= (c>>13); \ a -= c; a ^= rot(c, 4); c += b; \
b -= c; b -= a; b ^= (a<<8); \ b -= a; b ^= rot(a, 6); a += c; \
c -= a; c -= b; c ^= (b>>13); \ c -= b; c ^= rot(b, 8); b += a; \
a -= b; a -= c; a ^= (c>>12); \ a -= c; a ^= rot(c,16); c += b; \
b -= c; b -= a; b ^= (a<<16); \ b -= a; b ^= rot(a,19); a += c; \
c -= a; c -= b; c ^= (b>>5); \ c -= b; c ^= rot(b, 4); b += a; \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
} /* mix */ } /* mix */
#define final(a,b,c) \
{ \
c ^= b; c -= rot(b,14); \
a ^= c; a -= rot(c,11); \
b ^= a; b -= rot(a,25); \
c ^= b; c -= rot(b,16); \
a ^= c; a -= rot(c,4); \
b ^= a; b -= rot(a,14); \
c ^= b; c -= rot(b,24); \
}
static UINT32 static UINT32
jenkins_hash( register UINT8 *k, register UINT32 length, register UINT32 initval ) jenkins_hash(UINT8 *k, UINT32 length, UINT32 initval)
{ {
/* k: the key /* k: the key
* length: length of the key * length: length of the key
* initval: the previous hash, or an arbitrary value * initval: the previous hash, or an arbitrary value
*/ */
UINT32 a,b,c;
register UINT32 a,b,c,len;
/* Set up the internal state */ /* Set up the internal state */
len = length; a = b = c = 0xdeadbeef + length + initval;
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = initval; /* the previous hash value */
/* handle most of the key */ /* handle most of the key */
while (len >= 12) while (length > 12) {
{
a += (k[0] +((UINT32)k[1]<<8) +((UINT32)k[2]<<16) +((UINT32)k[3]<<24)); a += (k[0] +((UINT32)k[1]<<8) +((UINT32)k[2]<<16) +((UINT32)k[3]<<24));
b += (k[4] +((UINT32)k[5]<<8) +((UINT32)k[6]<<16) +((UINT32)k[7]<<24)); b += (k[4] +((UINT32)k[5]<<8) +((UINT32)k[6]<<16) +((UINT32)k[7]<<24));
c += (k[8] +((UINT32)k[9]<<8) +((UINT32)k[10]<<16)+((UINT32)k[11]<<24)); c += (k[8] +((UINT32)k[9]<<8) +((UINT32)k[10]<<16)+((UINT32)k[11]<<24));
mix(a,b,c); mix(a,b,c);
k += 12; len -= 12; length -= 12;
k += 12;
} }
/* handle the last 11 bytes */ /*-------------------------------- last block: affect all 32 bits of (c) */
c += length; switch(length) /* all the case statements fall through */
switch( (int)len ) /* all the case statements fall through */
{ {
case 11: c+=((UINT32)k[10]<<24); case 12: c+=((UINT32)k[11])<<24;
case 10: c+=((UINT32)k[9]<<16); case 11: c+=((UINT32)k[10]<<16);
case 9 : c+=((UINT32)k[8]<<8); case 10: c+=((UINT32)k[9]<<8);
/* the first byte of c is reserved for the length */ case 9 : c+=k[8];
case 8 : b+=((UINT32)k[7]<<24); case 8 : b+=((UINT32)k[7]<<24);
case 7 : b+=((UINT32)k[6]<<16); case 7 : b+=((UINT32)k[6]<<16);
case 6 : b+=((UINT32)k[5]<<8); case 6 : b+=((UINT32)k[5]<<8);
@ -114,11 +121,10 @@ jenkins_hash( register UINT8 *k, register UINT32 length, register UINT32 initval
case 3 : a+=((UINT32)k[2]<<16); case 3 : a+=((UINT32)k[2]<<16);
case 2 : a+=((UINT32)k[1]<<8); case 2 : a+=((UINT32)k[1]<<8);
case 1 : a+=k[0]; case 1 : a+=k[0];
/* case 0: nothing left to add */ break;
case 0 : return c;
} }
mix(a,b,c); final(a,b,c);
/* report the result */
return c; return c;
} /* jenkins_hash */ } /* jenkins_hash */