jenkins_hash.cc

Go to the documentation of this file.
00001 /*
00002  *    Copyright 2004-2006 Intel Corporation
00003  * 
00004  *    Licensed under the Apache License, Version 2.0 (the "License");
00005  *    you may not use this file except in compliance with the License.
00006  *    You may obtain a copy of the License at
00007  * 
00008  *        http://www.apache.org/licenses/LICENSE-2.0
00009  * 
00010  *    Unless required by applicable law or agreed to in writing, software
00011  *    distributed under the License is distributed on an "AS IS" BASIS,
00012  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  *    See the License for the specific language governing permissions and
00014  *    limitations under the License.
00015  */
00016 
00017 
00018 #include "jenkins_hash.h"
00019 
00020 namespace oasys {
00021 
00022 /*
00023  * Fast general purpose hash function from Bob Jenkins:
00024  * http://burtleburtle.net/bob/hash/doobs.html
00025  *
00026  * Slightly modified by mike demmer to use standard type definitions
00027  * and parameter types inside the declaration.
00028  */
00029 
00030 #define hashsize(n) ((u_int32_t)1<<(n))
00031 #define hashmask(n) (hashsize(n)-1)
00032 
00033 /*
00034 --------------------------------------------------------------------
00035 mix -- mix 3 32-bit values reversibly.
00036 For every delta with one or two bits set, and the deltas of all three
00037   high bits or all three low bits, whether the original value of a,b,c
00038   is almost all zero or is uniformly distributed,
00039 * If mix() is run forward or backward, at least 32 bits in a,b,c
00040   have at least 1/4 probability of changing.
00041 * If mix() is run forward, every bit of c will change between 1/3 and
00042   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
00043 mix() was built out of 36 single-cycle latency instructions in a 
00044   structure that could supported 2x parallelism, like so:
00045       a -= b; 
00046       a -= c; x = (c>>13);
00047       b -= c; a ^= x;
00048       b -= a; x = (a<<8);
00049       c -= a; b ^= x;
00050       c -= b; x = (b>>13);
00051       ...
00052   Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
00053   of that parallelism.  They've also turned some of those single-cycle
00054   latency instructions into multi-cycle latency instructions.  Still,
00055   this is the fastest good hash I could find.  There were about 2^^68
00056   to choose from.  I only looked at a billion or so.
00057 --------------------------------------------------------------------
00058 */
00059 #define mix(a,b,c) \
00060 { \
00061   a -= b; a -= c; a ^= (c>>13); \
00062   b -= c; b -= a; b ^= (a<<8); \
00063   c -= a; c -= b; c ^= (b>>13); \
00064   a -= b; a -= c; a ^= (c>>12);  \
00065   b -= c; b -= a; b ^= (a<<16); \
00066   c -= a; c -= b; c ^= (b>>5); \
00067   a -= b; a -= c; a ^= (c>>3);  \
00068   b -= c; b -= a; b ^= (a<<10); \
00069   c -= a; c -= b; c ^= (b>>15); \
00070 }
00071 
00072 /*
00073 --------------------------------------------------------------------
00074 hash() -- hash a variable-length key into a 32-bit value
00075   k       : the key (the unaligned variable-length array of bytes)
00076   len     : the length of the key, counting by bytes
00077   initval : can be any 4-byte value
00078 Returns a 32-bit value.  Every bit of the key affects every bit of
00079 the return value.  Every 1-bit and 2-bit delta achieves avalanche.
00080 About 6*len+35 instructions.
00081 
00082 The best hash table sizes are powers of 2.  There is no need to do
00083 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
00084 use a bitmask.  For example, if you need only 10 bits, do
00085   h = (h & hashmask(10));
00086 In which case, the hash table should have hashsize(10) elements.
00087 
00088 If you are hashing n strings (u_int8_t **)k, do it like this:
00089   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
00090 
00091 By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
00092 code any way you wish, private, educational, or commercial.  It's free.
00093 
00094 See http://burtleburtle.net/bob/hash/evahash.html
00095 Use for hash table lookup, or anything where one collision in 2^^32 is
00096 acceptable.  Do NOT use for cryptographic purposes.
00097 --------------------------------------------------------------------
00098 */
00099 
00100 u_int32_t
00101 jenkins_hash(u_int8_t *k,        /* the key */
00102              u_int32_t length,   /* the length of the key */
00103              u_int32_t initval)  /* the previous hash, or an arbitrary value */
00104 {
00105    register u_int32_t a,b,c,len;
00106 
00107    /* Set up the internal state */
00108    len = length;
00109    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
00110    c = initval;         /* the previous hash value */
00111 
00112    /*---------------------------------------- handle most of the key */
00113    while (len >= 12)
00114    {
00115       a += (k[0] +((u_int32_t)k[1]<<8) +((u_int32_t)k[2]<<16) +((u_int32_t)k[3]<<24));
00116       b += (k[4] +((u_int32_t)k[5]<<8) +((u_int32_t)k[6]<<16) +((u_int32_t)k[7]<<24));
00117       c += (k[8] +((u_int32_t)k[9]<<8) +((u_int32_t)k[10]<<16)+((u_int32_t)k[11]<<24));
00118       mix(a,b,c);
00119       k += 12; len -= 12;
00120    }
00121 
00122    /*------------------------------------- handle the last 11 bytes */
00123    c += length;
00124    switch(len)              /* all the case statements fall through */
00125    {
00126    case 11: c+=((u_int32_t)k[10]<<24);
00127    case 10: c+=((u_int32_t)k[9]<<16);
00128    case 9 : c+=((u_int32_t)k[8]<<8);
00129       /* the first byte of c is reserved for the length */
00130    case 8 : b+=((u_int32_t)k[7]<<24);
00131    case 7 : b+=((u_int32_t)k[6]<<16);
00132    case 6 : b+=((u_int32_t)k[5]<<8);
00133    case 5 : b+=k[4];
00134    case 4 : a+=((u_int32_t)k[3]<<24);
00135    case 3 : a+=((u_int32_t)k[2]<<16);
00136    case 2 : a+=((u_int32_t)k[1]<<8);
00137    case 1 : a+=k[0];
00138      /* case 0: nothing left to add */
00139    }
00140    mix(a,b,c);
00141    /*-------------------------------------------- report the result */
00142    return c;
00143 }
00144 
00145 } // namespace oasys

Generated on Thu Jun 7 12:54:28 2007 for DTN Reference Implementation by  doxygen 1.5.1