jenkins_hash.cc

Go to the documentation of this file.
00001 /*
00002  * IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. By
00003  * downloading, copying, installing or using the software you agree to
00004  * this license. If you do not agree to this license, do not download,
00005  * install, copy or use the software.
00006  * 
00007  * Intel Open Source License 
00008  * 
00009  * Copyright (c) 2004 Intel Corporation. All rights reserved. 
00010  * 
00011  * Redistribution and use in source and binary forms, with or without
00012  * modification, are permitted provided that the following conditions are
00013  * met:
00014  * 
00015  *   Redistributions of source code must retain the above copyright
00016  *   notice, this list of conditions and the following disclaimer.
00017  * 
00018  *   Redistributions in binary form must reproduce the above copyright
00019  *   notice, this list of conditions and the following disclaimer in the
00020  *   documentation and/or other materials provided with the distribution.
00021  * 
00022  *   Neither the name of the Intel Corporation nor the names of its
00023  *   contributors may be used to endorse or promote products derived from
00024  *   this software without specific prior written permission.
00025  *  
00026  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00027  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00028  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00029  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
00030  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00031  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00032  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00033  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00034  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00035  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00036  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037  */
00038 
00039 #include "jenkins_hash.h"
00040 
00041 namespace oasys {
00042 
00043 /*
00044  * Fast general purpose hash function from Bob Jenkins:
00045  * http://burtleburtle.net/bob/hash/doobs.html
00046  *
00047  * Slightly modified by mike demmer to use standard type definitions
00048  * and parameter types inside the declaration.
00049  */
00050 
00051 #define hashsize(n) ((u_int32_t)1<<(n))
00052 #define hashmask(n) (hashsize(n)-1)
00053 
00054 /*
00055 --------------------------------------------------------------------
00056 mix -- mix 3 32-bit values reversibly.
00057 For every delta with one or two bits set, and the deltas of all three
00058   high bits or all three low bits, whether the original value of a,b,c
00059   is almost all zero or is uniformly distributed,
00060 * If mix() is run forward or backward, at least 32 bits in a,b,c
00061   have at least 1/4 probability of changing.
00062 * If mix() is run forward, every bit of c will change between 1/3 and
00063   2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
00064 mix() was built out of 36 single-cycle latency instructions in a 
00065   structure that could supported 2x parallelism, like so:
00066       a -= b; 
00067       a -= c; x = (c>>13);
00068       b -= c; a ^= x;
00069       b -= a; x = (a<<8);
00070       c -= a; b ^= x;
00071       c -= b; x = (b>>13);
00072       ...
00073   Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
00074   of that parallelism.  They've also turned some of those single-cycle
00075   latency instructions into multi-cycle latency instructions.  Still,
00076   this is the fastest good hash I could find.  There were about 2^^68
00077   to choose from.  I only looked at a billion or so.
00078 --------------------------------------------------------------------
00079 */
00080 #define mix(a,b,c) \
00081 { \
00082   a -= b; a -= c; a ^= (c>>13); \
00083   b -= c; b -= a; b ^= (a<<8); \
00084   c -= a; c -= b; c ^= (b>>13); \
00085   a -= b; a -= c; a ^= (c>>12);  \
00086   b -= c; b -= a; b ^= (a<<16); \
00087   c -= a; c -= b; c ^= (b>>5); \
00088   a -= b; a -= c; a ^= (c>>3);  \
00089   b -= c; b -= a; b ^= (a<<10); \
00090   c -= a; c -= b; c ^= (b>>15); \
00091 }
00092 
00093 /*
00094 --------------------------------------------------------------------
00095 hash() -- hash a variable-length key into a 32-bit value
00096   k       : the key (the unaligned variable-length array of bytes)
00097   len     : the length of the key, counting by bytes
00098   initval : can be any 4-byte value
00099 Returns a 32-bit value.  Every bit of the key affects every bit of
00100 the return value.  Every 1-bit and 2-bit delta achieves avalanche.
00101 About 6*len+35 instructions.
00102 
00103 The best hash table sizes are powers of 2.  There is no need to do
00104 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
00105 use a bitmask.  For example, if you need only 10 bits, do
00106   h = (h & hashmask(10));
00107 In which case, the hash table should have hashsize(10) elements.
00108 
00109 If you are hashing n strings (u_int8_t **)k, do it like this:
00110   for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
00111 
00112 By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
00113 code any way you wish, private, educational, or commercial.  It's free.
00114 
00115 See http://burtleburtle.net/bob/hash/evahash.html
00116 Use for hash table lookup, or anything where one collision in 2^^32 is
00117 acceptable.  Do NOT use for cryptographic purposes.
00118 --------------------------------------------------------------------
00119 */
00120 
00121 u_int32_t
00122 jenkins_hash(u_int8_t *k,        /* the key */
00123              u_int32_t length,   /* the length of the key */
00124              u_int32_t initval)  /* the previous hash, or an arbitrary value */
00125 {
00126    register u_int32_t a,b,c,len;
00127 
00128    /* Set up the internal state */
00129    len = length;
00130    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
00131    c = initval;         /* the previous hash value */
00132 
00133    /*---------------------------------------- handle most of the key */
00134    while (len >= 12)
00135    {
00136       a += (k[0] +((u_int32_t)k[1]<<8) +((u_int32_t)k[2]<<16) +((u_int32_t)k[3]<<24));
00137       b += (k[4] +((u_int32_t)k[5]<<8) +((u_int32_t)k[6]<<16) +((u_int32_t)k[7]<<24));
00138       c += (k[8] +((u_int32_t)k[9]<<8) +((u_int32_t)k[10]<<16)+((u_int32_t)k[11]<<24));
00139       mix(a,b,c);
00140       k += 12; len -= 12;
00141    }
00142 
00143    /*------------------------------------- handle the last 11 bytes */
00144    c += length;
00145    switch(len)              /* all the case statements fall through */
00146    {
00147    case 11: c+=((u_int32_t)k[10]<<24);
00148    case 10: c+=((u_int32_t)k[9]<<16);
00149    case 9 : c+=((u_int32_t)k[8]<<8);
00150       /* the first byte of c is reserved for the length */
00151    case 8 : b+=((u_int32_t)k[7]<<24);
00152    case 7 : b+=((u_int32_t)k[6]<<16);
00153    case 6 : b+=((u_int32_t)k[5]<<8);
00154    case 5 : b+=k[4];
00155    case 4 : a+=((u_int32_t)k[3]<<24);
00156    case 3 : a+=((u_int32_t)k[2]<<16);
00157    case 2 : a+=((u_int32_t)k[1]<<8);
00158    case 1 : a+=k[0];
00159      /* case 0: nothing left to add */
00160    }
00161    mix(a,b,c);
00162    /*-------------------------------------------- report the result */
00163    return c;
00164 }
00165 
00166 } // namespace oasys

Generated on Fri Dec 22 14:47:59 2006 for DTN Reference Implementation by  doxygen 1.5.1