On Mon, Jan 23, 2012 at 09:52:59AM +0200, Alon Levy wrote: > See http://code.google.com/p/smhasher/wiki/MurmurHash3 > > Performance quotes from there are 2.5 times what lookup3 can do, for > 32 bit variant, which is what we use: > > Lookup3_x86_32 - 1234 mb/sec > Lookup3_x64_32 - 1265 mb/sec > > MurmurHash3_x86_32 - 3105 mb/sec > > New files are released to the public domain, keeping them that way. > --- > src/Makefile.am | 10 +- > src/lookup3.c | 769 > ----------------------------------------------------- > src/lookup3.h | 26 -- > src/murmurhash3.c | 357 +++++++++++++++++++++++++ > src/murmurhash3.h | 39 +++ > src/qxl_image.c | 9 +- > 6 files changed, 407 insertions(+), 803 deletions(-) > delete mode 100644 src/lookup3.c > delete mode 100644 src/lookup3.h > create mode 100644 src/murmurhash3.c > create mode 100644 src/murmurhash3.h > > diff --git a/src/Makefile.am b/src/Makefile.am > index 2695614..ca07ee9 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -1,4 +1,3 @@ > -# Copyright 2008 Red Hat, Inc.
oops, will resend. > # > # Permission is hereby granted, free of charge, to any person obtaining a > # copy of this software and associated documentation files (the "Software"), > @@ -45,8 +44,8 @@ qxl_drv_la_SOURCES = \ > qxl_mem.c \ > mspace.c \ > mspace.h \ > - lookup3.c \ > - lookup3.h \ > + murmurhash3.c \ > + murmurhash3.h \ > qxl_cursor.c > endif > > @@ -80,7 +79,8 @@ spiceqxl_drv_la_SOURCES = \ > qxl_mem.c \ > mspace.c \ > mspace.h \ > - lookup3.c \ > - lookup3.h \ > + murmurhash3.c \ > + murmurhash3.h \ > qxl_cursor.c > endif > +# Copyright 2008 Red Hat, Inc. > diff --git a/src/lookup3.c b/src/lookup3.c > deleted file mode 100644 > index b37ca51..0000000 > --- a/src/lookup3.c > +++ /dev/null > @@ -1,769 +0,0 @@ > -/* > -------------------------------------------------------------------------------- > -lookup3.c, by Bob Jenkins, May 2006, Public Domain. > - > -These are functions for producing 32-bit hashes for hash table lookup. > -hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() > -are externally useful functions. Routines to test the hash are included > -if SELF_TEST is defined. You can use this free for any purpose. It's in > -the public domain. It has no warranty. > - > -You probably want to use hashlittle(). hashlittle() and hashbig() > -hash byte arrays. hashlittle() is is faster than hashbig() on > -little-endian machines. Intel and AMD are little-endian machines. > -On second thought, you probably want hashlittle2(), which is identical to > -hashlittle() except it returns two 32-bit hashes for the price of one. > -You could implement hashbig2() if you wanted but I haven't bothered here. > - > -If you want to find a hash of, say, exactly 7 integers, do > - a = i1; b = i2; c = i3; > - mix(a,b,c); > - a += i4; b += i5; c += i6; > - mix(a,b,c); > - a += i7; > - final(a,b,c); > -then use c as the hash value. If you have a variable length array of > -4-byte integers to hash, use hashword(). If you have a byte array (like > -a character string), use hashlittle(). If you have several byte arrays, or > -a mix of things, see the comments above hashlittle(). > - > -Why is this so big? I read 12 bytes at a time into 3 4-byte integers, > -then mix those integers. This is fast (you can do a lot more thorough > -mixing with 12*3 instructions on 3 integers than you can with 3 instructions > -on 1 byte), but shoehorning those bytes into integers efficiently is messy. > -------------------------------------------------------------------------------- > -*/ > - > -#include <stdio.h> /* defines printf for tests */ > -#include <time.h> /* defines time_t for timings in the test */ > -#include "lookup3.h" > -#ifdef linux > -# include <endian.h> /* attempt to define endianness */ > -#endif > - > -/* > - * My best guess at if you are big-endian or little-endian. This may > - * need adjustment. > - */ > -#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ > - __BYTE_ORDER == __LITTLE_ENDIAN) || \ > - (defined(i386) || defined(__i386__) || defined(__i486__) || \ > - defined(__i586__) || defined(__i686__) || defined(vax) || > defined(MIPSEL)) > -# define HASH_LITTLE_ENDIAN 1 > -# define HASH_BIG_ENDIAN 0 > -#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ > - __BYTE_ORDER == __BIG_ENDIAN) || \ > - (defined(sparc) || defined(POWERPC) || defined(mc68000) || > defined(sel)) > -# define HASH_LITTLE_ENDIAN 0 > -# define HASH_BIG_ENDIAN 1 > -#else > -# define HASH_LITTLE_ENDIAN 0 > -# define HASH_BIG_ENDIAN 0 > -#endif > - > -#define hashsize(n) ((uint32_t)1<<(n)) > -#define hashmask(n) (hashsize(n)-1) > -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) > - > -/* > -------------------------------------------------------------------------------- > -mix -- mix 3 32-bit values reversibly. > - > -This is reversible, so any information in (a,b,c) before mix() is > -still in (a,b,c) after mix(). > - > -If four pairs of (a,b,c) inputs are run through mix(), or through > -mix() in reverse, there are at least 32 bits of the output that > -are sometimes the same for one pair and different for another pair. > -This was tested for: > -* pairs that differed by one bit, by two bits, in any combination > - of top bits of (a,b,c), or in any combination of bottom bits of > - (a,b,c). > -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed > - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as > - is commonly produced by subtraction) look like a single 1-bit > - difference. > -* the base values were pseudorandom, all zero but one bit set, or > - all zero plus a counter that starts at zero. > - > -Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that > -satisfy this are > - 4 6 8 16 19 4 > - 9 15 3 18 27 15 > - 14 9 3 7 17 3 > -Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing > -for "differ" defined as + with a one-bit base and a two-bit delta. I > -used http://burtleburtle.net/bob/hash/avalanche.html to choose > -the operations, constants, and arrangements of the variables. > - > -This does not achieve avalanche. There are input bits of (a,b,c) > -that fail to affect some output bits of (a,b,c), especially of a. The > -most thoroughly mixed value is c, but it doesn't really even achieve > -avalanche in c. > - > -This allows some parallelism. Read-after-writes are good at doubling > -the number of bits affected, so the goal of mixing pulls in the opposite > -direction as the goal of parallelism. I did what I could. Rotates > -seem to cost as much as shifts on every machine I could lay my hands > -on, and rotates are much kinder to the top and bottom bits, so I used > -rotates. > -------------------------------------------------------------------------------- > -*/ > -#define mix(a,b,c) \ > -{ \ > - a -= c; a ^= rot(c, 4); c += b; \ > - b -= a; b ^= rot(a, 6); a += c; \ > - c -= b; c ^= rot(b, 8); b += a; \ > - a -= c; a ^= rot(c,16); c += b; \ > - b -= a; b ^= rot(a,19); a += c; \ > - c -= b; c ^= rot(b, 4); b += a; \ > -} > - > -/* > -------------------------------------------------------------------------------- > -final -- final mixing of 3 32-bit values (a,b,c) into c > - > -Pairs of (a,b,c) values differing in only a few bits will usually > -produce values of c that look totally different. This was tested for > -* pairs that differed by one bit, by two bits, in any combination > - of top bits of (a,b,c), or in any combination of bottom bits of > - (a,b,c). > -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed > - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as > - is commonly produced by subtraction) look like a single 1-bit > - difference. > -* the base values were pseudorandom, all zero but one bit set, or > - all zero plus a counter that starts at zero. > - > -These constants passed: > - 14 11 25 16 4 14 24 > - 12 14 25 16 4 14 24 > -and these came close: > - 4 8 15 26 3 22 24 > - 10 8 15 26 3 22 24 > - 11 8 15 26 3 22 24 > -------------------------------------------------------------------------------- > -*/ > -#define final(a,b,c) \ > -{ \ > - c ^= b; c -= rot(b,14); \ > - a ^= c; a -= rot(c,11); \ > - b ^= a; b -= rot(a,25); \ > - c ^= b; c -= rot(b,16); \ > - a ^= c; a -= rot(c,4); \ > - b ^= a; b -= rot(a,14); \ > - c ^= b; c -= rot(b,24); \ > -} > - > -/* > --------------------------------------------------------------------- > - This works on all machines. To be useful, it requires > - -- that the key be an array of uint32_t's, and > - -- that the length be the number of uint32_t's in the key > - > - The function hashword() is identical to hashlittle() on little-endian > - machines, and identical to hashbig() on big-endian machines, > - except that the length has to be measured in uint32_ts rather than in > - bytes. hashlittle() is more complicated than hashword() only because > - hashlittle() has to dance around fitting the key bytes into registers. > --------------------------------------------------------------------- > -*/ > -uint32_t hashword( > - const uint32_t *k, /* the key, an array of uint32_t > values */ > - size_t length, /* the length of the key, in > uint32_ts */ > - uint32_t initval) /* the previous hash, or an arbitrary > value */ > -{ > - uint32_t a,b,c; > - > - /* Set up the internal state */ > - a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; > - > - /*------------------------------------------------- handle most of the key > */ > - while (length > 3) > - { > - a += k[0]; > - b += k[1]; > - c += k[2]; > - mix(a,b,c); > - length -= 3; > - k += 3; > - } > - > - /*------------------------------------------- handle the last 3 uint32_t's > */ > - switch(length) /* all the case statements fall through > */ > - { > - case 3 : c+=k[2]; > - case 2 : b+=k[1]; > - case 1 : a+=k[0]; > - final(a,b,c); > - case 0: /* case 0: nothing left to add */ > - break; > - } > - /*------------------------------------------------------ report the result > */ > - return c; > -} > - > - > -/* > --------------------------------------------------------------------- > -hashword2() -- same as hashword(), but take two seeds and return two > -32-bit values. pc and pb must both be nonnull, and *pc and *pb must > -both be initialized with seeds. If you pass in (*pb)==0, the output > -(*pc) will be the same as the return value from hashword(). > --------------------------------------------------------------------- > -*/ > -void hashword2 ( > -const uint32_t *k, /* the key, an array of uint32_t values > */ > -size_t length, /* the length of the key, in uint32_ts > */ > -uint32_t *pc, /* IN: seed OUT: primary hash value > */ > -uint32_t *pb) /* IN: more seed OUT: secondary hash value > */ > -{ > - uint32_t a,b,c; > - > - /* Set up the internal state */ > - a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; > - c += *pb; > - > - /*------------------------------------------------- handle most of the key > */ > - while (length > 3) > - { > - a += k[0]; > - b += k[1]; > - c += k[2]; > - mix(a,b,c); > - length -= 3; > - k += 3; > - } > - > - /*------------------------------------------- handle the last 3 uint32_t's > */ > - switch(length) /* all the case statements fall through > */ > - { > - case 3 : c+=k[2]; > - case 2 : b+=k[1]; > - case 1 : a+=k[0]; > - final(a,b,c); > - case 0: /* case 0: nothing left to add */ > - break; > - } > - /*------------------------------------------------------ report the result > */ > - *pc=c; *pb=b; > -} > - > - > -/* > -------------------------------------------------------------------------------- > -hashlittle() -- hash a variable-length key into a 32-bit value > - k : the key (the unaligned variable-length array of bytes) > - length : the length of the key, counting by bytes > - initval : can be any 4-byte value > -Returns a 32-bit value. Every bit of the key affects every bit of > -the return value. Two keys differing by one or two bits will have > -totally different hash values. > - > -The best hash table sizes are powers of 2. There is no need to do > -mod a prime (mod is sooo slow!). If you need less than 32 bits, > -use a bitmask. For example, if you need only 10 bits, do > - h = (h & hashmask(10)); > -In which case, the hash table should have hashsize(10) elements. > - > -If you are hashing n strings (uint8_t **)k, do it like this: > - for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); > - > -By Bob Jenkins, 2006. [email protected]. You may use this > -code any way you wish, private, educational, or commercial. It's free. > - > -Use for hash table lookup, or anything where one collision in 2^^32 is > -acceptable. Do NOT use for cryptographic purposes. > -------------------------------------------------------------------------------- > -*/ > - > -uint32_t hashlittle( const void *key, size_t length, uint32_t initval) > -{ > - uint32_t a,b,c; /* internal state > */ > - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 > */ > - > - /* Set up the internal state */ > - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; > - > - u.ptr = key; > - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { > - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks > */ > -#ifdef VALGRIND > - const uint8_t *k8; > -#endif > - > - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += k[0]; > - b += k[1]; > - c += k[2]; > - mix(a,b,c); > - length -= 12; > - k += 3; > - } > - > - /*----------------------------- handle the last (probably partial) block > */ > - /* > - * "k[2]&0xffffff" actually reads beyond the end of the string, but > - * then masks off the part it's not allowed to read. Because the > - * string is aligned, the masked-off tail is in the same word as the > - * rest of the string. Every machine with memory protection I've seen > - * does it on word boundaries, so is OK with this. But VALGRIND will > - * still catch it and complain. The masking trick does make the hash > - * noticably faster for short strings (like English words). > - */ > -#ifndef VALGRIND > - > - switch(length) > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; > - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; > - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; > - case 6 : b+=k[1]&0xffff; a+=k[0]; break; > - case 5 : b+=k[1]&0xff; a+=k[0]; break; > - case 4 : a+=k[0]; break; > - case 3 : a+=k[0]&0xffffff; break; > - case 2 : a+=k[0]&0xffff; break; > - case 1 : a+=k[0]&0xff; break; > - case 0 : return c; /* zero length strings require no mixing > */ > - } > - > -#else /* make valgrind happy */ > - > - k8 = (const uint8_t *)k; > - switch(length) > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ > - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ > - case 9 : c+=k8[8]; /* fall through */ > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ > - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ > - case 5 : b+=k8[4]; /* fall through */ > - case 4 : a+=k[0]; break; > - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ > - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ > - case 1 : a+=k8[0]; break; > - case 0 : return c; > - } > - > -#endif /* !valgrind */ > - > - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { > - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks > */ > - const uint8_t *k8; > - > - /*--------------- all but last block: aligned reads and different mixing > */ > - while (length > 12) > - { > - a += k[0] + (((uint32_t)k[1])<<16); > - b += k[2] + (((uint32_t)k[3])<<16); > - c += k[4] + (((uint32_t)k[5])<<16); > - mix(a,b,c); > - length -= 12; > - k += 6; > - } > - > - /*----------------------------- handle the last (probably partial) block > */ > - k8 = (const uint8_t *)k; > - switch(length) > - { > - case 12: c+=k[4]+(((uint32_t)k[5])<<16); > - b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ > - case 10: c+=k[4]; > - b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 9 : c+=k8[8]; /* fall through */ > - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ > - case 6 : b+=k[2]; > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 5 : b+=k8[4]; /* fall through */ > - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ > - case 2 : a+=k[0]; > - break; > - case 1 : a+=k8[0]; > - break; > - case 0 : return c; /* zero length requires no mixing > */ > - } > - > - } else { /* need to read the key one byte at a time > */ > - const uint8_t *k = (const uint8_t *)key; > - > - /*--------------- all but the last block: affect some 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += k[0]; > - a += ((uint32_t)k[1])<<8; > - a += ((uint32_t)k[2])<<16; > - a += ((uint32_t)k[3])<<24; > - b += k[4]; > - b += ((uint32_t)k[5])<<8; > - b += ((uint32_t)k[6])<<16; > - b += ((uint32_t)k[7])<<24; > - c += k[8]; > - c += ((uint32_t)k[9])<<8; > - c += ((uint32_t)k[10])<<16; > - c += ((uint32_t)k[11])<<24; > - mix(a,b,c); > - length -= 12; > - k += 12; > - } > - > - /*-------------------------------- last block: affect all 32 bits of (c) > */ > - switch(length) /* all the case statements fall through > */ > - { > - case 12: c+=((uint32_t)k[11])<<24; > - case 11: c+=((uint32_t)k[10])<<16; > - case 10: c+=((uint32_t)k[9])<<8; > - case 9 : c+=k[8]; > - case 8 : b+=((uint32_t)k[7])<<24; > - case 7 : b+=((uint32_t)k[6])<<16; > - case 6 : b+=((uint32_t)k[5])<<8; > - case 5 : b+=k[4]; > - case 4 : a+=((uint32_t)k[3])<<24; > - case 3 : a+=((uint32_t)k[2])<<16; > - case 2 : a+=((uint32_t)k[1])<<8; > - case 1 : a+=k[0]; > - break; > - case 0 : return c; > - } > - } > - > - final(a,b,c); > - return c; > -} > - > - > -/* > - * hashlittle2: return 2 32-bit hash values > - * > - * This is identical to hashlittle(), except it returns two 32-bit hash > - * values instead of just one. This is good enough for hash table > - * lookup with 2^^64 buckets, or if you want a second hash if you're not > - * happy with the first, or if you want a probably-unique 64-bit ID for > - * the key. *pc is better mixed than *pb, so use *pc first. If you want > - * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". > - */ > -void hashlittle2( > - const void *key, /* the key to hash */ > - size_t length, /* length of the key */ > - uint32_t *pc, /* IN: primary initval, OUT: primary hash */ > - uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ > -{ > - uint32_t a,b,c; /* internal state > */ > - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 > */ > - > - /* Set up the internal state */ > - a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; > - c += *pb; > - > - u.ptr = key; > - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { > - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks > */ > -#ifdef VALGRIND > - const uint8_t *k8; > -#endif > - > - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += k[0]; > - b += k[1]; > - c += k[2]; > - mix(a,b,c); > - length -= 12; > - k += 3; > - } > - > - /*----------------------------- handle the last (probably partial) block > */ > - /* > - * "k[2]&0xffffff" actually reads beyond the end of the string, but > - * then masks off the part it's not allowed to read. Because the > - * string is aligned, the masked-off tail is in the same word as the > - * rest of the string. Every machine with memory protection I've seen > - * does it on word boundaries, so is OK with this. But VALGRIND will > - * still catch it and complain. The masking trick does make the hash > - * noticably faster for short strings (like English words). > - */ > -#ifndef VALGRIND > - > - switch(length) > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; > - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; > - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; > - case 6 : b+=k[1]&0xffff; a+=k[0]; break; > - case 5 : b+=k[1]&0xff; a+=k[0]; break; > - case 4 : a+=k[0]; break; > - case 3 : a+=k[0]&0xffffff; break; > - case 2 : a+=k[0]&0xffff; break; > - case 1 : a+=k[0]&0xff; break; > - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing > */ > - } > - > -#else /* make valgrind happy */ > - > - k8 = (const uint8_t *)k; > - switch(length) > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ > - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ > - case 9 : c+=k8[8]; /* fall through */ > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ > - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ > - case 5 : b+=k8[4]; /* fall through */ > - case 4 : a+=k[0]; break; > - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ > - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ > - case 1 : a+=k8[0]; break; > - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing > */ > - } > - > -#endif /* !valgrind */ > - > - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { > - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks > */ > - const uint8_t *k8; > - > - /*--------------- all but last block: aligned reads and different mixing > */ > - while (length > 12) > - { > - a += k[0] + (((uint32_t)k[1])<<16); > - b += k[2] + (((uint32_t)k[3])<<16); > - c += k[4] + (((uint32_t)k[5])<<16); > - mix(a,b,c); > - length -= 12; > - k += 6; > - } > - > - /*----------------------------- handle the last (probably partial) block > */ > - k8 = (const uint8_t *)k; > - switch(length) > - { > - case 12: c+=k[4]+(((uint32_t)k[5])<<16); > - b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ > - case 10: c+=k[4]; > - b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 9 : c+=k8[8]; /* fall through */ > - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ > - case 6 : b+=k[2]; > - a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 5 : b+=k8[4]; /* fall through */ > - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); > - break; > - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ > - case 2 : a+=k[0]; > - break; > - case 1 : a+=k8[0]; > - break; > - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing > */ > - } > - > - } else { /* need to read the key one byte at a time > */ > - const uint8_t *k = (const uint8_t *)key; > - > - /*--------------- all but the last block: affect some 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += k[0]; > - a += ((uint32_t)k[1])<<8; > - a += ((uint32_t)k[2])<<16; > - a += ((uint32_t)k[3])<<24; > - b += k[4]; > - b += ((uint32_t)k[5])<<8; > - b += ((uint32_t)k[6])<<16; > - b += ((uint32_t)k[7])<<24; > - c += k[8]; > - c += ((uint32_t)k[9])<<8; > - c += ((uint32_t)k[10])<<16; > - c += ((uint32_t)k[11])<<24; > - mix(a,b,c); > - length -= 12; > - k += 12; > - } > - > - /*-------------------------------- last block: affect all 32 bits of (c) > */ > - switch(length) /* all the case statements fall through > */ > - { > - case 12: c+=((uint32_t)k[11])<<24; > - case 11: c+=((uint32_t)k[10])<<16; > - case 10: c+=((uint32_t)k[9])<<8; > - case 9 : c+=k[8]; > - case 8 : b+=((uint32_t)k[7])<<24; > - case 7 : b+=((uint32_t)k[6])<<16; > - case 6 : b+=((uint32_t)k[5])<<8; > - case 5 : b+=k[4]; > - case 4 : a+=((uint32_t)k[3])<<24; > - case 3 : a+=((uint32_t)k[2])<<16; > - case 2 : a+=((uint32_t)k[1])<<8; > - case 1 : a+=k[0]; > - break; > - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing > */ > - } > - } > - > - final(a,b,c); > - *pc=c; *pb=b; > -} > - > - > - > -/* > - * hashbig(): > - * This is the same as hashword() on big-endian machines. It is different > - * from hashlittle() on all machines. hashbig() takes advantage of > - * big-endian byte ordering. > - */ > -uint32_t hashbig( const void *key, size_t length, uint32_t initval) > -{ > - uint32_t a,b,c; > - union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily > */ > - > - /* Set up the internal state */ > - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; > - > - u.ptr = key; > - if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { > - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks > */ > -#ifdef VALGRIND > - const uint8_t *k8; > -#endif > - > - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += k[0]; > - b += k[1]; > - c += k[2]; > - mix(a,b,c); > - length -= 12; > - k += 3; > - } > - > - /*----------------------------- handle the last (probably partial) block > */ > - /* > - * "k[2]<<8" actually reads beyond the end of the string, but > - * then shifts out the part it's not allowed to read. Because the > - * string is aligned, the illegal read is in the same word as the > - * rest of the string. Every machine with memory protection I've seen > - * does it on word boundaries, so is OK with this. But VALGRIND will > - * still catch it and complain. The masking trick does make the hash > - * noticably faster for short strings (like English words). > - */ > -#ifndef VALGRIND > - > - switch(length) > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; > - case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; > - case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; > - case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; > - case 5 : b+=k[1]&0xff000000; a+=k[0]; break; > - case 4 : a+=k[0]; break; > - case 3 : a+=k[0]&0xffffff00; break; > - case 2 : a+=k[0]&0xffff0000; break; > - case 1 : a+=k[0]&0xff000000; break; > - case 0 : return c; /* zero length strings require no mixing > */ > - } > - > -#else /* make valgrind happy */ > - > - k8 = (const uint8_t *)k; > - switch(length) /* all the case statements fall through > */ > - { > - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; > - case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ > - case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ > - case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ > - case 8 : b+=k[1]; a+=k[0]; break; > - case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ > - case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ > - case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ > - case 4 : a+=k[0]; break; > - case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ > - case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ > - case 1 : a+=((uint32_t)k8[0])<<24; break; > - case 0 : return c; > - } > - > -#endif /* !VALGRIND */ > - > - } else { /* need to read the key one byte at a time > */ > - const uint8_t *k = (const uint8_t *)key; > - > - /*--------------- all but the last block: affect some 32 bits of (a,b,c) > */ > - while (length > 12) > - { > - a += ((uint32_t)k[0])<<24; > - a += ((uint32_t)k[1])<<16; > - a += ((uint32_t)k[2])<<8; > - a += ((uint32_t)k[3]); > - b += ((uint32_t)k[4])<<24; > - b += ((uint32_t)k[5])<<16; > - b += ((uint32_t)k[6])<<8; > - b += ((uint32_t)k[7]); > - c += ((uint32_t)k[8])<<24; > - c += ((uint32_t)k[9])<<16; > - c += ((uint32_t)k[10])<<8; > - c += ((uint32_t)k[11]); > - mix(a,b,c); > - length -= 12; > - k += 12; > - } > - > - /*-------------------------------- last block: affect all 32 bits of (c) > */ > - switch(length) /* all the case statements fall through > */ > - { > - case 12: c+=k[11]; > - case 11: c+=((uint32_t)k[10])<<8; > - case 10: c+=((uint32_t)k[9])<<16; > - case 9 : c+=((uint32_t)k[8])<<24; > - case 8 : b+=k[7]; > - case 7 : b+=((uint32_t)k[6])<<8; > - case 6 : b+=((uint32_t)k[5])<<16; > - case 5 : b+=((uint32_t)k[4])<<24; > - case 4 : a+=k[3]; > - case 3 : a+=((uint32_t)k[2])<<8; > - case 2 : a+=((uint32_t)k[1])<<16; > - case 1 : a+=((uint32_t)k[0])<<24; > - break; > - case 0 : return c; > - } > - } > - > - final(a,b,c); > - return c; > -} > - > diff --git a/src/lookup3.h b/src/lookup3.h > deleted file mode 100644 > index 50c1cf4..0000000 > --- a/src/lookup3.h > +++ /dev/null > @@ -1,26 +0,0 @@ > -#ifndef __LOOKUP3_H > -#define __LOOKUP3_H > - > -#if defined(__GNUC__) || defined(__sun) > - > -#include <stdint.h> > - > -#else > - > -#ifdef QXLDD > -#include <windef.h> > -#include "os_dep.h" > -#else > -#include <stddef.h> > -#include <basetsd.h> > -#endif > - > -typedef UINT32 uint32_t; > -typedef UINT16 uint16_t; > -typedef UINT8 uint8_t; > - > -#endif > - > -uint32_t hashlittle( const void *key, size_t length, uint32_t initval); > - > -#endif > diff --git a/src/murmurhash3.c b/src/murmurhash3.c > new file mode 100644 > index 0000000..ee1cb30 > --- /dev/null > +++ b/src/murmurhash3.c > @@ -0,0 +1,357 @@ > +//----------------------------------------------------------------------------- > +// MurmurHash3 was written by Austin Appleby, and is placed in the public > +// domain. The author hereby disclaims copyright to this source code. > + > +// Note - The x86 and x64 versions do _not_ produce the same results, as the > +// algorithms are optimized for their respective platforms. You can still > +// compile and run any of them on any platform, but your performance with the > +// non-native version will be less than optimal. > + > +#include "murmurhash3.h" > + > +//----------------------------------------------------------------------------- > +// Platform-specific functions and macros > + > +// Microsoft Visual Studio > + > +#if defined(_MSC_VER) > + > +#define FORCE_INLINE __forceinline > + > +#include <stdlib.h> > + > +#define ROTL32(x,y) _rotl(x,y) > +#define ROTL64(x,y) _rotl64(x,y) > + > +#define BIG_CONSTANT(x) (x) > + > +// Other compilers > + > +#else // defined(_MSC_VER) > + > +#define FORCE_INLINE __attribute__((always_inline)) > + > +static inline uint32_t rotl32 ( uint32_t x, int8_t r ) > +{ > + return (x << r) | (x >> (32 - r)); > +} > + > +static inline uint64_t rotl64 ( uint64_t x, int8_t r ) > +{ > + return (x << r) | (x >> (64 - r)); > +} > + > +#define ROTL32(x,y) rotl32(x,y) > +#define ROTL64(x,y) rotl64(x,y) > + > +#define BIG_CONSTANT(x) (x##LLU) > + > +#endif // !defined(_MSC_VER) > + > +//----------------------------------------------------------------------------- > +// Block read - if your platform needs to do endian-swapping or can only > +// handle aligned reads, do the conversion here > + > +static FORCE_INLINE uint32_t getblock_32 ( const uint32_t * p, int i ) > +{ > + return p[i]; > +} > + > +static FORCE_INLINE uint64_t getblock_64 ( const uint64_t * p, int i ) > +{ > + return p[i]; > +} > + > +//----------------------------------------------------------------------------- > +// Finalization mix - force all bits of a hash block to avalanche > + > +static FORCE_INLINE uint32_t fmix_32 ( uint32_t h ) > +{ > + h ^= h >> 16; > + h *= 0x85ebca6b; > + h ^= h >> 13; > + h *= 0xc2b2ae35; > + h ^= h >> 16; > + > + return h; > +} > + > +//---------- > + > +static FORCE_INLINE uint64_t fmix_64 ( uint64_t k ) > +{ > + k ^= k >> 33; > + k *= BIG_CONSTANT(0xff51afd7ed558ccd); > + k ^= k >> 33; > + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); > + k ^= k >> 33; > + > + return k; > +} > + > +//----------------------------------------------------------------------------- > + > +void MurmurHash3_x86_32 ( const void * key, int len, > + uint32_t seed, void * out ) > +{ > + const uint8_t * data = (const uint8_t*)key; > + const int nblocks = len / 4; > + > + uint32_t h1 = seed; > + > + uint32_t c1 = 0xcc9e2d51; > + uint32_t c2 = 0x1b873593; > + > + const uint32_t * blocks; > + const uint8_t * tail; > + > + uint32_t k1; > + > + int i; > + //---------- > + // body > + > + blocks = (const uint32_t *)(data + nblocks*4); > + > + for(i = -nblocks; i; i++) > + { > + k1 = getblock_32(blocks,i); > + > + k1 *= c1; > + k1 = ROTL32(k1,15); > + k1 *= c2; > + > + h1 ^= k1; > + h1 = ROTL32(h1,13); > + h1 = h1*5+0xe6546b64; > + } > + > + //---------- > + // tail > + > + tail = (const uint8_t*)(data + nblocks*4); > + > + k1 = 0; > + > + switch(len & 3) > + { > + case 3: k1 ^= tail[2] << 16; > + case 2: k1 ^= tail[1] << 8; > + case 1: k1 ^= tail[0]; > + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; > + }; > + > + //---------- > + // finalization > + > + h1 ^= len; > + > + h1 = fmix_32(h1); > + > + *(uint32_t*)out = h1; > +} > + > +//----------------------------------------------------------------------------- > + > +void MurmurHash3_x86_128 ( const void * key, const int len, > + uint32_t seed, void * out ) > +{ > + const uint8_t * data = (const uint8_t*)key; > + const int nblocks = len / 16; > + > + uint32_t h1 = seed; > + uint32_t h2 = seed; > + uint32_t h3 = seed; > + uint32_t h4 = seed; > + > + uint32_t c1 = 0x239b961b; > + uint32_t c2 = 0xab0e9789; > + uint32_t c3 = 0x38b34ae5; > + uint32_t c4 = 0xa1e38b93; > + > + uint32_t k1; > + uint32_t k2; > + uint32_t k3; > + uint32_t k4; > + > + const uint32_t * blocks; > + const uint8_t * tail; > + > + int i; > + > + //---------- > + // body > + > + blocks = (const uint32_t *)(data + nblocks*16); > + > + for(i = -nblocks; i; i++) > + { > + k1 = getblock_32(blocks,i*4+0); > + k2 = getblock_32(blocks,i*4+1); > + k3 = getblock_32(blocks,i*4+2); > + k4 = getblock_32(blocks,i*4+3); > + > + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; > + > + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; > + > + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; > + > + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; > + > + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; > + > + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; > + > + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; > + > + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; > + } > + > + //---------- > + // tail > + > + tail = (const uint8_t*)(data + nblocks*16); > + > + k1 = 0; > + k2 = 0; > + k3 = 0; > + k4 = 0; > + > + switch(len & 15) > + { > + case 15: k4 ^= tail[14] << 16; > + case 14: k4 ^= tail[13] << 8; > + case 13: k4 ^= tail[12] << 0; > + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; > + > + case 12: k3 ^= tail[11] << 24; > + case 11: k3 ^= tail[10] << 16; > + case 10: k3 ^= tail[ 9] << 8; > + case 9: k3 ^= tail[ 8] << 0; > + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; > + > + case 8: k2 ^= tail[ 7] << 24; > + case 7: k2 ^= tail[ 6] << 16; > + case 6: k2 ^= tail[ 5] << 8; > + case 5: k2 ^= tail[ 4] << 0; > + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; > + > + case 4: k1 ^= tail[ 3] << 24; > + case 3: k1 ^= tail[ 2] << 16; > + case 2: k1 ^= tail[ 1] << 8; > + case 1: k1 ^= tail[ 0] << 0; > + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; > + }; > + > + //---------- > + // finalization > + > + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; > + > + h1 += h2; h1 += h3; h1 += h4; > + h2 += h1; h3 += h1; h4 += h1; > + > + h1 = fmix_32(h1); > + h2 = fmix_32(h2); > + h3 = fmix_32(h3); > + h4 = fmix_32(h4); > + > + h1 += h2; h1 += h3; h1 += h4; > + h2 += h1; h3 += h1; h4 += h1; > + > + ((uint32_t*)out)[0] = h1; > + ((uint32_t*)out)[1] = h2; > + ((uint32_t*)out)[2] = h3; > + ((uint32_t*)out)[3] = h4; > +} > + > +//----------------------------------------------------------------------------- > + > +void MurmurHash3_x64_128 ( const void * key, const int len, > + const uint32_t seed, void * out ) > +{ > + const uint8_t * data = (const uint8_t*)key; > + const int nblocks = len / 16; > + > + uint64_t h1 = seed; > + uint64_t h2 = seed; > + > + uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); > + uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); > + > + const uint64_t * blocks; > + const uint8_t * tail; > + > + uint64_t k1; > + uint64_t k2; > + > + int i; > + //---------- > + // body > + > + blocks = (const uint64_t *)(data); > + > + for(i = 0; i < nblocks; i++) > + { > + k1 = getblock_64(blocks,i*2+0); > + k2 = getblock_64(blocks,i*2+1); > + > + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; > + > + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; > + > + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; > + > + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; > + } > + > + //---------- > + // tail > + > + tail = (const uint8_t*)(data + nblocks*16); > + > + k1 = 0; > + k2 = 0; > + > + switch(len & 15) > + { > + case 15: k2 ^= ((uint64_t)tail[14]) << 48; > + case 14: k2 ^= ((uint64_t)tail[13]) << 40; > + case 13: k2 ^= ((uint64_t)tail[12]) << 32; > + case 12: k2 ^= ((uint64_t)tail[11]) << 24; > + case 11: k2 ^= ((uint64_t)tail[10]) << 16; > + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; > + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; > + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; > + > + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; > + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; > + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; > + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; > + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; > + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; > + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; > + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; > + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; > + }; > + > + //---------- > + // finalization > + > + h1 ^= len; h2 ^= len; > + > + h1 += h2; > + h2 += h1; > + > + h1 = fmix_64(h1); > + h2 = fmix_64(h2); > + > + h1 += h2; > + h2 += h1; > + > + ((uint64_t*)out)[0] = h1; > + ((uint64_t*)out)[1] = h2; > +} > + > +//----------------------------------------------------------------------------- > diff --git a/src/murmurhash3.h b/src/murmurhash3.h > new file mode 100644 > index 0000000..c4752cf > --- /dev/null > +++ b/src/murmurhash3.h > @@ -0,0 +1,39 @@ > +// Source: http://code.google.com/p/smhasher/wiki/MurmurHash3 > + > +//----------------------------------------------------------------------------- > +// MurmurHash3 was written by Austin Appleby, and is placed in the public > +// domain. The author hereby disclaims copyright to this source code. > + > +#ifndef _MURMURHASH3_H_ > +#define _MURMURHASH3_H_ > + > +//----------------------------------------------------------------------------- > +// Platform-specific functions and macros > + > +// Microsoft Visual Studio > + > +#if defined(_MSC_VER) > + > +typedef unsigned char uint8_t; > +typedef unsigned long uint32_t; > +typedef unsigned __int64 uint64_t; > + > +// Other compilers > + > +#else // defined(_MSC_VER) > + > +#include <stdint.h> > + > +#endif // !defined(_MSC_VER) > + > +//----------------------------------------------------------------------------- > + > +void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * > out ); > + > +void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * > out ); > + > +void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * > out ); > + > +//----------------------------------------------------------------------------- > + > +#endif // _MURMURHASH3_H_ > diff --git a/src/qxl_image.c b/src/qxl_image.c > index 0ffb031..0b39070 100644 > --- a/src/qxl_image.c > +++ b/src/qxl_image.c > @@ -28,7 +28,7 @@ > #include <assert.h> > #include <stdlib.h> > #include "qxl.h" > -#include "lookup3.h" > +#include "murmurhash3.h" > > typedef struct image_info_t image_info_t; > > @@ -47,7 +47,7 @@ hash_and_copy (const uint8_t *src, int src_stride, > uint8_t *dest, int dest_stride, > int bytes_per_pixel, int width, int height) > { > - unsigned int hash = 0; > + uint32_t hash = 0; > int i; > > for (i = 0; i < height; ++i) > @@ -59,7 +59,8 @@ hash_and_copy (const uint8_t *src, int src_stride, > if (dest) > memcpy (dest_line, src_line, n_bytes); > > - hash = hashlittle (src_line, n_bytes, hash); > + // Hash is not used by anyone right now. > + MurmurHash3_x86_32 (src_line, n_bytes, hash, &hash); > } > > return hash; > @@ -136,6 +137,8 @@ qxl_image_create (qxl_screen_t *qxl, const uint8_t *data, > > data += y * stride + x * Bpp; > > + (void)hash; > + (void)info; // silence warnings until hash gets resurrected > #if 0 > hash = hash_and_copy (data, stride, NULL, -1, Bpp, width, height); > > -- > 1.7.8.4 > > _______________________________________________ > Spice-devel mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/spice-devel _______________________________________________ Spice-devel mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/spice-devel
