Evan Martin, in case you're not already aware of it: murmurhash fails
spectacularly on any architecture with strict alignment requirements.

The byte-by-byte loads in the patch below are an idea stolen from siphash.

By the way, it looks like using siphash instead of murmurhash might be a
good idea. At least THOSE guys know how to write portable code, unlike
the Google dude who wrote murmurhash:

https://131002.net/siphash/

Grepping for murmurhash, I see it's also used for 64-bit data, so I
applied the same modification there (there's a chance that data is
32-bit aligned, but I'm not THAT sure about it).



Index: Makefile
===================================================================
RCS file: /cvs/ports/devel/ninja/Makefile,v
retrieving revision 1.4
diff -u -p -r1.4 Makefile
--- Makefile    2 Jul 2013 06:37:24 -0000       1.4
+++ Makefile    2 Jul 2013 14:33:38 -0000
@@ -3,7 +3,7 @@
 COMMENT =              small build system with a focus on speed
 V =                    1.3.4
 DISTNAME =             ninja-${V}
-REVISION =             0
+REVISION =             1
 CATEGORIES =           devel
 HOMEPAGE =             http://martine.github.io/ninja/
 MAINTAINER =           Matthew Dempsky <[email protected]>
Index: patches/patch-src_build_log_cc
===================================================================
RCS file: patches/patch-src_build_log_cc
diff -N patches/patch-src_build_log_cc
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-src_build_log_cc      2 Jul 2013 14:33:38 -0000
@@ -0,0 +1,50 @@
+$OpenBSD$
+--- src/build_log.cc.orig      Tue Jun  4 20:47:31 2013
++++ src/build_log.cc   Tue Jul  2 18:30:01 2013
+@@ -54,26 +54,34 @@ uint64_t MurmurHash64A(const void* key, size_t len) {
+   const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+   const int r = 47;
+   uint64_t h = seed ^ (len * m);
+-  const uint64_t * data = (const uint64_t *)key;
+-  const uint64_t * end = data + (len/8);
+-  while (data != end) {
++  const unsigned char* data = (const unsigned char *)key;
++  while (len > 8) {
+     uint64_t k = *data++;
++      uint64_t(data[0]) |
++      (uint64_t(data[1]) << 8) |
++      (uint64_t(data[2]) << 16) |
++      (uint64_t(data[3]) << 24) |
++      (uint64_t(data[4]) << 32) |
++      (uint64_t(data[5]) << 40) |
++      (uint64_t(data[6]) << 48) |
++      (uint64_t(data[7]) << 56);
+     k *= m;
+     k ^= k >> r;
+     k *= m;
+     h ^= k;
++    data += 8;
++    len -= 8;
+     h *= m;
+   }
+-  const unsigned char* data2 = (const unsigned char*)data;
+-  switch (len & 7)
++  switch (len)
+   {
+-  case 7: h ^= uint64_t(data2[6]) << 48;
+-  case 6: h ^= uint64_t(data2[5]) << 40;
+-  case 5: h ^= uint64_t(data2[4]) << 32;
+-  case 4: h ^= uint64_t(data2[3]) << 24;
+-  case 3: h ^= uint64_t(data2[2]) << 16;
+-  case 2: h ^= uint64_t(data2[1]) << 8;
+-  case 1: h ^= uint64_t(data2[0]);
++  case 7: h ^= uint64_t(data[6]) << 48;
++  case 6: h ^= uint64_t(data[5]) << 40;
++  case 5: h ^= uint64_t(data[4]) << 32;
++  case 4: h ^= uint64_t(data[3]) << 24;
++  case 3: h ^= uint64_t(data[2]) << 16;
++  case 2: h ^= uint64_t(data[1]) << 8;
++  case 1: h ^= uint64_t(data[0]);
+           h *= m;
+   };
+   h ^= h >> r;
Index: patches/patch-src_hash_map_h
===================================================================
RCS file: /cvs/ports/devel/ninja/patches/patch-src_hash_map_h,v
retrieving revision 1.1
diff -u -p -r1.1 patch-src_hash_map_h
--- patches/patch-src_hash_map_h        2 Jul 2013 06:37:24 -0000       1.1
+++ patches/patch-src_hash_map_h        2 Jul 2013 14:33:38 -0000
@@ -2,15 +2,18 @@ $OpenBSD: patch-src_hash_map_h,v 1.1 201
 
 Work-around unaligned accesses on strict arches such as sparc64/mips64(el).
 
---- src/hash_map.h.orig        Mon Jul  1 22:42:26 2013
-+++ src/hash_map.h     Mon Jul  1 22:42:54 2013
-@@ -26,7 +26,8 @@ unsigned int MurmurHash2(const void* key, size_t len) 
+--- src/hash_map.h.orig        Tue Jun  4 20:47:31 2013
++++ src/hash_map.h     Tue Jul  2 18:22:43 2013
+@@ -26,7 +26,11 @@ unsigned int MurmurHash2(const void* key, size_t len) 
    unsigned int h = seed ^ len;
    const unsigned char * data = (const unsigned char *)key;
    while (len >= 4) {
 -    unsigned int k = *(unsigned int *)data;
-+    unsigned int k;
-+    memcpy(&k, data, sizeof k);
++    unsigned int k = 
++      (unsigned int)(data[0]) |
++      ((unsigned int)(data[1]) << 8) |
++      ((unsigned int)(data[2]) << 16) |
++      ((unsigned int)(data[3]) << 24);
      k *= m;
      k ^= k >> r;
      k *= m;

Reply via email to