Author: tdunning
Date: Fri Sep 17 18:41:29 2010
New Revision: 998245
URL: http://svn.apache.org/viewvc?rev=998245&view=rev
Log:
MAHOUT-503 - added tests for 32 bit murmur hash.
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/vectors/MurmurHashTest.java
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/vectors/MurmurHashTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/MurmurHashTest.java?rev=998245&r1=998244&r2=998245&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/MurmurHashTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/MurmurHashTest.java Fri Sep 17 18:41:29 2010
@@ -22,23 +22,99 @@ import org.apache.mahout.common.MahoutTe
import org.junit.Test;
import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
public class MurmurHashTest extends MahoutTestCase {
- @Test
- public void testForLotsOfChange() throws UnsupportedEncodingException {
- long h1 = MurmurHash.hash64A("abc".getBytes(Charsets.UTF_8), 0);
- long h2 = MurmurHash.hash64A("abc ".getBytes(Charsets.UTF_8), 0);
- int flipCount = Long.bitCount(h1 ^ h2);
- assertTrue("Small changes should result in lots of bit flips, only found " + flipCount, flipCount > 25);
+ @Test
+ public void testForLotsOfChange64() throws UnsupportedEncodingException {
+ long h1 = MurmurHash.hash64A("abc".getBytes(Charsets.UTF_8), 0);
+ long h2 = MurmurHash.hash64A("abc ".getBytes(Charsets.UTF_8), 0);
+ int flipCount = Long.bitCount(h1 ^ h2);
+ assertTrue("Small changes should result in lots of bit flips, only found " + flipCount, flipCount > 25);
+ }
+
+ @Test
+ public void testHash64() {
+ // test data generated by running MurmurHash2_64.cpp
+ assertEquals(0x9cc9c33498a95efbL, MurmurHash.hash64A("abc".getBytes(Charsets.UTF_8), 0));
+ assertEquals(0xd2c8c9b470122bddL, MurmurHash.hash64A("abc def ghi jkl ".getBytes(Charsets.UTF_8), 0));
+ assertEquals(0xcd37895736a81cbcL, MurmurHash.hash64A("abc def ghi jkl moreGoo".getBytes(Charsets.UTF_8), 0));
+ }
+
+ @Test
+ public void testForLotsOfChange32() throws UnsupportedEncodingException {
+ int h1 = MurmurHash.hash("abc".getBytes(Charsets.UTF_8), 0);
+ int h2 = MurmurHash.hash("abc ".getBytes(Charsets.UTF_8), 0);
+ int flipCount = Integer.bitCount(h1 ^ h2);
+ assertTrue("Small changes should result in lots of bit flips, only found " + flipCount, flipCount > 14);
+ }
+
+ // tests lifted from http://dmy999.com/article/50/murmurhash-2-java-port
+ // code was marked with this notice:
+ // released to the public domain - [email protected]
+
+ // expected values are generated from the output of a C driver that
+ // ran against the same input
+
+ @Test
+ public void testChangingSeed() {
+ // use a fixed key
+ byte[] key = new byte[]{0x4E, (byte) 0xE3, (byte) 0x91, 0x00,
+ 0x10, (byte) 0x8F, (byte) 0xFF};
+
+ int[] expected = {0xeef8be32, 0x8109dec6, 0x9aaf4192, 0xc1bcaf1c,
+ 0x821d2ce4, 0xd45ed1df, 0x6c0357a7, 0x21d4e845,
+ 0xfa97db50, 0x2f1985c8, 0x5d69782a, 0x0d6e4b85,
+ 0xe7d9cf6b, 0x337e6b49, 0xe1606944, 0xccc18ae8};
+
+ for (int i = 0; i < expected.length; i++) {
+ int expectedHash = expected[i];
+ int hash = MurmurHash.hash(key, i);
+ assertEquals("i = " + i, expectedHash, hash);
}
+ }
- @Test
- public void testHash64() {
- // test data generated by running MurmurHash2_64.cpp
- assertEquals(0x9cc9c33498a95efbL, MurmurHash.hash64A("abc".getBytes(Charsets.UTF_8), 0));
- assertEquals(0xd2c8c9b470122bddL, MurmurHash.hash64A("abc def ghi jkl ".getBytes(Charsets.UTF_8), 0));
- assertEquals(0xcd37895736a81cbcL, MurmurHash.hash64A("abc def ghi jkl moreGoo".getBytes(Charsets.UTF_8), 0));
+ @Test
+ public void testChangingKey() {
+ byte[] key = new byte[133];
+
+ int[] expected = {0xd743ae0b, 0xf1b461c6, 0xa45a6ceb, 0xdb15e003,
+ 0x877721a4, 0xc30465f1, 0xfb658ba4, 0x1adf93b2,
+ 0xe40a7931, 0x3da52db0, 0xbf523511, 0x1efaf273,
+ 0xe628c1dd, 0x9a0344df, 0x901c99fc, 0x5ae1aa44};
+ for (int i = 0; i < 16; i++) {
+ // keep seed constant, generate a known key pattern
+ setKey(key, i);
+ int expectedHash = expected[i];
+ int hash = MurmurHash.hash(key, 0x1234ABCD);
+ assertEquals("i = " + i, expectedHash, hash);
+ }
+ }
+
+ @Test
+ public void testChangingKeyLength() {
+ int[] expected = {0xa0c72f8e, 0x29c2f97e, 0x00ca8bba, 0x88387876,
+ 0xe203ce49, 0x58d75952, 0xab84febe, 0x98153c65,
+ 0xcbb38375, 0x6ea1a28b, 0x9afa8f55, 0xfb890eb6,
+ 0x9516cc49, 0x6408a8eb, 0xbb12d3e6, 0x00fb7519};
+ // vary the key and the length
+ for (int i = 0; i < 16; i++) {
+ byte[] key = new byte[i];
+ setKey(key, i);
+ int expectedHash = expected[i];
+ int hash = MurmurHash.hash(key, 0x7870AAFF);
+ assertEquals("i = " + i, expectedHash, hash);
}
+ }
+
+ /**
+ * Fill a key with a known pattern (incrementing numbers)
+ */
+ private void setKey(byte[] key, int start) {
+ for (int i = 0; i < key.length; i++) {
+ key[i] = (byte) ((start + i) & 0xFF);
+ }
+ }
+
+
}