Author: cutting
Date: Mon May 21 16:01:20 2007
New Revision: 540342

URL: http://svn.apache.org/viewvc?view=rev&rev=540342
Log:
HADOOP-1385. Fix MD5Hash#hashCode() to generally hash to more than 256 values. Contributed by Owen.

Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMD5Hash.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=540342&r1=540341&r2=540342 ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Mon May 21 16:01:20 2007 @@ -452,6 +452,9 @@ 122. HADOOP-1388. Fix a potential NullPointerException in web ui. (Devaraj Das via cutting) +123. HADOOP-1385. Fix MD5Hash#hashCode() to generally hash to more + than 256 values. (omalley via cutting) + Release 0.12.3 - 2007-04-06 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java?view=diff&rev=540342&r1=540341&r2=540342 ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java Mon May 21 16:01:20 2007 @@ -116,6 +116,17 @@ return value; } + /** + * Return a 32-bit digest of the MD5. + * @return the first 4 bytes of the md5 + */ + public int quarterDigest() { + int value = 0; + for (int i = 0; i < 4; i++) + value |= ((digest[i] & 0xff) << (8*(3-i))); + return value; + } + /** Returns true iff <code>o</code> is an MD5Hash whose digest contains the * same values. */ public boolean equals(Object o) { @@ -125,13 +136,11 @@ return Arrays.equals(this.digest, other.digest); } - /** Returns a hash code value for this object.*/ + /** Returns a hash code value for this object. + * Only uses the first 4 bytes, since md5s are evenly distributed. 
+ */ public int hashCode() { - return // xor four ints - (digest[ 0] | (digest[ 1]<<8) | (digest[ 2]<<16) | (digest[ 3]<<24)) ^ - (digest[ 4] | (digest[ 5]<<8) | (digest[ 6]<<16) | (digest[ 7]<<24)) ^ - (digest[ 8] | (digest[ 9]<<8) | (digest[10]<<16) | (digest[11]<<24)) ^ - (digest[12] | (digest[13]<<8) | (digest[14]<<16) | (digest[15]<<24)); + return quarterDigest(); } Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMD5Hash.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMD5Hash.java?view=diff&rev=540342&r1=540341&r2=540342 ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMD5Hash.java (original) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMD5Hash.java Mon May 21 16:01:20 2007 @@ -46,6 +46,16 @@ MD5Hash md5HashFF = new MD5Hash(new byte[] {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}); + + MD5Hash orderedHash = new MD5Hash(new byte[]{1,2,3,4,5,6,7,8,9,10,11,12, + 13,14,15,16}); + MD5Hash backwardHash = new MD5Hash(new byte[]{-1,-2,-3,-4,-5,-6,-7,-8, + -9,-10,-11,-12, -13, -14, + -15,-16}); + MD5Hash closeHash1 = new MD5Hash(new byte[]{-1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0}); + MD5Hash closeHash2 = new MD5Hash(new byte[]{-1,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0}); // test i/o TestWritable.testWritable(md5Hash); @@ -67,6 +77,13 @@ assertEquals(md5Hash00, new MD5Hash(md5Hash00.toString())); assertEquals(md5HashFF, new MD5Hash(md5HashFF.toString())); + assertEquals(0x01020304, orderedHash.quarterDigest()); + assertEquals(0xfffefdfc, backwardHash.quarterDigest()); + + assertEquals(0x0102030405060708L, orderedHash.halfDigest()); + assertEquals(0xfffefdfcfbfaf9f8L, backwardHash.halfDigest()); + assertTrue("hash collision", + closeHash1.hashCode() != closeHash2.hashCode()); } }