Updated Branches: refs/heads/master 51aa9d6e9 -> 718cc803f
Fixes to AppendOnlyMap: - Use Murmur Hash 3 finalization step to scramble the bits of HashCode instead of the simpler version in java.util.HashMap; the latter one had trouble with ranges of consecutive integers. Murmur Hash 3 is used by fastutil. - Use Object.equals() instead of Scala's == to compare keys, because the latter does extra casts for numeric types (see the equals method in https://github.com/scala/scala/blob/master/src/library/scala/runtime/BoxesRunTime.java) Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/7535d7fb Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/7535d7fb Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/7535d7fb Branch: refs/heads/master Commit: 7535d7fbcbe3c0c2515a2d17a806fa523917e398 Parents: 51aa9d6 Author: Matei Zaharia <[email protected]> Authored: Sat Nov 23 17:21:37 2013 -0800 Committer: Matei Zaharia <[email protected]> Committed: Sat Nov 23 17:21:37 2013 -0800 ---------------------------------------------------------------------- .../scala/org/apache/spark/util/AppendOnlyMap.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7535d7fb/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala index f60deaf..8542541 100644 --- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala @@ -56,7 +56,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { return data(2 * pos + 1).asInstanceOf[V] } else if (curKey.eq(null)) { return null.asInstanceOf[V] @@ -104,7 +104,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi var i = 1 while (true) { val curKey = data(2 * pos) - if (k.eq(curKey) || k == curKey) { + if (k.eq(curKey) || k.equals(curKey)) { val newValue = updateFunc(true, data(2 * pos + 1).asInstanceOf[V]) data(2 * pos + 1) = newValue.asInstanceOf[AnyRef] return newValue @@ -167,12 +167,11 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi } /** - * Re-hash a value to deal better with hash functions that don't differ - * in the lower bits, similar to java.util.HashMap + * Re-hash a value to deal better with hash functions that don't differ in the lower bits. + * We use the Murmur Hash 3 finalization step that's also used in fastutil. */ private def rehash(h: Int): Int = { - val r = h ^ (h >>> 20) ^ (h >>> 12) - r ^ (r >>> 7) ^ (r >>> 4) + it.unimi.dsi.fastutil.HashCommon.murmurHash3(h) } /** @@ -190,7 +189,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi data(2 * pos) = key data(2 * pos + 1) = value.asInstanceOf[AnyRef] return true - } else if (curKey.eq(key) || curKey == key) { + } else if (curKey.eq(key) || curKey.equals(key)) { data(2 * pos + 1) = value.asInstanceOf[AnyRef] return false } else {
