Repository: spark Updated Branches: refs/heads/branch-1.5 0abbc1813 -> ccda27a9b
[SPARK-10325] Override hashCode() for public Row This commit fixes an issue where the public SQL `Row` class did not override `hashCode`, causing it to violate the hashCode() + equals() contract. To fix this, I simply ported the `hashCode` implementation from the 1.4.x version of `Row`. Author: Josh Rosen <[email protected]> Closes #8500 from JoshRosen/SPARK-10325 and squashes the following commits: 51ffea1 [Josh Rosen] Override hashCode() for public Row. (cherry picked from commit d3f87dc39480f075170817bbd00142967a938078) Signed-off-by: Michael Armbrust <[email protected]> Conflicts: sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ccda27a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ccda27a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ccda27a9 Branch: refs/heads/branch-1.5 Commit: ccda27a9beb97b11c2522a0700165fd849af44b1 Parents: 0abbc18 Author: Josh Rosen <[email protected]> Authored: Fri Aug 28 11:51:42 2015 -0700 Committer: Michael Armbrust <[email protected]> Committed: Fri Aug 28 12:05:37 2015 -0700 ---------------------------------------------------------------------- .../src/main/scala/org/apache/spark/sql/Row.scala | 15 ++++++++++++++- .../test/scala/org/apache/spark/sql/RowSuite.scala | 9 +++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ccda27a9/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index ec895af..088b7e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.InternalRow +import scala.util.hashing.MurmurHash3 + import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types.StructType @@ -410,6 +411,18 @@ trait Row extends Serializable { true } + override def hashCode: Int = { + // Using Scala's Seq hash code implementation. + var n = 0 + var h = MurmurHash3.seqSeed + val len = length + while (n < len) { + h = MurmurHash3.mix(h, apply(n).##) + n += 1 + } + MurmurHash3.finalizeHash(h, n) + } + /* ---------------------- utility methods for Scala ---------------------- */ /** http://git-wip-us.apache.org/repos/asf/spark/blob/ccda27a9/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala index 795d4e9..77ccd6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala @@ -85,4 +85,13 @@ class RowSuite extends SparkFunSuite with SharedSQLContext { val r2 = Row(Double.NaN) assert(r1 === r2) } + + test("equals and hashCode") { + val r1 = Row("Hello") + val r2 = Row("Hello") + assert(r1 === r2) + assert(r1.hashCode() === r2.hashCode()) + val r3 = Row("World") + assert(r3.hashCode() != r1.hashCode()) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
