maropu commented on a change in pull request #32496:
URL: https://github.com/apache/spark/pull/32496#discussion_r631478745
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
##########
@@ -369,11 +369,25 @@ abstract class HashExpression[E] extends Expression {
protected def genHashBoolean(input: String, result: String): String =
genHashInt(s"$input ? 1 : 0", result)
- protected def genHashFloat(input: String, result: String): String =
- genHashInt(s"Float.floatToIntBits($input)", result)
+ protected def genHashFloat(input: String, result: String): String = {
+ s"""
+ |if(Float.floatToIntBits($input) == Float.floatToIntBits(-0.0f)) {
Review comment:
Why do we need to use `floatToIntBits` here? Could we use `$input == -0.0f` instead?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
##########
@@ -369,11 +369,25 @@ abstract class HashExpression[E] extends Expression {
protected def genHashBoolean(input: String, result: String): String =
genHashInt(s"$input ? 1 : 0", result)
- protected def genHashFloat(input: String, result: String): String =
- genHashInt(s"Float.floatToIntBits($input)", result)
+ protected def genHashFloat(input: String, result: String): String = {
+ s"""
+ |if(Float.floatToIntBits($input) == Float.floatToIntBits(-0.0f)) {
+ | ${genHashInt(s"Float.floatToIntBits(0.0f)", result)}
+ |} else {
+ | ${genHashInt(s"Float.floatToIntBits($input)", result)}
+ |}
+ """.stripMargin
+ }
- protected def genHashDouble(input: String, result: String): String =
- genHashLong(s"Double.doubleToLongBits($input)", result)
+ protected def genHashDouble(input: String, result: String): String = {
+ s"""
+ |if(Double.doubleToLongBits($input) == Double.doubleToLongBits(-0.0d)) {
Review comment:
Ditto: could we use `$input == -0.0d` here instead of `doubleToLongBits`?
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
##########
@@ -708,6 +708,16 @@ class HashExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(HiveHash(Seq(yearMonth)), 1234)
}
+ test("SPARK-35207: Compute hash consistent between -0.0 and 0.0") {
+ def checkResult(exprs1: Expression, exprs2: Expression): Unit = {
+ assert(Murmur3Hash(Seq(exprs1), 42).eval() == Murmur3Hash(Seq(exprs2),
42).eval())
+ assert(XxHash64(Seq(exprs1), 42).eval() == XxHash64(Seq(exprs2),
42).eval())
+ assert(HiveHash(Seq(exprs1)).eval() == HiveHash(Seq(exprs2)).eval())
+ }
+ checkResult(Literal.create(0D, DoubleType), Literal.create(-0D,
DoubleType))
Review comment:
Please use `checkEvaluation` instead.
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
##########
@@ -654,4 +654,30 @@ class WholeStageCodegenSuite extends QueryTest with
SharedSparkSession
}
}
}
+
+ test("SPARK-35207: Compute hash consistent between -0.0 and 0.0 doubles with
Codegen") {
Review comment:
I think we don't need to add tests here. (It's okay just to add tests in
`HashExpressionsSuite`.)
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
##########
@@ -708,6 +708,16 @@ class HashExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(HiveHash(Seq(yearMonth)), 1234)
}
+ test("SPARK-35207: Compute hash consistent between -0.0 and 0.0") {
+ def checkResult(exprs1: Expression, exprs2: Expression): Unit = {
+ assert(Murmur3Hash(Seq(exprs1), 42).eval() == Murmur3Hash(Seq(exprs2),
42).eval())
+ assert(XxHash64(Seq(exprs1), 42).eval() == XxHash64(Seq(exprs2),
42).eval())
+ assert(HiveHash(Seq(exprs1)).eval() == HiveHash(Seq(exprs2)).eval())
+ }
+ checkResult(Literal.create(0D, DoubleType), Literal.create(-0D,
DoubleType))
Review comment:
Could you add float tests here, too?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]