This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ffef3d4 [SPARK-26950][SQL][TEST] Make RandomDataGenerator use Float.NaN or Double.NaN for all NaN values ffef3d4 is described below commit ffef3d40741b0be321421aa52a6e17a26d89f541 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Fri Feb 22 12:25:26 2019 +0800 [SPARK-26950][SQL][TEST] Make RandomDataGenerator use Float.NaN or Double.NaN for all NaN values ## What changes were proposed in this pull request? Apache Spark uses the predefined `Float.NaN` and `Double.NaN` for NaN values, but there exist other NaN values with different binary representations. ```scala scala> java.nio.ByteBuffer.allocate(4).putFloat(Float.NaN).array res1: Array[Byte] = Array(127, -64, 0, 0) scala> val x = java.lang.Float.intBitsToFloat(-6966608) x: Float = NaN scala> java.nio.ByteBuffer.allocate(4).putFloat(x).array res2: Array[Byte] = Array(-1, -107, -78, -80) ``` Since users can have these values, `RandomDataGenerator` generates these NaN values. However, this causes `checkEvaluationWithUnsafeProjection` failures due to differences in the `UnsafeRow` binary representation. The following is an instance of the unit test (UT) failure. This PR aims to fix this UT flakiness. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/102528/testReport/ ## How was this patch tested? Passes Jenkins with the newly added test cases. Closes #23851 from dongjoon-hyun/SPARK-26950. 
Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/RandomDataGenerator.scala | 24 +++++++++++++++-- .../spark/sql/RandomDataGeneratorSuite.scala | 31 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 8ae3ff5..d361e62 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import java.lang.Double.longBitsToDouble -import java.lang.Float.intBitsToFloat import java.math.MathContext import scala.collection.mutable @@ -70,6 +68,28 @@ object RandomDataGenerator { } /** + * A wrapper of Float.intBitsToFloat to use a unique NaN value for all NaN values. + * This prevents `checkEvaluationWithUnsafeProjection` from failing due to + * the difference between `UnsafeRow` binary presentation for NaN. + * This is visible for testing. + */ + def intBitsToFloat(bits: Int): Float = { + val value = java.lang.Float.intBitsToFloat(bits) + if (value.isNaN) Float.NaN else value + } + + /** + * A wrapper of Double.longBitsToDouble to use a unique NaN value for all NaN values. + * This prevents `checkEvaluationWithUnsafeProjection` from failing due to + * the difference between `UnsafeRow` binary presentation for NaN. + * This is visible for testing. + */ + def longBitsToDouble(bits: Long): Double = { + val value = java.lang.Double.longBitsToDouble(bits) + if (value.isNaN) Double.NaN else value + } + + /** * Returns a randomly generated schema, based on the given accepted types. 
* * @param numFields the number of fields in this schema diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala index 3c2f8a2..3e62ca0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql +import java.nio.ByteBuffer +import java.util.Arrays + import scala.util.Random import org.apache.spark.SparkFunSuite @@ -106,4 +109,32 @@ class RandomDataGeneratorSuite extends SparkFunSuite { assert(deviation.toDouble / expectedTotalElements < 2e-1) } } + + test("Use Float.NaN for all NaN values") { + val bits = -6966608 + val nan1 = java.lang.Float.intBitsToFloat(bits) + val nan2 = RandomDataGenerator.intBitsToFloat(bits) + assert(nan1.isNaN) + assert(nan2.isNaN) + + val arrayExpected = ByteBuffer.allocate(4).putFloat(Float.NaN).array + val array1 = ByteBuffer.allocate(4).putFloat(nan1).array + val array2 = ByteBuffer.allocate(4).putFloat(nan2).array + assert(!Arrays.equals(array1, arrayExpected)) + assert(Arrays.equals(array2, arrayExpected)) + } + + test("Use Double.NaN for all NaN values") { + val bits = -6966608 + val nan1 = java.lang.Double.longBitsToDouble(bits) + val nan2 = RandomDataGenerator.longBitsToDouble(bits) + assert(nan1.isNaN) + assert(nan2.isNaN) + + val arrayExpected = ByteBuffer.allocate(8).putDouble(Double.NaN).array + val array1 = ByteBuffer.allocate(8).putDouble(nan1).array + val array2 = ByteBuffer.allocate(8).putDouble(nan2).array + assert(!Arrays.equals(array1, arrayExpected)) + assert(Arrays.equals(array2, arrayExpected)) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org