This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1a577b8f8881 [SPARK-46980][SQL][MINOR] Avoid using internal APIs in dataframe end-to-end tests 1a577b8f8881 is described below commit 1a577b8f88816eabd78b3924f1e159f2593209c5 Author: Mark Jarvin <mark.jar...@databricks.com> AuthorDate: Tue Feb 6 09:11:19 2024 +0800 [SPARK-46980][SQL][MINOR] Avoid using internal APIs in dataframe end-to-end tests ### What changes were proposed in this pull request? Avoid using the internal `XORShiftRandom` API in tests by instead using public APIs to collect the random values. ### Why are the changes needed? Testing using internal APIs introduces unnecessary coupling between the implementation and the test. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran tests using SBT ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45034 from markj-db/test-public-api-rand. Authored-by: Mark Jarvin <mark.jar...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala | 7 ++----- sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 4 +--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index d6cf77572731..cbc39557ce4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -305,11 +305,8 @@ class DataFrameSetOperationsSuite extends QueryTest // When generating expected results at here, we need to follow the implementation of // Rand expression. def expected(df: DataFrame): Seq[Row] = - df.rdd.collectPartitions().zipWithIndex.flatMap { - case (data, index) => - val rng = new org.apache.spark.util.random.XORShiftRandom(7 + index) - data.filter(_.getInt(0) < rng.nextDouble() * 10) - }.toSeq + df.select($"i", rand(7) * 10).as[(Long, Double)].collect() + .filter(r => r._1 < r._2).map(r => Row(r._1)).toImmutableArraySeq val union = df1.union(df2) checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 1442320e9a94..d85c7b7dfa3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -57,7 +57,6 @@ import org.apache.spark.tags.SlowSQLTest import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils -import org.apache.spark.util.random.XORShiftRandom @SlowSQLTest class DataFrameSuite extends QueryTest @@ -1922,8 +1921,7 @@ class DataFrameSuite extends QueryTest test("SPARK-9083: sort with non-deterministic expressions") { val seed = 33 val df = (1 to 100).map(Tuple1.apply).toDF("i").repartition(1) - val random = new XORShiftRandom(seed) - val expected = (1 to 100).map(_ -> random.nextDouble()).sortBy(_._2).map(_._1) + val expected = df.select($"i", rand(seed)).as[(Long, Double)].collect().sortBy(_._2).map(_._1) val actual = df.sort(rand(seed)).collect().map(_.getInt(0)) assert(expected === actual) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org