This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8da3d11d94 [GLUTEN-11088] Fix GlutenTakeOrderedAndProjectSuite in Spark-4.0 (#11194)
8da3d11d94 is described below
commit 8da3d11d9436a3bd702ec5abd209784c088ce0ae
Author: Mingliang Zhu <[email protected]>
AuthorDate: Thu Nov 27 16:34:43 2025 +0800
[GLUTEN-11088] Fix GlutenTakeOrderedAndProjectSuite in Spark-4.0 (#11194)
---
.../gluten/utils/velox/VeloxTestSettings.scala | 2 +-
.../GlutenTakeOrderedAndProjectSuite.scala | 46 +++++++++++++++++++++-
2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 39786d859a..cd505ffa21 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -636,7 +636,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenSQLWindowFunctionSuite]
.exclude("test with low buffer spill threshold")
enableSuite[GlutenTakeOrderedAndProjectSuite]
- // TODO: fix in Spark-4.0
+ // The results of rand() differ between vanilla spark and velox.
.exclude("SPARK-47104: Non-deterministic expressions in projection")
enableSuite[GlutenSessionExtensionSuite]
enableSuite[TestFileSourceScanExecTransformer]
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
index bc231e52ad..2731e05471 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
@@ -16,8 +16,50 @@
*/
package org.apache.spark.sql.execution
-import org.apache.spark.sql.GlutenSQLTestsBaseTrait
+import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand}
+import org.apache.spark.sql.types.{IntegerType, StructType}
class GlutenTakeOrderedAndProjectSuite
extends TakeOrderedAndProjectSuite
- with GlutenSQLTestsBaseTrait {}
+ with GlutenSQLTestsBaseTrait {
+
+ private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan)
+
+ testGluten("SPARK-47104: Non-deterministic expressions in projection") {
+ val expected = (input: SparkPlan) => {
+ GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input)))
+ }
+ val schema = StructType.fromDDL("a int, b int, c double")
+ val rdd = sparkContext.parallelize(
+ Seq(
+ Row(1, 2, 0.6027633705776989d),
+ Row(2, 3, 0.7151893651681639d),
+ Row(3, 4, 0.5488135024422883d)),
+ 1)
+ val df = spark.createDataFrame(rdd, schema)
+ val projection = df.queryExecution.sparkPlan.output.take(2) :+
+ Alias(Rand(Literal(0, IntegerType)), "_uuid")()
+
+ // test executeCollect
+ checkThatPlansAgree(
+ df,
+ input =>
+ TakeOrderedAndProjectExec(limit, sortOrder, projection, SortExec(sortOrder, false, input)),
+ input => expected(input),
+ sortAnswers = false)
+
+ // test doExecute
+ checkThatPlansAgree(
+ df,
+ input =>
+ noOpFilter(
+ TakeOrderedAndProjectExec(
+ limit,
+ sortOrder,
+ projection,
+ SortExec(sortOrder, false, input))),
+ input => expected(input),
+ sortAnswers = false)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]