This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 8da3d11d94 [GLUTEN-11088] Fix GlutenTakeOrderedAndProjectSuite in Spark-4.0 (#11194)
8da3d11d94 is described below

commit 8da3d11d9436a3bd702ec5abd209784c088ce0ae
Author: Mingliang Zhu <[email protected]>
AuthorDate: Thu Nov 27 16:34:43 2025 +0800

    [GLUTEN-11088] Fix GlutenTakeOrderedAndProjectSuite in Spark-4.0 (#11194)
---
 .../gluten/utils/velox/VeloxTestSettings.scala     |  2 +-
 .../GlutenTakeOrderedAndProjectSuite.scala         | 46 +++++++++++++++++++++-
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 39786d859a..cd505ffa21 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -636,7 +636,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenSQLWindowFunctionSuite]
     .exclude("test with low buffer spill threshold")
   enableSuite[GlutenTakeOrderedAndProjectSuite]
-    // TODO: fix in Spark-4.0
+    // The results of rand() differ between vanilla spark and velox.
     .exclude("SPARK-47104: Non-deterministic expressions in projection")
   enableSuite[GlutenSessionExtensionSuite]
   enableSuite[TestFileSourceScanExecTransformer]
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
index bc231e52ad..2731e05471 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
@@ -16,8 +16,50 @@
  */
 package org.apache.spark.sql.execution
 
-import org.apache.spark.sql.GlutenSQLTestsBaseTrait
+import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand}
+import org.apache.spark.sql.types.{IntegerType, StructType}
 
 class GlutenTakeOrderedAndProjectSuite
   extends TakeOrderedAndProjectSuite
-  with GlutenSQLTestsBaseTrait {}
+  with GlutenSQLTestsBaseTrait {
+
+  private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan)
+
+  testGluten("SPARK-47104: Non-deterministic expressions in projection") {
+    val expected = (input: SparkPlan) => {
+      GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input)))
+    }
+    val schema = StructType.fromDDL("a int, b int, c double")
+    val rdd = sparkContext.parallelize(
+      Seq(
+        Row(1, 2, 0.6027633705776989d),
+        Row(2, 3, 0.7151893651681639d),
+        Row(3, 4, 0.5488135024422883d)),
+      1)
+    val df = spark.createDataFrame(rdd, schema)
+    val projection = df.queryExecution.sparkPlan.output.take(2) :+
+      Alias(Rand(Literal(0, IntegerType)), "_uuid")()
+
+    // test executeCollect
+    checkThatPlansAgree(
+      df,
+      input =>
+        TakeOrderedAndProjectExec(limit, sortOrder, projection, SortExec(sortOrder, false, input)),
+      input => expected(input),
+      sortAnswers = false)
+
+    // test doExecute
+    checkThatPlansAgree(
+      df,
+      input =>
+        noOpFilter(
+          TakeOrderedAndProjectExec(
+            limit,
+            sortOrder,
+            projection,
+            SortExec(sortOrder, false, input))),
+      input => expected(input),
+      sortAnswers = false)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to