This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c8f45ec4ec [GLUTEN-11088][VL] Follow-up: Spark 4.0: Fix remaining 
error in ArrowEvalPythonExecSuite (#11377)
c8f45ec4ec is described below

commit c8f45ec4eccce828e02cf72087b1c58fbffb6a18
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Jan 7 18:25:43 2026 +0000

    [GLUTEN-11088][VL] Follow-up: Spark 4.0: Fix remaining error in 
ArrowEvalPythonExecSuite (#11377)
---
 .../python/ArrowEvalPythonExecSuite.scala          | 35 +++++++++++++++++-----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
index f8e2554da7..7747bd2193 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
@@ -21,6 +21,8 @@ import org.apache.gluten.execution.WholeStageTransformerSuite
 import org.apache.spark.SparkConf
 import org.apache.spark.api.python.ColumnarArrowEvalPythonExec
 import org.apache.spark.sql.IntegratedUDFTestUtils
+import org.apache.spark.sql.types.{DataType, LongType, StringType}
+import org.apache.spark.util.SparkVersionUtil
 
 class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
 
@@ -30,7 +32,10 @@ class ArrowEvalPythonExecSuite extends 
WholeStageTransformerSuite {
 
   override protected val resourcePath: String = "/tpch-data-parquet"
   override protected val fileFormat: String = "parquet"
-  val pyarrowTestUDF = TestScalarPandasUDF(name = "pyarrowUDF")
+  private val pyarrowTestUDFString =
+    newTestScalarPandasUDF(name = "pyarrowUDF", returnType = Some(StringType))
+  private val pyarrowTestUDFLong =
+    newTestScalarPandasUDF(name = "pyarrowUDF", returnType = Some(LongType))
 
   override def sparkConf: SparkConf = {
     super.sparkConf
@@ -55,7 +60,7 @@ class ArrowEvalPythonExecSuite extends 
WholeStageTransformerSuite {
       ("3", "3")
     ).toDF("a", "p_a")
 
-    val df2 = base.select("a").withColumn("p_a", pyarrowTestUDF(base("a")))
+    val df2 = base.select("a").withColumn("p_a", 
pyarrowTestUDFString(base("a")))
     checkSparkPlan[ColumnarArrowEvalPythonExec](df2)
     checkAnswer(df2, expected)
   }
@@ -76,13 +81,14 @@ class ArrowEvalPythonExecSuite extends 
WholeStageTransformerSuite {
       ("3", 0, "3", 0)
     ).toDF("a", "b", "p_a", "d_b")
 
-    val df = base.withColumn("p_a", 
pyarrowTestUDF(base("a"))).withColumn("d_b", base("b") * 2)
+    val df =
+      base.withColumn("p_a", 
pyarrowTestUDFString(base("a"))).withColumn("d_b", base("b") * 2)
     checkSparkPlan[ColumnarArrowEvalPythonExec](df)
     checkAnswer(df, expected)
   }
 
-  // A fix needed for Spark 4.0 change in 
https://github.com/apache/spark/pull/42864.
-  testWithMaxSparkVersion("arrow_udf test: with preprojection", "3.5") {
+  // TODO: fix on spark-4.1
+  testWithMaxSparkVersion("arrow_udf test: with preprojection", "4.0") {
     lazy val base =
       Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 
1), ("3", 0))
         .toDF("a", "b")
@@ -98,8 +104,23 @@ class ArrowEvalPythonExecSuite extends 
WholeStageTransformerSuite {
     ).toDF("a", "b", "d_b", "p_a", "p_b")
     val df = base
       .withColumn("d_b", base("b") * 2)
-      .withColumn("p_a", pyarrowTestUDF(base("a")))
-      .withColumn("p_b", pyarrowTestUDF(base("b") * 2))
+      .withColumn("p_a", pyarrowTestUDFString(base("a")))
+      .withColumn("p_b", pyarrowTestUDFLong(base("b") * 2))
     checkAnswer(df, expected)
   }
+
+  private def newTestScalarPandasUDF(
+      name: String,
+      returnType: Option[DataType] = None): TestScalarPandasUDF = {
+    if (SparkVersionUtil.gteSpark40) {
+      // After https://github.com/apache/spark/pull/42864 which landed in 
Spark 4.0, the return
+      // type of the UDF must be explicitly specified when creating the UDF 
instance with column
+      // expressions as parameter.
+      classOf[TestScalarPandasUDF]
+        .getConstructor(classOf[String], classOf[Option[DataType]])
+        .newInstance(name, returnType)
+    } else {
+      TestScalarPandasUDF(name)
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to