This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c8f45ec4ec [GLUTEN-11088][VL] Follow-up: Spark 4.0: Fix remaining error in ArrowEvalPythonExecSuite (#11377)
c8f45ec4ec is described below
commit c8f45ec4eccce828e02cf72087b1c58fbffb6a18
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Jan 7 18:25:43 2026 +0000
[GLUTEN-11088][VL] Follow-up: Spark 4.0: Fix remaining error in ArrowEvalPythonExecSuite (#11377)
---
.../python/ArrowEvalPythonExecSuite.scala | 35 +++++++++++++++++-----
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
index f8e2554da7..7747bd2193 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala
@@ -21,6 +21,8 @@ import org.apache.gluten.execution.WholeStageTransformerSuite
import org.apache.spark.SparkConf
import org.apache.spark.api.python.ColumnarArrowEvalPythonExec
import org.apache.spark.sql.IntegratedUDFTestUtils
+import org.apache.spark.sql.types.{DataType, LongType, StringType}
+import org.apache.spark.util.SparkVersionUtil
class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
@@ -30,7 +32,10 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
override protected val resourcePath: String = "/tpch-data-parquet"
override protected val fileFormat: String = "parquet"
- val pyarrowTestUDF = TestScalarPandasUDF(name = "pyarrowUDF")
+ private val pyarrowTestUDFString =
+ newTestScalarPandasUDF(name = "pyarrowUDF", returnType = Some(StringType))
+ private val pyarrowTestUDFLong =
+ newTestScalarPandasUDF(name = "pyarrowUDF", returnType = Some(LongType))
override def sparkConf: SparkConf = {
super.sparkConf
@@ -55,7 +60,7 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
("3", "3")
).toDF("a", "p_a")
- val df2 = base.select("a").withColumn("p_a", pyarrowTestUDF(base("a")))
+ val df2 = base.select("a").withColumn("p_a", pyarrowTestUDFString(base("a")))
checkSparkPlan[ColumnarArrowEvalPythonExec](df2)
checkAnswer(df2, expected)
}
@@ -76,13 +81,14 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
("3", 0, "3", 0)
).toDF("a", "b", "p_a", "d_b")
- val df = base.withColumn("p_a", pyarrowTestUDF(base("a"))).withColumn("d_b", base("b") * 2)
+ val df =
+ base.withColumn("p_a", pyarrowTestUDFString(base("a"))).withColumn("d_b", base("b") * 2)
checkSparkPlan[ColumnarArrowEvalPythonExec](df)
checkAnswer(df, expected)
}
- // A fix needed for Spark 4.0 change in https://github.com/apache/spark/pull/42864.
- testWithMaxSparkVersion("arrow_udf test: with preprojection", "3.5") {
+ // TODO: fix on spark-4.1
+ testWithMaxSparkVersion("arrow_udf test: with preprojection", "4.0") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0",
1), ("3", 0))
.toDF("a", "b")
@@ -98,8 +104,23 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
).toDF("a", "b", "d_b", "p_a", "p_b")
val df = base
.withColumn("d_b", base("b") * 2)
- .withColumn("p_a", pyarrowTestUDF(base("a")))
- .withColumn("p_b", pyarrowTestUDF(base("b") * 2))
+ .withColumn("p_a", pyarrowTestUDFString(base("a")))
+ .withColumn("p_b", pyarrowTestUDFLong(base("b") * 2))
checkAnswer(df, expected)
}
+
+ private def newTestScalarPandasUDF(
+ name: String,
+ returnType: Option[DataType] = None): TestScalarPandasUDF = {
+ if (SparkVersionUtil.gteSpark40) {
+ // After https://github.com/apache/spark/pull/42864 which landed in Spark 4.0, the return
+ // type of the UDF must be explicitly specified when creating the UDF instance with column
+ // expressions as parameter.
+ // expressions as parameter.
+ classOf[TestScalarPandasUDF]
+ .getConstructor(classOf[String], classOf[Option[DataType]])
+ .newInstance(name, returnType)
+ } else {
+ TestScalarPandasUDF(name)
+ }
+ }
}
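
For context, the newTestScalarPandasUDF helper added above uses a version-gated reflective construction pattern. Below is a minimal standalone sketch of that pattern, not taken from the commit: VersionedUDFStub and the isSpark40OrLater flag are hypothetical stand-ins for TestScalarPandasUDF and SparkVersionUtil.gteSpark40, and Option[String] stands in for Option[DataType].

import java.lang.reflect.Constructor

object ReflectiveConstructionSketch {
  // Hypothetical stand-in for TestScalarPandasUDF: a newer release widened the
  // primary constructor with an extra Option parameter. (In the real suite the
  // two-argument constructor only exists on Spark 4.0+, which is why reflection
  // is used there instead of a direct constructor call.)
  case class VersionedUDFStub(name: String, returnType: Option[String] = None)

  // Hypothetical stand-in for SparkVersionUtil.gteSpark40.
  val isSpark40OrLater: Boolean = true

  def newUDFStub(name: String, returnType: Option[String] = None): VersionedUDFStub = {
    if (isSpark40OrLater) {
      // Resolve the two-argument constructor at runtime rather than referencing it
      // directly, so this call site does not hard-code one constructor shape.
      val ctor: Constructor[VersionedUDFStub] =
        classOf[VersionedUDFStub].getConstructor(classOf[String], classOf[Option[String]])
      ctor.newInstance(name, returnType)
    } else {
      // Older path: fall back to the single-argument shape and the default return type.
      VersionedUDFStub(name)
    }
  }

  def main(args: Array[String]): Unit =
    println(newUDFStub("pyarrowUDF", Some("string")))
}

The reflective lookup appears intended to keep the call site source-compatible across Spark versions where the TestScalarPandasUDF constructor signature differs, while still passing the explicit return type that Spark 4.0 requires.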
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]