zhztheplayer commented on code in PR #11288:
URL:
https://github.com/apache/incubator-gluten/pull/11288#discussion_r2621731406
##########
backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala:
##########
@@ -81,7 +79,7 @@ class ArrowEvalPythonExecSuite extends
WholeStageTransformerSuite {
checkAnswer(df, expected)
}
- // TODO: fix on spark-4.0
+ // A fix needed for Spark 4.0 change in
https://github.com/apache/spark/pull/42864.
testWithMaxSparkVersion("arrow_udf test: with preprojection", "3.5") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0",
1), ("3", 0))
Review Comment:
This case remains excluded in 4.0 due to the following error:
<details>
```
[DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "pyarrowUDF((b *
2))" due to data type mismatch: cannot cast "STRING" to "VOID". SQLSTATE: 42K09;
'Project [a#9, b#10, d_b#11, p_a#13, cast(pyarrowUDF(cast((b#10 * 2) as
string)) as void) AS p_b#15]
+- Project [a#9, b#10, d_b#11, cast(pyarrowUDF(cast(a#9 as string)) as
string) AS p_a#13]
+- Project [a#9, b#10, (b#10 * 2) AS d_b#11]
+- Project [_1#2 AS a#9, _2#3 AS b#10]
+- LocalRelation [_1#2, _2#3]
org.apache.spark.sql.AnalysisException:
[DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "pyarrowUDF((b *
2))" due to data type mismatch: cannot cast "STRING" to "VOID". SQLSTATE: 42K09;
'Project [a#9, b#10, d_b#11, p_a#13, cast(pyarrowUDF(cast((b#10 * 2) as
string)) as void) AS p_b#15]
+- Project [a#9, b#10, d_b#11, cast(pyarrowUDF(cast(a#9 as string)) as
string) AS p_a#13]
+- Project [a#9, b#10, (b#10 * 2) AS d_b#11]
+- Project [_1#2 AS a#9, _2#3 AS b#10]
+- LocalRelation [_1#2, _2#3]
at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:77)
at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:70)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:420)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7$adapted(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:252)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:251)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:251)
at scala.collection.immutable.Vector.foreach(Vector.scala:2125)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:251)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6$adapted(CheckAnalysis.scala:402)
at scala.collection.immutable.List.foreach(List.scala:334)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2$adapted(CheckAnalysis.scala:284)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:252)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0(CheckAnalysis.scala:284)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0$(CheckAnalysis.scala:255)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis0(Analyzer.scala:299)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:244)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:231)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:299)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$resolveInFixedPoint$1(HybridAnalyzer.scala:192)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:192)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:76)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:111)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:71)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:330)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:330)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:110)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:278)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:278)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:277)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:110)
at scala.util.Try$.apply(Try.scala:217)
at
org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
at
org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1439)
at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
at
org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:121)
at
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:80)
at
org.apache.spark.sql.classic.Dataset$.$anonfun$ofRows$1(Dataset.scala:115)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at org.apache.spark.sql.classic.Dataset$.ofRows(Dataset.scala:113)
at org.apache.spark.sql.classic.Dataset.withPlan(Dataset.scala:2263)
at org.apache.spark.sql.classic.Dataset.withColumns(Dataset.scala:1283)
at org.apache.spark.sql.classic.Dataset.withColumns(Dataset.scala:232)
at org.apache.spark.sql.Dataset.withColumn(Dataset.scala:2187)
at org.apache.spark.sql.classic.Dataset.withColumn(Dataset.scala:1819)
at
org.apache.gluten.execution.python.ArrowEvalPythonExecSuite.$anonfun$new$5(ArrowEvalPythonExecSuite.scala:100)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at org.scalatest.enablers.Timed$$anon$1.timeoutAfter(Timed.scala:127)
at
org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:282)
at org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231)
at org.scalatest.concurrent.TimeLimits.failAfter$(TimeLimits.scala:230)
at org.apache.spark.SparkFunSuite.failAfter(SparkFunSuite.scala:69)
at
org.apache.spark.SparkFunSuite.$anonfun$test$2(SparkFunSuite.scala:155)
at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at
org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226)
at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:227)
at
org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224)
at
org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
at
org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236)
at
org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218)
at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:69)
at
org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
at
org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
at
org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
at
org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
at
org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
at
org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
at org.scalatest.Suite.run(Suite.scala:1114)
at org.scalatest.Suite.run$(Suite.scala:1096)
at
org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564)
at
org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273)
at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273)
at
org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272)
at
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69)
at
org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:47)
at
org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1321)
at
org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1315)
at scala.collection.immutable.List.foreach(List.scala:334)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1315)
at
org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:992)
at
org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:970)
at
org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1481)
at
org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:970)
at org.scalatest.tools.Runner$.run(Runner.scala:798)
at org.scalatest.tools.Runner.run(Runner.scala)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:43)
at
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:26)
Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException:
Full stacktrace of original doTryWithCallerStacktrace caller
at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:77)
at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:70)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7(CheckAnalysis.scala:420)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$7$adapted(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:252)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:251)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:251)
at scala.collection.immutable.Vector.foreach(Vector.scala:2125)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:251)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6$adapted(CheckAnalysis.scala:402)
at scala.collection.immutable.List.foreach(List.scala:334)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2(CheckAnalysis.scala:402)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2$adapted(CheckAnalysis.scala:284)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:252)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0(CheckAnalysis.scala:284)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0$(CheckAnalysis.scala:255)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis0(Analyzer.scala:299)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:244)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:231)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:299)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$resolveInFixedPoint$1(HybridAnalyzer.scala:192)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:89)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.resolveInFixedPoint(HybridAnalyzer.scala:192)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.$anonfun$apply$1(HybridAnalyzer.scala:76)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.withTrackedAnalyzerBridgeState(HybridAnalyzer.scala:111)
at
org.apache.spark.sql.catalyst.analysis.resolver.HybridAnalyzer.apply(HybridAnalyzer.scala:71)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:330)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:423)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:330)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$2(QueryExecution.scala:110)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:148)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:278)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:278)
at
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
at
org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:277)
at
org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyAnalyzed$1(QueryExecution.scala:110)
at scala.util.Try$.apply(Try.scala:217)
at
org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
at
org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
... 69 more
```
</details>
The error is related to https://github.com/apache/spark/pull/42864. It's not
a bug of Gluten but because the test code requires to be updated. A fix is
needed later on.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]