Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21650#discussion_r205061160
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala
---
@@ -94,36 +95,59 @@ object ExtractPythonUDFFromAggregate extends
Rule[LogicalPlan] {
*/
object ExtractPythonUDFs extends Rule[SparkPlan] with PredicateHelper {
- private def hasPythonUDF(e: Expression): Boolean = {
+ private def hasScalarPythonUDF(e: Expression): Boolean = {
e.find(PythonUDF.isScalarPythonUDF).isDefined
}
- private def canEvaluateInPython(e: PythonUDF): Boolean = {
- e.children match {
- // single PythonUDF child could be chained and evaluated in Python
- case Seq(u: PythonUDF) => canEvaluateInPython(u)
- // Python UDF can't be evaluated directly in JVM
- case children => !children.exists(hasPythonUDF)
+ private def canEvaluateInPython(e: PythonUDF, evalType: Int): Boolean = {
+ if (e.evalType != evalType) {
--- End diff --
Can we rename this function or add a comment? Both the scalar vectorized UDF
and the normal UDF can each be evaluated in Python, but this function returns
`false` in this case.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]