Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/19962#discussion_r156617815
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala ---
@@ -981,80 +976,52 @@ case class ScalaUDF(
}
// scalastyle:on line.size.limit
-
- private val converterClassName = classOf[Any => Any].getName
- private val scalaUDFClassName = classOf[ScalaUDF].getName
- private val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$"
-
- // Generate codes used to convert the arguments to Scala type for user-defined functions
- private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): (String, String) = {
- val converterTerm = ctx.freshName("converter")
- val expressionIdx = ctx.references.size - 1
- (converterTerm,
- s"$converterClassName $converterTerm =
($converterClassName)$typeConvertersClassName" +
- s".createToScalaConverter(((Expression)((($scalaUDFClassName)" +
-
s"references[$expressionIdx]).getChildren().apply($index))).dataType());")
- }
-
override def doGenCode(
ctx: CodegenContext,
ev: ExprCode): ExprCode = {
- val scalaUDF = ctx.freshName("scalaUDF")
- val scalaUDFRef = ctx.addReferenceObj("scalaUDFRef", this, scalaUDFClassName)
-
- // Object to convert the returned value of user-defined functions to Catalyst type
- val catalystConverterTerm = ctx.freshName("catalystConverter")
+ val converterClassName = classOf[Any => Any].getName
+ // The type converters for inputs and the result.
+ val converters: Array[Any => Any] = children.map { c =>
+ CatalystTypeConverters.createToScalaConverter(c.dataType)
+ }.toArray :+ CatalystTypeConverters.createToCatalystConverter(dataType)
+ val convertersTerm = ctx.addReferenceObj("converters", converters, s"$converterClassName[]")
+ val errorMsgTerm = ctx.addReferenceObj("errMsg", udfErrorMessage)
val resultTerm = ctx.freshName("result")
- // This must be called before children expressions' codegen
- // because ctx.references is used in genCodeForConverter
- val converterTerms = children.indices.map(genCodeForConverter(ctx, _))
-
- // Initialize user-defined function
- val funcClassName = s"scala.Function${children.size}"
-
- val funcTerm = ctx.freshName("udf")
-
// codegen for children expressions
val evals = children.map(_.genCode(ctx))
// Generate the codes for expressions and calling user-defined function
// We need to get the boxedType of dataType's javaType here. Because for the dataType
// such as IntegerType, its javaType is `int` and the returned type of user-defined
// function is Object. Trying to convert an Object to `int` will cause casting exception.
- val evalCode = evals.map(_.code).mkString
- val (converters, funcArguments) = converterTerms.zipWithIndex.map {
- case ((convName, convInit), i) =>
- val eval = evals(i)
- val argTerm = ctx.freshName("arg")
- val convert =
- s"""
- |$convInit
- |Object $argTerm = ${eval.isNull} ? null : $convName.apply(${eval.value});
- """.stripMargin
- (convert, argTerm)
- }.unzip
+ val evalCode = evals.map(_.code).mkString("\n")
+ val initFuncArgs = scala.collection.mutable.ListBuffer.empty[String]
+ val funcArguments = evals.zipWithIndex.map { case (eval, i) =>
--- End diff ---
very nit: can we do something like:
```
val (funcArguments, initFuncArgs) = evals.zipWithIndex.map { case (eval, i) =>
  val argTerm = ctx.freshName("arg")
  val initFuncArg =
    s"Object $argTerm = ${eval.isNull} ? null : $convertersTerm[$i].apply(${eval.value});"
  (argTerm, initFuncArg)
}.unzip
```
Just to avoid side effects and keep the code more in line with functional programming...
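
For context, here is a minimal standalone sketch of the `map` + `unzip` pattern being suggested, written outside of Spark's codegen. The `convert(...)` call, the placeholder argument names, and the sample data are illustrative only, not Spark APIs; the point is just that building the two aligned sequences in one expression avoids appending to a mutable buffer as a side effect.

```
object UnzipPatternSketch {
  def main(args: Array[String]): Unit = {
    // Stand-ins for the per-child codegen results (eval.value in the real code).
    val evals = Seq("value_0", "value_1", "value_2")

    // Side-effecting version: init statements are appended to a mutable buffer
    // while mapping over the inputs.
    val initFuncArgsBuffer = scala.collection.mutable.ListBuffer.empty[String]
    val funcArgumentsMutable = evals.zipWithIndex.map { case (eval, i) =>
      val argTerm = s"arg$i"
      initFuncArgsBuffer += s"Object $argTerm = convert($eval);"
      argTerm
    }

    // Functional version: build (argument, init statement) pairs in one map
    // and split them into two aligned sequences with unzip.
    val (funcArguments, initFuncArgs) = evals.zipWithIndex.map { case (eval, i) =>
      val argTerm = s"arg$i"
      (argTerm, s"Object $argTerm = convert($eval);")
    }.unzip

    // Both variants produce the same aligned sequences.
    assert(funcArguments == funcArgumentsMutable)
    assert(initFuncArgs == initFuncArgsBuffer.toList)
    println(initFuncArgs.mkString("\n"))
  }
}
```

Since both sequences come out of a single expression, they cannot drift out of sync the way a separately maintained buffer can.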
---