Github user davies commented on a diff in the pull request:
https://github.com/apache/spark/pull/9270#discussion_r43659293
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
---
@@ -959,6 +963,122 @@ case class ScalaUDF(
}
}
+ // Generate codes used to convert the arguments to Scala type for
user-defined functions
+ private[this] def genCodeForConverter(ctx: CodeGenContext, index: Int):
String = {
+ val converterClassName = classOf[Any => Any].getName
+ val typeConvertersClassName = CatalystTypeConverters.getClass.getName
+ ".MODULE$"
+ val expressionClassName = classOf[Expression].getName
+ val scalaUDFClassName = classOf[ScalaUDF].getName
+
+ val converterTerm = ctx.freshName("converter" + index.toString)
+ ctx.addMutableState(converterClassName, converterTerm,
+ s"this.$converterTerm =
($converterClassName)$typeConvertersClassName.createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)expressions[${ctx.references.size
- 1}]).getChildren().apply($index))).dataType());")
+ converterTerm
+ }
+
+ override def genCode(
+ ctx: CodeGenContext,
+ ev: GeneratedExpressionCode): String = {
+
+ ctx.references += this
+
+ val scalaUDFClassName = classOf[ScalaUDF].getName
+ val converterClassName = classOf[Any => Any].getName
+ val typeConvertersClassName = CatalystTypeConverters.getClass.getName
+ ".MODULE$"
+ val expressionClassName = classOf[Expression].getName
+
+ // Generate codes used to convert the returned value of user-defined
functions to Catalyst type
+ val catalystConverterTerm = ctx.freshName("catalystConverter")
+ ctx.addMutableState(converterClassName, catalystConverterTerm,
+ s"this.$catalystConverterTerm =
($converterClassName)$typeConvertersClassName.createToCatalystConverter((($scalaUDFClassName)expressions[${ctx.references.size
- 1}]).dataType());")
+
+ val resultTerm = ctx.freshName("result")
+
+ // This must be called before children expressions' codegen
+ // because ctx.references is used in genCodeForConverter
+ val converterTerms = (0 until
children.size).map(genCodeForConverter(ctx, _))
+
+ // Initialize user-defined function
+ val funcClassName = children.size match {
+ case 0 =>
+ classOf[() => Any].getName
+ case 1 =>
+ classOf[(Any) => Any].getName
+ case 2 =>
+ classOf[(Any, Any) => Any].getName
+ case 3 =>
+ classOf[(Any, Any, Any) => Any].getName
+ case 4 =>
+ classOf[(Any, Any, Any, Any) => Any].getName
+ case 5 =>
+ classOf[(Any, Any, Any, Any, Any) => Any].getName
+ case 6 =>
+ classOf[(Any, Any, Any, Any, Any, Any) => Any].getName
+ case 7 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 8 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 9 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any) =>
Any].getName
+ case 10 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) =>
Any].getName
+ case 11 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) =>
Any].getName
+ case 12 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any) => Any].getName
+ case 13 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any) => Any].getName
+ case 14 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any) => Any].getName
+ case 15 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any) => Any].getName
+ case 16 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any) => Any].getName
+ case 17 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any) => Any].getName
+ case 18 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 19 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 20 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 21 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case 22 =>
+ classOf[(Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any,
Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any) => Any].getName
+ case _ =>
+ throw new UnsupportedOperationException(
+ "ScalaUDF doesn't support user-defined functions with more than
22 arguments")
+ }
+
+ val funcTerm = ctx.freshName("udf")
+ ctx.addMutableState(funcClassName, funcTerm,
+ s"this.$funcTerm =
($funcClassName)((($scalaUDFClassName)expressions[${ctx.references.size -
1}]).userDefinedFunc());")
+
+ // codegen for children expressions
+ val evals = children.map(_.gen(ctx))
+
+ // Generate the codes for expressions and calling user-defined function
+ // We need to get the boxedType of dataType's javaType here. Because
for the dataType
+ // such as IntegerType, its javaType is `int` and the returned type of
user-defined
+ // function is Object. Trying to convert an Object to `int` will cause
casting exception.
+ val evalCode = evals.map(_.code).mkString
+ val funcArguments = converterTerms.zip(evals).map {
+ case (converter, eval) => s"$converter.apply(${eval.value})"
+ }.mkString(",")
+ val callFunc = s"${ctx.boxedType(ctx.javaType(dataType))} $resultTerm
=
(${ctx.boxedType(ctx.javaType(dataType))})${catalystConverterTerm}.apply($funcTerm.apply($funcArguments));"
+
+ evalCode + s"""
+ ${ctx.javaType(dataType)} ${ev.value} =
${ctx.defaultValue(dataType)};
+ Boolean ${ev.isNull};
+
+ $callFunc
+
+ ${ev.value} = $resultTerm;
+ ${ev.isNull} = $resultTerm == null;
+ """
+ }
+
// scalastyle:on
--- End diff --
Could you move this line up (above the new code)?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]