Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/18931#discussion_r163462688
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
---
@@ -149,13 +149,100 @@ trait CodegenSupport extends SparkPlan {
ctx.freshNamePrefix = parent.variablePrefix
val evaluated = evaluateRequiredVariables(output, inputVars,
parent.usedInputs)
+
+ // Under certain conditions, we can put the logic to consume the rows
of this operator into
+ // another function. This prevents a generated function from growing too long
to be optimized by the JIT.
+ // The conditions:
+ // 1. The parent uses all variables in output. We can't defer variable
evaluation when consuming
+ // in another function.
+ // 2. The output variables are not empty. If it's empty, we don't
bother to do that.
+ // 3. We don't use row variable. The construction of row uses deferred
variable evaluation. We
+ // can't do it.
+ // 4. The number of output variables must be less than the maximum number of
parameters in a Java method
+ // declaration.
+ val requireAllOutput = output.forall(parent.usedInputs.contains(_))
+ val consumeFunc =
+ if (row == null && outputVars.nonEmpty && requireAllOutput &&
isValidParamLength(ctx)) {
+ constructDoConsumeFunction(ctx, inputVars)
+ } else {
+ parent.doConsume(ctx, inputVars, rowVar)
+ }
s"""
|${ctx.registerComment(s"CONSUME: ${parent.simpleString}")}
|$evaluated
- |${parent.doConsume(ctx, inputVars, rowVar)}
+ |$consumeFunc
""".stripMargin
}
+ /**
+ * In Java, a method descriptor is valid only if it represents method
parameters with a total
+ * length of 255 or less. `this` contributes one unit and a parameter of
type long or double
+ * contributes two units. Besides, for nullable parameters, we also need
to pass a boolean
+ * for the null status.
+ */
+ private def isValidParamLength(ctx: CodegenContext): Boolean = {
+ // Start value is 1 for `this`.
+ output.foldLeft(1) { case (curLength, attr) =>
+ ctx.javaType(attr.dataType) match {
+ case (ctx.JAVA_LONG | ctx.JAVA_DOUBLE) if !attr.nullable =>
curLength + 2
+ case ctx.JAVA_LONG | ctx.JAVA_DOUBLE => curLength + 3
+ case _ if !attr.nullable => curLength + 1
+ case _ => curLength + 2
+ }
+ } <= 255
+ }
+
+ /**
+ * To prevent the concatenated function from growing too long to be optimized by
the JIT, we can separate the
+ * parent's `doConsume` code of a `CodegenSupport` operator into a
function to call.
+ */
+ private def constructDoConsumeFunction(
+ ctx: CodegenContext,
+ inputVars: Seq[ExprCode]): String = {
+ val (callingParams, arguList, inputVarsInFunc) =
+ constructConsumeParameters(ctx, output, inputVars)
+ val rowVar = ExprCode("", "false", "unsafeRow")
+ val doConsume = ctx.freshName("doConsume")
+ val doConsumeFuncName = ctx.addNewFunction(doConsume,
+ s"""
+ | private void $doConsume($arguList) throws java.io.IOException {
+ | ${parent.doConsume(ctx, inputVarsInFunc, rowVar)}
+ | }
+ """.stripMargin)
+
+ s"""
+ | $doConsumeFuncName($callingParams);
+ """.stripMargin
+ }
+
+ /**
+ * Returns source code for calling consume function and the argument
list of the consume function
+ * and also the `ExprCode` for the argument list.
+ */
+ private def constructConsumeParameters(
+ ctx: CodegenContext,
+ attributes: Seq[Attribute],
+ variables: Seq[ExprCode]): (String, String, Seq[ExprCode]) = {
+ val params = variables.zipWithIndex.map { case (ev, i) =>
+ val arguName = ctx.freshName(s"expr_$i")
+ val arguType = ctx.javaType(attributes(i).dataType)
+
+ val (callingParam, funcParams, arguIsNull) = if
(!attributes(i).nullable) {
+ // When the argument is not nullable, we don't need to pass in
`isNull` param for it and
+ // simply give a `false`.
+ val arguIsNull = "false"
+ (ev.value, s"$arguType $arguName", arguIsNull)
+ } else {
+ val arguIsNull = ctx.freshName(s"exprIsNull_$i")
+ (ev.value + ", " + ev.isNull, s"$arguType $arguName, boolean
$arguIsNull", arguIsNull)
+ }
+ (callingParam, funcParams, ExprCode("", arguIsNull, arguName))
+ }.unzip3
+ (params._1.mkString(", "),
+ params._2.mkString(", "),
+ params._3)
--- End diff --
Done.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]