maropu commented on a change in pull request #20965: [SPARK-21870][SQL] Split
aggregation code into small functions
URL: https://github.com/apache/spark/pull/20965#discussion_r319311315
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
##########
@@ -255,41 +260,138 @@ case class HashAggregateExec(
""".stripMargin
}
+ // Splits aggregate code into small functions because most JVM
implementations
+ // cannot compile functions that are too long.
+ //
+ // Note: The difference from `CodeGenerator.splitExpressions` is that we
define an individual
+ // function for each aggregation function (e.g., SUM and AVG). For example,
in a query
+ // `SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)`, we define two functions
+ // for `SUM(a)` and `AVG(a)`.
+ private def splitAggregateExpressions(
+ ctx: CodegenContext,
+ aggNames: Seq[String],
+ aggBufferUpdatingExprs: Seq[Seq[Expression]],
+ aggCodeBlocks: Seq[Block],
+ subExprs: Map[Expression, SubExprEliminationState]): Option[String] = {
+ val inputVars = aggBufferUpdatingExprs.map { aggExprsForOneFunc =>
+ val inputVarsForOneFunc = aggExprsForOneFunc.map(
+ CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)).reduce(_
++ _).toSeq
+ val paramLength =
CodeGenerator.calculateParamLengthFromExprValues(inputVarsForOneFunc)
+
+ // Checks that the parameter length for `aggExprsForOneFunc` does not
exceed the JVM limit
+ if (CodeGenerator.isValidParamLength(paramLength)) {
+ Some(inputVarsForOneFunc)
+ } else {
+ None
+ }
+ }
+
+ // Checks if all the aggregate code can be split into pieces.
+ // If the parameter length of at least one `aggExprsForOneFunc` goes over
the limit,
+ // we give up splitting the aggregate code entirely.
+ if (inputVars.forall(_.isDefined)) {
+ val splitCodes = inputVars.flatten.zipWithIndex.map { case (args, i) =>
+ val doAggVal = ctx.freshName(s"doAggregateVal_${aggNames(i)}")
Review comment:
renamed to `doAggFunc`
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]