cloud-fan commented on a change in pull request #20965: [SPARK-21870][SQL] 
Split aggregation code into small functions
URL: https://github.com/apache/spark/pull/20965#discussion_r319953081
 
 

 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
 ##########
 @@ -824,59 +944,158 @@ case class HashAggregateExec(
     // generating input columns, we use `currentVars`.
     ctx.currentVars = new Array[ExprCode](aggregateBufferAttributes.length) ++ 
input
 
+    val aggNames = aggregateExpressions.map(_.aggregateFunction.prettyName)
+    // Computes start offsets for each aggregation function code
+    // in the underlying buffer row.
+    val bufferStartOffsets = {
+      val offsets = mutable.ArrayBuffer[Int]()
+      var curOffset = 0
+      updateExprs.foreach { exprsForOneFunc =>
+        offsets += curOffset
+        curOffset += exprsForOneFunc.length
+      }
+      offsets.toArray
+    }
+
     val updateRowInRegularHashMap: String = {
       ctx.INPUT_ROW = unsafeRowBuffer
-      val boundUpdateExpr = bindReferences(updateExpr, inputAttr)
-      val subExprs = 
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
+      val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc =>
+        bindReferences(updateExprsForOneFunc, inputAttr)
+      }
+      val subExprs = 
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten)
       val effectiveCodes = subExprs.codes.mkString("\n")
-      val unsafeRowBufferEvals = 
ctx.withSubExprEliminationExprs(subExprs.states) {
-        boundUpdateExpr.map(_.genCode(ctx))
+      val unsafeRowBufferEvals = boundUpdateExprs.map { 
boundUpdateExprsForOneFunc =>
+        ctx.withSubExprEliminationExprs(subExprs.states) {
+          boundUpdateExprsForOneFunc.map(_.genCode(ctx))
+        }
       }
-      val updateUnsafeRowBuffer = unsafeRowBufferEvals.zipWithIndex.map { case 
(ev, i) =>
-        val dt = updateExpr(i).dataType
-        CodeGenerator.updateColumn(unsafeRowBuffer, dt, i, ev, 
updateExpr(i).nullable)
+
+      val aggCodeBlocks = updateExprs.indices.map { i =>
+        val rowBufferEvalsForOneFunc = unsafeRowBufferEvals(i)
+        val boundUpdateExprsForOneFunc = boundUpdateExprs(i)
+        val bufferOffset = bufferStartOffsets(i)
+
+        // All the update code for aggregation buffers should be placed in the 
end
+        // of each aggregation function code.
+        val updateRowBuffers = rowBufferEvalsForOneFunc.zipWithIndex.map { 
case (ev, j) =>
+          val updateExpr = boundUpdateExprsForOneFunc(j)
+          val dt = updateExpr.dataType
+          val nullable = updateExpr.nullable
+          CodeGenerator.updateColumn(unsafeRowBuffer, dt, bufferOffset + j, 
ev, nullable)
+        }
+        code"""
+           |// evaluate aggregate function for ${aggNames(i)}
+           |${evaluateVariables(rowBufferEvalsForOneFunc)}
+           |// update unsafe row buffer
+           |${updateRowBuffers.mkString("\n").trim}
+         """.stripMargin
+      }
+
+      lazy val nonSplitAggCode = {
+        s"""
+           |// common sub-expressions
+           |$effectiveCodes
+           |// evaluate aggregate functions and update aggregation buffers
+           |${aggCodeBlocks.fold(EmptyBlock)(_ + _)}
+         """.stripMargin
+      }
+
+      if (conf.codegenSplitAggregateFunc &&
+          aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) {
+        val splitAggCode = splitAggregateExpressions(
 
 Review comment:
   ditto

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to the sender.