Github user maropu commented on a diff in the pull request:
https://github.com/apache/spark/pull/21860#discussion_r214584609
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
---
@@ -853,33 +853,47 @@ case class HashAggregateExec(
val updateRowInHashMap: String = {
if (isFastHashMapEnabled) {
- ctx.INPUT_ROW = fastRowBuffer
- val boundUpdateExpr =
updateExpr.map(BindReferences.bindReference(_, inputAttr))
- val subExprs =
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
- val effectiveCodes = subExprs.codes.mkString("\n")
- val fastRowEvals =
ctx.withSubExprEliminationExprs(subExprs.states) {
- boundUpdateExpr.map(_.genCode(ctx))
- }
- val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) =>
- val dt = updateExpr(i).dataType
- CodeGenerator.updateColumn(
- fastRowBuffer, dt, i, ev, updateExpr(i).nullable,
isVectorizedHashMapEnabled)
- }
+ if (isVectorizedHashMapEnabled) {
+ ctx.INPUT_ROW = fastRowBuffer
+ val boundUpdateExpr =
updateExpr.map(BindReferences.bindReference(_, inputAttr))
+ val subExprs =
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
+ val effectiveCodes = subExprs.codes.mkString("\n")
+ val fastRowEvals =
ctx.withSubExprEliminationExprs(subExprs.states) {
+ boundUpdateExpr.map(_.genCode(ctx))
+ }
+ val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i)
=>
+ val dt = updateExpr(i).dataType
+ CodeGenerator.updateColumn(
+ fastRowBuffer, dt, i, ev, updateExpr(i).nullable,
isVectorized = true)
+ }
- // If fast hash map is on, we first generate code to update row in
fast hash map, if the
- // previous loop up hit fast hash map. Otherwise, update row in
regular hash map.
- s"""
- |if ($fastRowBuffer != null) {
- | // common sub-expressions
- | $effectiveCodes
- | // evaluate aggregate function
- | ${evaluateVariables(fastRowEvals)}
- | // update fast row
- | ${updateFastRow.mkString("\n").trim}
- |} else {
- | $updateRowInRegularHashMap
- |}
- """.stripMargin
+ // If vectorized fast hash map is on, we first generate code to
update row
+ // in vectorized fast hash map, if the previous loop up hit
vectorized fast hash map.
+ // Otherwise, update row in regular hash map.
+ s"""
+ |if ($fastRowBuffer != null) {
+ | // common sub-expressions
+ | $effectiveCodes
+ | // evaluate aggregate function
+ | ${evaluateVariables(fastRowEvals)}
+ | // update fast row
+ | ${updateFastRow.mkString("\n").trim}
+ |} else {
+ | $updateRowInRegularHashMap
+ |}
+ """.stripMargin
+ } else {
+ // If row-based hash map is on and the previous loop up hit fast
hash map,
+ // we reuse regular hash buffer to update row of fast hash map.
+ // Otherwise, update row in regular hash map.
+ s"""
+ |// fast hash reuse regular hash to update buffer
+ |if ($fastRowBuffer != null) {
+ | $unsafeRowBuffer = $fastRowBuffer;
+ |}
+ |$updateRowInRegularHashMap
+ """.stripMargin
--- End diff --
Basically, this fix looks good to me. To make the gen'd code more
easy-to-read, can we generate the code below?;
```
/* 213 */ private void agg_doConsume_0(long agg_expr_0_0, long
agg_expr_1_0) throws java.io.IOException {
/* 214 */ UnsafeRow agg_unsafeRowAggBuffer_0 = null;
/* 215 */ UnsafeRow agg_fastAggBuffer_0 = null;
/* 216 */
/* 217 */ if (true) {
/* 218 */ if (!false) {
/* 219 */ agg_fastAggBuffer_0 = agg_fastHashMap_0.findOrInsert(
/* 220 */ agg_expr_0_0);
/* 221 */ }
/* 222 */ }
/* 223 */ // Cannot find the key in fast hash map, try regular hash map.
/* 224 */ if (agg_fastAggBuffer_0 == null) {
...
/* 255 */
/* 256 */ }
/* 257 */
/* 258 */ // Updates the proper row buffer
/* 269 */ UnsafeRow agg_aggBuffer_0 = null;
/* 260 */ if (agg_fastAggBuffer_0 != null) {
/* 261 */ agg_aggBuffer_0 = agg_fastAggBuffer_0;
/* 262 */ } else {
/* 263 */ agg_aggBuffer_0 = agg_unsafeRowAggBuffer_0;
/* 264 */ }
/* 265 */
/* 266 */ // common sub-expressions
...
/* 264 */
/* 265 */ // evaluate aggregate function
...
/* 293 */ // update row buffer
/* 294 */ agg_aggBuffer_0.setLong(0, agg_value_8);
/* 296 */ }
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]