Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21860#discussion_r214584609 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala --- @@ -853,33 +853,47 @@ case class HashAggregateExec( val updateRowInHashMap: String = { if (isFastHashMapEnabled) { - ctx.INPUT_ROW = fastRowBuffer - val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttr)) - val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) - val effectiveCodes = subExprs.codes.mkString("\n") - val fastRowEvals = ctx.withSubExprEliminationExprs(subExprs.states) { - boundUpdateExpr.map(_.genCode(ctx)) - } - val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) => - val dt = updateExpr(i).dataType - CodeGenerator.updateColumn( - fastRowBuffer, dt, i, ev, updateExpr(i).nullable, isVectorizedHashMapEnabled) - } + if (isVectorizedHashMapEnabled) { + ctx.INPUT_ROW = fastRowBuffer + val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttr)) + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) + val effectiveCodes = subExprs.codes.mkString("\n") + val fastRowEvals = ctx.withSubExprEliminationExprs(subExprs.states) { + boundUpdateExpr.map(_.genCode(ctx)) + } + val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) => + val dt = updateExpr(i).dataType + CodeGenerator.updateColumn( + fastRowBuffer, dt, i, ev, updateExpr(i).nullable, isVectorized = true) + } - // If fast hash map is on, we first generate code to update row in fast hash map, if the - // previous loop up hit fast hash map. Otherwise, update row in regular hash map. - s""" - |if ($fastRowBuffer != null) { - | // common sub-expressions - | $effectiveCodes - | // evaluate aggregate function - | ${evaluateVariables(fastRowEvals)} - | // update fast row - | ${updateFastRow.mkString("\n").trim} - |} else { - | $updateRowInRegularHashMap - |} - """.stripMargin + // If vectorized fast hash map is on, we first generate code to update row + // in vectorized fast hash map, if the previous loop up hit vectorized fast hash map. + // Otherwise, update row in regular hash map. + s""" + |if ($fastRowBuffer != null) { + | // common sub-expressions + | $effectiveCodes + | // evaluate aggregate function + | ${evaluateVariables(fastRowEvals)} + | // update fast row + | ${updateFastRow.mkString("\n").trim} + |} else { + | $updateRowInRegularHashMap + |} + """.stripMargin + } else { + // If row-based hash map is on and the previous loop up hit fast hash map, + // we reuse regular hash buffer to update row of fast hash map. + // Otherwise, update row in regular hash map. + s""" + |// fast hash reuse regular hash to update buffer + |if ($fastRowBuffer != null) { + | $unsafeRowBuffer = $fastRowBuffer; + |} + |$updateRowInRegularHashMap + """.stripMargin --- End diff -- Basically, this fix looks good to me. To make the gen'd code more easy-to-read, can we generate the code below?; ``` /* 213 */ private void agg_doConsume_0(long agg_expr_0_0, long agg_expr_1_0) throws java.io.IOException { /* 214 */ UnsafeRow agg_unsafeRowAggBuffer_0 = null; /* 215 */ UnsafeRow agg_fastAggBuffer_0 = null; /* 216 */ /* 217 */ if (true) { /* 218 */ if (!false) { /* 219 */ agg_fastAggBuffer_0 = agg_fastHashMap_0.findOrInsert( /* 220 */ agg_expr_0_0); /* 221 */ } /* 222 */ } /* 223 */ // Cannot find the key in fast hash map, try regular hash map. /* 224 */ if (agg_fastAggBuffer_0 == null) { ... /* 255 */ /* 256 */ } /* 257 */ /* 258 */ // Updates the proper row buffer /* 269 */ UnsafeRow agg_aggBuffer_0 = null; /* 260 */ if (agg_fastAggBuffer_0 != null) { /* 261 */ agg_aggBuffer_0 = agg_fastAggBuffer_0; /* 262 */ } else { /* 263 */ agg_aggBuffer_0 = agg_unsafeRowAggBuffer_0; /* 264 */ } /* 265 */ /* 266 */ // common sub-expressions ... /* 264 */ /* 265 */ // evaluate aggregate function ... /* 293 */ // update row buffer /* 294 */ agg_aggBuffer_0.setLong(0, agg_value_8); /* 296 */ } ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org