[GitHub] spark pull request #21860: [SPARK-24901][SQL]Merge the codegen of RegularHas...

maropu Sun, 02 Sep 2018 23:17:40 -0700

Github user maropu commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21860#discussion_r214584609
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
 ---
    @@ -853,33 +853,47 @@ case class HashAggregateExec(
     
         val updateRowInHashMap: String = {
           if (isFastHashMapEnabled) {
    -        ctx.INPUT_ROW = fastRowBuffer
    -        val boundUpdateExpr = 
updateExpr.map(BindReferences.bindReference(_, inputAttr))
    -        val subExprs = 
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
    -        val effectiveCodes = subExprs.codes.mkString("\n")
    -        val fastRowEvals = 
ctx.withSubExprEliminationExprs(subExprs.states) {
    -          boundUpdateExpr.map(_.genCode(ctx))
    -        }
    -        val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) =>
    -          val dt = updateExpr(i).dataType
    -          CodeGenerator.updateColumn(
    -            fastRowBuffer, dt, i, ev, updateExpr(i).nullable, 
isVectorizedHashMapEnabled)
    -        }
    +        if (isVectorizedHashMapEnabled) {
    +          ctx.INPUT_ROW = fastRowBuffer
    +          val boundUpdateExpr = 
updateExpr.map(BindReferences.bindReference(_, inputAttr))
    +          val subExprs = 
ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
    +          val effectiveCodes = subExprs.codes.mkString("\n")
    +          val fastRowEvals = 
ctx.withSubExprEliminationExprs(subExprs.states) {
    +            boundUpdateExpr.map(_.genCode(ctx))
    +          }
    +          val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) 
=>
    +            val dt = updateExpr(i).dataType
    +            CodeGenerator.updateColumn(
    +              fastRowBuffer, dt, i, ev, updateExpr(i).nullable, 
isVectorized = true)
    +          }
     
    -        // If fast hash map is on, we first generate code to update row in 
fast hash map, if the
    -        // previous loop up hit fast hash map. Otherwise, update row in 
regular hash map.
    -        s"""
    -           |if ($fastRowBuffer != null) {
    -           |  // common sub-expressions
    -           |  $effectiveCodes
    -           |  // evaluate aggregate function
    -           |  ${evaluateVariables(fastRowEvals)}
    -           |  // update fast row
    -           |  ${updateFastRow.mkString("\n").trim}
    -           |} else {
    -           |  $updateRowInRegularHashMap
    -           |}
    -       """.stripMargin
    +          // If vectorized fast hash map is on, we first generate code to 
update row
    +          // in vectorized fast hash map, if the previous loop up hit 
vectorized fast hash map.
    +          // Otherwise, update row in regular hash map.
    +          s"""
    +             |if ($fastRowBuffer != null) {
    +             |  // common sub-expressions
    +             |  $effectiveCodes
    +             |  // evaluate aggregate function
    +             |  ${evaluateVariables(fastRowEvals)}
    +             |  // update fast row
    +             |  ${updateFastRow.mkString("\n").trim}
    +             |} else {
    +             |  $updateRowInRegularHashMap
    +             |}
    +          """.stripMargin
    +        } else {
    +          // If row-based hash map is on and the previous loop up hit fast 
hash map,
    +          // we reuse regular hash buffer to update row of fast hash map.
    +          // Otherwise, update row in regular hash map.
    +          s"""
    +             |// fast hash reuse regular hash to update buffer
    +             |if ($fastRowBuffer != null) {
    +             |  $unsafeRowBuffer = $fastRowBuffer;
    +             |}
    +             |$updateRowInRegularHashMap
    +          """.stripMargin
    --- End diff --
    
    Basically, this fix looks good to me. To make the gen'd code more 
easy-to-read, can we generate the code below?;
    ```
    /* 213 */   private void agg_doConsume_0(long agg_expr_0_0, long 
agg_expr_1_0) throws java.io.IOException {
    /* 214 */     UnsafeRow agg_unsafeRowAggBuffer_0 = null;
    /* 215 */     UnsafeRow agg_fastAggBuffer_0 = null;
    /* 216 */
    /* 217 */     if (true) {
    /* 218 */       if (!false) {
    /* 219 */         agg_fastAggBuffer_0 = agg_fastHashMap_0.findOrInsert(
    /* 220 */           agg_expr_0_0);
    /* 221 */       }
    /* 222 */     }
    /* 223 */     // Cannot find the key in fast hash map, try regular hash map.
    /* 224 */     if (agg_fastAggBuffer_0 == null) {
                  ...
    /* 255 */
    /* 256 */     }
    /* 257 */
    /* 258 */     // Updates the proper row buffer
    /* 269 */     UnsafeRow agg_aggBuffer_0 = null;
    /* 260 */     if (agg_fastAggBuffer_0 != null) {
    /* 261 */       agg_aggBuffer_0 = agg_fastAggBuffer_0;
    /* 262 */     } else {
    /* 263 */       agg_aggBuffer_0 = agg_unsafeRowAggBuffer_0;
    /* 264 */     }
    /* 265 */
    /* 266 */     // common sub-expressions
                  ...
    /* 264 */
    /* 265 */     // evaluate aggregate function
                  ...
    /* 293 */     // update row buffer
    /* 294 */     agg_aggBuffer_0.setLong(0, agg_value_8);
    /* 296 */   }
    ```



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #21860: [SPARK-24901][SQL]Merge the codegen of RegularHas...

Reply via email to