Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19878#discussion_r154818350
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala ---
@@ -610,25 +637,44 @@ case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] {
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     ev.isNull = "false"
+
     val childHash = ctx.freshName("childHash")
-    val childrenHash = ctx.splitExpressions(children.map { child =>
+    val childrenHash = children.map { child =>
       val childGen = child.genCode(ctx)
       val codeToComputeHash = ctx.nullSafeExec(child.nullable, childGen.isNull) {
         computeHash(childGen.value, child.dataType, childHash, ctx)
       }
       s"""
          |${childGen.code}
+         |$childHash = 0;
          |$codeToComputeHash
          |${ev.value} = (31 * ${ev.value}) + $childHash;
-         |$childHash = 0;
        """.stripMargin
-    })
+    }
-    ctx.addMutableState(ctx.javaType(dataType), ev.value)
-    ctx.addMutableState(ctx.JAVA_INT, childHash, s"$childHash = 0;")
-    ev.copy(code = s"""
-      ${ev.value} = $seed;
-      $childrenHash""")
+    val codes = if (ctx.INPUT_ROW == null || ctx.currentVars != null) {
+      childrenHash.mkString("\n")
+    } else {
+      ctx.splitExpressions(
+        expressions = childrenHash,
+        funcName = "computeHash",
+        arguments = Seq("InternalRow" -> ctx.INPUT_ROW, ctx.JAVA_INT -> ev.value),
+        returnType = ctx.JAVA_INT,
+        makeSplitFunction = body =>
+          s"""
+             |${ctx.JAVA_INT} $childHash = 0;
+             |$body
+             |return ${ev.value};
+           """.stripMargin,
+        foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n"))
+    }
+
+    ev.copy(code =
+      s"""
+         |${ctx.JAVA_INT} ${ev.value} = $seed;
+         |${ctx.JAVA_INT} $childHash = 0;
--- End diff ---
nit: `childHash` only needs to be declared here when we don't split functions.
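
For illustration, a rough sketch of what that could look like, reusing `ctx`, `ev`, `seed`, `childHash` and `codes` from the diff above inside `doGenCode`; the `doSplit` flag and `childHashDecl` value are made up here and not taken from the PR:

```scala
// Hypothetical sketch, not a tested patch: track whether splitExpressions was
// used, and only declare the local childHash when the per-child code is
// inlined. When we split, each generated helper already declares its own
// childHash in makeSplitFunction, so a top-level declaration would be unused.
val doSplit = ctx.INPUT_ROW != null && ctx.currentVars == null
val childHashDecl = if (doSplit) "" else s"${ctx.JAVA_INT} $childHash = 0;"

ev.copy(code =
  s"""
     |${ctx.JAVA_INT} ${ev.value} = $seed;
     |$childHashDecl
     |$codes
   """.stripMargin)
```

When `doSplit` is true this just leaves a blank line in the generated Java, which is harmless; alternatively the declaration could be emitted only from the non-split branch that builds `codes`.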