Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19878#discussion_r154818350
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala ---
@@ -610,25 +637,44 @@ case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] {
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     ev.isNull = "false"
+
     val childHash = ctx.freshName("childHash")
-    val childrenHash = ctx.splitExpressions(children.map { child =>
+    val childrenHash = children.map { child =>
       val childGen = child.genCode(ctx)
       val codeToComputeHash = ctx.nullSafeExec(child.nullable, childGen.isNull) {
         computeHash(childGen.value, child.dataType, childHash, ctx)
       }
       s"""
          |${childGen.code}
+         |$childHash = 0;
          |$codeToComputeHash
          |${ev.value} = (31 * ${ev.value}) + $childHash;
-         |$childHash = 0;
        """.stripMargin
-    })
+    }
-    ctx.addMutableState(ctx.javaType(dataType), ev.value)
-    ctx.addMutableState(ctx.JAVA_INT, childHash, s"$childHash = 0;")
-    ev.copy(code = s"""
-      ${ev.value} = $seed;
-      $childrenHash""")
+    val codes = if (ctx.INPUT_ROW == null || ctx.currentVars != null) {
+      childrenHash.mkString("\n")
+    } else {
+      ctx.splitExpressions(
+        expressions = childrenHash,
+        funcName = "computeHash",
+        arguments = Seq("InternalRow" -> ctx.INPUT_ROW, ctx.JAVA_INT -> ev.value),
+        returnType = ctx.JAVA_INT,
+        makeSplitFunction = body =>
+          s"""
+             |${ctx.JAVA_INT} $childHash = 0;
+             |$body
+             |return ${ev.value};
+           """.stripMargin,
+        foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n"))
+    }
+
+    ev.copy(code =
+      s"""
+         |${ctx.JAVA_INT} ${ev.value} = $seed;
+         |${ctx.JAVA_INT} $childHash = 0;
--- End diff ---
nit: `childHash` only needs to be declared here when we don't split functions.
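
For illustration, a rough sketch of what that could look like, reusing `ctx`, `ev`, `seed`, `childHash` and `codes` from the diff above inside `doGenCode`; the `doSplit` flag and `childHashDecl` value are made up here and not taken from the PR:

```scala
// Hypothetical sketch, not a tested patch: track whether splitExpressions was
// used, and only declare the local childHash when the per-child code is
// inlined. When we split, each generated helper already declares its own
// childHash in makeSplitFunction, so a top-level declaration would be unused.
val doSplit = ctx.INPUT_ROW != null && ctx.currentVars == null
val childHashDecl = if (doSplit) "" else s"${ctx.JAVA_INT} $childHash = 0;"

ev.copy(code =
  s"""
     |${ctx.JAVA_INT} ${ev.value} = $seed;
     |$childHashDecl
     |$codes
   """.stripMargin)
```

When `doSplit` is true this just leaves a blank line in the generated Java, which is harmless; alternatively the declaration could be emitted only from the non-split branch that builds `codes`.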