cloud-fan commented on a change in pull request #25710: [SPARK-29008][SQL]
Define an individual method for each common subexpression in HashAggregateExec
URL: https://github.com/apache/spark/pull/25710#discussion_r322719651
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1028,13 +1028,67 @@ class CodegenContext {
// Get all the expressions that appear at least twice and set up the state
for subexpression
// elimination.
val commonExprs =
equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
- val codes = commonExprs.map { e =>
- val expr = e.head
- // Generate the code for this expression tree.
- val eval = expr.genCode(this)
- val state = SubExprEliminationState(eval.isNull, eval.value)
- e.foreach(localSubExprEliminationExprs.put(_, state))
- eval.code.toString
+ val commonExprVals = commonExprs.map(_.head.genCode(this))
+
+ lazy val nonSplitExprCode = {
+ commonExprs.zip(commonExprVals).map { case (exprs, eval) =>
+ // Generate the code for this expression tree.
+ val state = SubExprEliminationState(eval.isNull, eval.value)
+ exprs.foreach(localSubExprEliminationExprs.put(_, state))
+ eval.code.toString
+ }
+ }
+
+ val codes = if (commonExprVals.map(_.code.length).sum >
SQLConf.get.methodSplitThreshold) {
+ if (commonExprs.map(calculateParamLength).forall(isValidParamLength)) {
+ commonExprs.zipWithIndex.map { case (exprs, i) =>
+ val expr = exprs.head
+ val eval = commonExprVals(i)
+
+ val isNullLiteral = eval.isNull match {
+ case TrueLiteral | FalseLiteral => true
+ case _ => false
+ }
+ val (isNull, isNullEvalCode) = if (!isNullLiteral) {
+ val v = addMutableState(JAVA_BOOLEAN, "subExprIsNull")
+ (JavaCode.isNullGlobal(v), s"$v = ${eval.isNull};")
+ } else {
+ (eval.isNull, "")
+ }
+
+ // Generate the code for this expression tree and wrap it in a
function.
+ val fnName = freshName("subExpr")
+ val inputVars = getLocalInputVariableValues(this, expr).toSeq
+ val argList = inputVars.map(v => s"${v.javaType.getName}
${v.variableName}")
+ val returnType = javaType(expr.dataType)
+ val fn =
+ s"""
+ |private $returnType $fnName(${argList.mkString(", ")}) {
+ | ${eval.code}
+ | $isNullEvalCode
+ | return ${eval.value};
+ |}
+ """.stripMargin
+
+ val value = freshName("subExprValue")
+ val state = SubExprEliminationState(isNull, JavaCode.variable(value,
expr.dataType))
Review comment:
AFAIK the code of common subexpression execution is always put together, not
split. I don't think we need to worry about it now.
BTW I think one principle is: for corner cases where it is really hard to
generate code, we should just fall back to interpreted mode.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]