cloud-fan commented on a change in pull request #32980:
URL: https://github.com/apache/spark/pull/32980#discussion_r655564938
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -76,24 +76,32 @@ object ExprCode {
/**
* State used for subexpression elimination.
*
+ * @param code The sequence of statements required to evaluate the
subexpression.
* @param isNull A term that holds a boolean value representing whether the
expression evaluated
* to null.
* @param value A term for a value of a common sub-expression. Not valid if
`isNull`
* is set to `true`.
+ * @param childrenSubExprs The sequence of subexpressions as the children
expressions. Before
+ * evaluating this subexpression, we should evaluate
all children
+ * subexpressions first. This is used if we want to
selectively evaluate
+ * particular subexpressions, instead of all at once.
In the case, we need
+ * to make sure we evaluate all children
subexpressions too.
*/
-case class SubExprEliminationState(isNull: ExprValue, value: ExprValue)
+case class SubExprEliminationState(
Review comment:
Now this looks very similar to `ExprCode` with children tracked...
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1049,17 +1074,26 @@ class CodegenContext extends Logging {
// elimination.
val commonExprs = equivalentExpressions.getAllEquivalentExprs(1)
- val nonSplitExprCode = {
+ val nonSplitCode = {
+ val allStates = mutable.ArrayBuffer.empty[SubExprEliminationState]
commonExprs.map { exprs =>
- val eval =
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
+
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
val eval = exprs.head.genCode(this)
- // Generate the code for this expression tree.
- val state = SubExprEliminationState(eval.isNull, eval.value)
+ // Collects other subexpressions from the children.
+ val childrenSubExprs =
mutable.ArrayBuffer.empty[SubExprEliminationState]
+ exprs.head.foreach {
+ case e if subExprEliminationExprs.contains(e) =>
+ childrenSubExprs += subExprEliminationExprs(e)
+ case _ =>
+ }
+ val state = SubExprEliminationState(eval.code, eval.isNull,
eval.value,
Review comment:
seems it's simpler if we define `SubExprEliminationState` as
`SubExprEliminationState(eval: ExprValue, children: ...)`
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1111,24 +1144,33 @@ class CodegenContext extends Logging {
|}
""".stripMargin
- val state = SubExprEliminationState(isNull, JavaCode.global(value,
expr.dataType))
- exprs.foreach(localSubExprEliminationExprs.put(_, state))
+ // Collects other subexpressions from the children.
+ val childrenSubExprs =
mutable.ArrayBuffer.empty[SubExprEliminationState]
+ exprs.head.foreach {
+ case e if subExprEliminationExprs.contains(e) =>
+ childrenSubExprs += subExprEliminationExprs(e)
+ case _ =>
+ }
+
val inputVariables = inputVars.map(_.variableName).mkString(", ")
- s"${addNewFunction(fnName, fn)}($inputVariables);"
+ val code = code"${addNewFunction(fnName, fn)}($inputVariables);"
+ val state = SubExprEliminationState(code, isNull,
JavaCode.global(value, expr.dataType),
+ childrenSubExprs.toSeq.reverse)
+ exprs.foreach(localSubExprEliminationExprs.put(_, state))
}
- (splitCodes, localSubExprEliminationExprs, exprCodesNeedEvaluate)
+ (localSubExprEliminationExprs, exprCodesNeedEvaluate)
} else {
if (Utils.isTesting) {
throw
QueryExecutionErrors.failedSplitSubExpressionError(MAX_JVM_METHOD_PARAMS_LENGTH)
} else {
logInfo(QueryExecutionErrors.failedSplitSubExpressionMsg(MAX_JVM_METHOD_PARAMS_LENGTH))
- (nonSplitExprCode, localSubExprEliminationExprsForNonSplit,
Seq.empty)
+ (localSubExprEliminationExprsForNonSplit, Seq.empty)
}
}
} else {
- (nonSplitExprCode, localSubExprEliminationExprsForNonSplit, Seq.empty)
+ (localSubExprEliminationExprsForNonSplit, Seq.empty)
}
- SubExprCodes(codes, subExprsMap.toMap, exprCodes.flatten)
+ SubExprCodes(subExprsMap.toMap, exprCodes.flatten)
}
Review comment:
We may need to improve the method doc of
`subexpressionEliminationForWholeStageCodegen`. I can hardly understand this
method now...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]