viirya commented on a change in pull request #32980:
URL: https://github.com/apache/spark/pull/32980#discussion_r655164773



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -76,24 +76,35 @@ object ExprCode {
 /**
  * State used for subexpression elimination.
  *
+ * @param code The sequence of statements required to evaluate the 
subexpression.
  * @param isNull A term that holds a boolean value representing whether the 
expression evaluated
  *               to null.
  * @param value A term for a value of a common sub-expression. Not valid if 
`isNull`
  *              is set to `true`.
+ * @param childrenSubExprs The sequence of subexpressions as the children 
expressions. Before
+ *                         evaluating this subexpression, we should evaluate 
all children
+ *                         subexpressions first. This is used if we want to 
selectively evaluate
+ *                         particular subexpressions, instead of all at once. 
In the case, we need
+ *                         to make sure we evaluate all children 
subexpressions too.
  */
-case class SubExprEliminationState(isNull: ExprValue, value: ExprValue)
+case class SubExprEliminationState(
+  var code: Block,
+  isNull: ExprValue,
+  value: ExprValue,
+  childrenSubExprs: Seq[SubExprEliminationState] = Seq.empty)
 
 /**
  * Codes and common subexpressions mapping used for subexpression elimination.
  *
- * @param codes Strings representing the codes that evaluate common 
subexpressions.
+ * @param codes all `SubExprEliminationState` representing the codes that 
evaluate common
+ *              subexpressions.

Review comment:
       The're the same `SubExprEliminationState`. `states` is used as map when 
we look for subexpressions to replace in an expression. `codes` are all values 
in the map, and they are in the sequence when we create them.
   
   Now I'm thinking it more, maybe we don't need to keep the sequence 
(`codes`). As this PR cleans up child subexpressions during evaluation. The 
order of evaluation seems not important anymore.

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -76,24 +76,32 @@ object ExprCode {
 /**
  * State used for subexpression elimination.
  *
+ * @param code The sequence of statements required to evaluate the 
subexpression.
  * @param isNull A term that holds a boolean value representing whether the 
expression evaluated
  *               to null.
  * @param value A term for a value of a common sub-expression. Not valid if 
`isNull`
  *              is set to `true`.
+ * @param childrenSubExprs The sequence of subexpressions as the children 
expressions. Before
+ *                         evaluating this subexpression, we should evaluate 
all children
+ *                         subexpressions first. This is used if we want to 
selectively evaluate
+ *                         particular subexpressions, instead of all at once. 
In the case, we need
+ *                         to make sure we evaluate all children 
subexpressions too.
  */
-case class SubExprEliminationState(isNull: ExprValue, value: ExprValue)
+case class SubExprEliminationState(

Review comment:
       Yea, maybe eliminate this and reuse `ExprCode`?

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1111,24 +1144,33 @@ class CodegenContext extends Logging {
                |}
                """.stripMargin
 
-          val state = SubExprEliminationState(isNull, JavaCode.global(value, 
expr.dataType))
-          exprs.foreach(localSubExprEliminationExprs.put(_, state))
+          // Collects other subexpressions from the children.
+          val childrenSubExprs = 
mutable.ArrayBuffer.empty[SubExprEliminationState]
+          exprs.head.foreach {
+            case e if subExprEliminationExprs.contains(e) =>
+              childrenSubExprs += subExprEliminationExprs(e)
+            case _ =>
+          }
+
           val inputVariables = inputVars.map(_.variableName).mkString(", ")
-          s"${addNewFunction(fnName, fn)}($inputVariables);"
+          val code = code"${addNewFunction(fnName, fn)}($inputVariables);"
+          val state = SubExprEliminationState(code, isNull, 
JavaCode.global(value, expr.dataType),
+            childrenSubExprs.toSeq.reverse)
+          exprs.foreach(localSubExprEliminationExprs.put(_, state))
         }
-        (splitCodes, localSubExprEliminationExprs, exprCodesNeedEvaluate)
+        (localSubExprEliminationExprs, exprCodesNeedEvaluate)
       } else {
         if (Utils.isTesting) {
           throw 
QueryExecutionErrors.failedSplitSubExpressionError(MAX_JVM_METHOD_PARAMS_LENGTH)
         } else {
           
logInfo(QueryExecutionErrors.failedSplitSubExpressionMsg(MAX_JVM_METHOD_PARAMS_LENGTH))
-          (nonSplitExprCode, localSubExprEliminationExprsForNonSplit, 
Seq.empty)
+          (localSubExprEliminationExprsForNonSplit, Seq.empty)
         }
       }
     } else {
-      (nonSplitExprCode, localSubExprEliminationExprsForNonSplit, Seq.empty)
+      (localSubExprEliminationExprsForNonSplit, Seq.empty)
     }
-    SubExprCodes(codes, subExprsMap.toMap, exprCodes.flatten)
+    SubExprCodes(subExprsMap.toMap, exprCodes.flatten)
   }

Review comment:
       Okay, I noticed it grows a bit longer. Let me add more comments.

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1049,17 +1074,26 @@ class CodegenContext extends Logging {
     // elimination.
     val commonExprs = equivalentExpressions.getAllEquivalentExprs(1)
 
-    val nonSplitExprCode = {
+    val nonSplitCode = {
+      val allStates = mutable.ArrayBuffer.empty[SubExprEliminationState]
       commonExprs.map { exprs =>
-        val eval = 
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
+        
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
           val eval = exprs.head.genCode(this)
-          // Generate the code for this expression tree.
-          val state = SubExprEliminationState(eval.isNull, eval.value)
+          // Collects other subexpressions from the children.
+          val childrenSubExprs = 
mutable.ArrayBuffer.empty[SubExprEliminationState]
+          exprs.head.foreach {
+            case e if subExprEliminationExprs.contains(e) =>
+              childrenSubExprs += subExprEliminationExprs(e)
+            case _ =>
+          }
+          val state = SubExprEliminationState(eval.code, eval.isNull, 
eval.value,

Review comment:
       You mean `SubExprEliminationState(eval: ExprCode, children: ...)`?
   `

##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
##########
@@ -1049,17 +1074,26 @@ class CodegenContext extends Logging {
     // elimination.
     val commonExprs = equivalentExpressions.getAllEquivalentExprs(1)
 
-    val nonSplitExprCode = {
+    val nonSplitCode = {
+      val allStates = mutable.ArrayBuffer.empty[SubExprEliminationState]
       commonExprs.map { exprs =>
-        val eval = 
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
+        
withSubExprEliminationExprs(localSubExprEliminationExprsForNonSplit.toMap) {
           val eval = exprs.head.genCode(this)
-          // Generate the code for this expression tree.
-          val state = SubExprEliminationState(eval.isNull, eval.value)
+          // Collects other subexpressions from the children.
+          val childrenSubExprs = 
mutable.ArrayBuffer.empty[SubExprEliminationState]
+          exprs.head.foreach {
+            case e if subExprEliminationExprs.contains(e) =>
+              childrenSubExprs += subExprEliminationExprs(e)
+            case _ =>
+          }
+          val state = SubExprEliminationState(eval.code, eval.isNull, 
eval.value,

Review comment:
       You mean `SubExprEliminationState(eval: ExprCode, children: ...)`?
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to