viirya commented on a change in pull request #33281:
URL: https://github.com/apache/spark/pull/33281#discussion_r667357944



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
##########
@@ -70,34 +96,39 @@ class EquivalentExpressions {
    * For example, if `((a + b) + c)` and `(a + b)` are common expressions, we 
only add
    * `((a + b) + c)`.
    */
-  private def addCommonExprs(
+  private def updateCommonExprs(
       exprs: Seq[Expression],
-      map: mutable.HashMap[ExpressionEquals, ExpressionStats]): Unit = {
+      map: mutable.HashMap[ExpressionEquals, ExpressionStats],
+      useCount: Int): Unit = {
     assert(exprs.length > 1)
     var localEquivalenceMap = mutable.HashMap.empty[ExpressionEquals, 
ExpressionStats]
-    addExprTree(exprs.head, localEquivalenceMap)
+    updateExprTree(exprs.head, localEquivalenceMap)
 
     exprs.tail.foreach { expr =>
       val otherLocalEquivalenceMap = mutable.HashMap.empty[ExpressionEquals, 
ExpressionStats]
-      addExprTree(expr, otherLocalEquivalenceMap)
+      updateExprTree(expr, otherLocalEquivalenceMap)
       localEquivalenceMap = localEquivalenceMap.filter { case (key, _) =>
         otherLocalEquivalenceMap.contains(key)
       }
     }
 
-    localEquivalenceMap.foreach { case (commonExpr, state) =>
-      val possibleParents = localEquivalenceMap.filter { case (_, v) => 
v.height > state.height }
-      val notChild = possibleParents.forall { case (k, _) =>
-        k == commonExpr || k.e.find(_.semanticEquals(commonExpr.e)).isEmpty
-      }
-      if (notChild) {
-        // If the `commonExpr` already appears in the equivalence map, calling 
`addExprTree` will
-        // increase the `useCount` and mark it as a common subexpression. 
Otherwise, `addExprTree`
-        // will recursively add `commonExpr` and its descendant to the 
equivalence map, in case
-        // they also appear in other places. For example, `If(a + b > 1, a + b 
+ c, a + b + c)`,
-        // `a + b` also appears in the condition and should be treated as 
common subexpression.
-        addExprTree(commonExpr.e, map)
-      }
+    // Start with the highest common expression, update `map` with the 
expression and remove it (and
+    // its children recursively if required) from `localEquivalenceMap`. The 
remaining highest
+    // expression in `localEquivalenceMap` is also common expression.
+    var statsOption = 
Some(localEquivalenceMap).filter(_.nonEmpty).map(_.maxBy(_._1.height)._2)
+    while (statsOption.nonEmpty) {
+      val stats = statsOption.get
+      updateExprTree(stats.expr, localEquivalenceMap, -stats.useCount)

Review comment:
       Oh, I got your idea here. For performance, is there significant 
difference? The current approach is simpler. The filtering in 
`localEquivalenceMap` is based on height so it should be very fast. Is this 
still a performance bottleneck?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to