viirya commented on a change in pull request #30245:
URL: https://github.com/apache/spark/pull/30245#discussion_r520084936



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala
##########
@@ -65,11 +65,66 @@ class EquivalentExpressions {
     }
   }
 
+  /**
+   * Adds only expressions which are common in each of given expressions, in a 
recursive way.
+   * For example, given two expressions `(a + (b + (c + 1)))` and `(d + (e + 
(c + 1)))`,
+   * the common expression `(c + 1)` will be added into `equivalenceMap`.
+   */
+  def addCommonExprs(exprs: Seq[Expression], addFunc: Expression => Boolean = 
addExpr): Unit = {
+    var exprSetForAll = ExpressionSet()
+
+    addExprTree(exprs.head, (expr: Expression) => {
+      if (expr.deterministic) {
+        if (exprSetForAll.contains(expr)) {
+          true
+        } else {
+          exprSetForAll += expr
+          false
+        }
+      } else {
+        false
+      }
+    })
+
+    exprs.tail.foreach { expr =>
+      var exprSet = ExpressionSet()
+      addExprTree(expr, (expr: Expression) => {
+        if (expr.deterministic) {
+          if (exprSet.contains(expr)) {
+            true
+          } else {
+            exprSet += expr
+            false
+          }
+        } else {
+          false
+        }
+      })
+      exprSetForAll = exprSetForAll.intersect(exprSet)
+    }
+
+    exprSetForAll.foreach(addFunc)
+  }
+
+  // For some special expressions we cannot just recurse into all of its 
children, but we can
+  // recursively add the common expressions shared between all of its children.
+  def commonChildrenToRecurse(expr: Expression): Seq[Seq[Expression]] = expr 
match {
+    case i: If => Seq(Seq(i.trueValue, i.falseValue))
+    case c: CaseWhen =>
+      val conditions = c.branches.tail.map(_._1)
+      val values = c.branches.map(_._2) ++ c.elseValue
+      Seq(conditions, values)

Review comment:
       A subexpression that is shared among all `conditions` is definitely 
evaluated, so it doesn't also need to be shared in `values`. Similarly, a 
subexpression that is shared among all `values` doesn't need to be in 
`conditions`, because it will be evaluated no matter which condition is true.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to