cloud-fan commented on a change in pull request #33142:
URL: https://github.com/apache/spark/pull/33142#discussion_r660902531



##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -159,35 +156,34 @@ class SubexpressionEliminationSuite extends SparkFunSuite 
with ExpressionEvalHel
     equivalence1.addExprTree(ifExpr1)
 
     // `add` is in both two branches of `If` and predicate.
-    assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head == 
Seq(add, add))
+    assert(equivalence1.getAllExprStates().count(_.useCount == 3) == 1)

Review comment:
       3 is the right answer: `add` appears in the condition and in both the 
true and false branches.

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -159,35 +156,34 @@ class SubexpressionEliminationSuite extends SparkFunSuite 
with ExpressionEvalHel
     equivalence1.addExprTree(ifExpr1)
 
     // `add` is in both two branches of `If` and predicate.
-    assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head == 
Seq(add, add))
+    assert(equivalence1.getAllExprStates().count(_.useCount == 3) == 1)
+    assert(equivalence1.getAllExprStates().filter(_.useCount == 3).head.expr 
eq add)
     // one-time expressions: only ifExpr and its predicate expression
-    assert(equivalence1.getAllEquivalentExprs().count(_.size == 1) == 2)
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 
1).contains(Seq(ifExpr1)))
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 
1).contains(Seq(condition)))
+    assert(equivalence1.getAllExprStates().count(_.useCount == 1) == 2)
+    assert(equivalence1.getAllExprStates().filter(_.useCount == 
1).exists(_.expr eq ifExpr1))
+    assert(equivalence1.getAllExprStates().filter(_.useCount == 
1).exists(_.expr eq condition))
 
     // Repeated `add` is only in one branch, so we don't count it.
     val ifExpr2 = If(condition, Add(Literal(1), Literal(3)), Add(add, add))
     val equivalence2 = new EquivalentExpressions
     equivalence2.addExprTree(ifExpr2)
 
-    assert(equivalence2.getAllEquivalentExprs(1).size == 0)
-    assert(equivalence2.getAllEquivalentExprs().count(_.size == 1) == 3)
+    assert(equivalence2.getAllExprStates(1).isEmpty)
+    assert(equivalence2.getAllExprStates().count(_.useCount == 1) == 3)
 
     val ifExpr3 = If(condition, ifExpr1, ifExpr1)
     val equivalence3 = new EquivalentExpressions
     equivalence3.addExprTree(ifExpr3)
 
-    // `add`: 2, `condition`: 2
-    assert(equivalence3.getAllEquivalentExprs().count(_.size == 2) == 2)
-    assert(equivalence3.getAllEquivalentExprs().filter(_.size == 
2).contains(Seq(add, add)))
-    assert(
-      equivalence3.getAllEquivalentExprs().filter(_.size == 
2).contains(Seq(condition, condition)))
+    // `ifExpr1`: 2, `condition`: 2, `add`: 2

Review comment:
       See https://github.com/apache/spark/pull/33142/files#r660899263
   
   `ifExpr1` is indeed a common subexpression

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -202,8 +198,8 @@ class SubexpressionEliminationSuite extends SparkFunSuite 
with ExpressionEvalHel
     equivalence1.addExprTree(caseWhenExpr1)
 
     // `add2` is repeatedly in all conditions.
-    assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head == 
Seq(add2, add2))
+    assert(equivalence1.getAllExprStates().count(_.useCount == 4) == 1)

Review comment:
       The actual count is 3, but the first condition is counted twice (in both 
`childrenToRecurse` and `commonChildrenToRecurse`). This is hard to fix but not 
a big deal.

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -240,8 +236,8 @@ class SubexpressionEliminationSuite extends SparkFunSuite 
with ExpressionEvalHel
     equivalence1.addExprTree(coalesceExpr1)
 
     // `add2` is repeatedly in all conditions.
-    assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
-    assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head == 
Seq(add2, add2))
+    assert(equivalence1.getAllExprStates().count(_.useCount == 4) == 1)

Review comment:
       ditto, actual count should be 3

##########
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -315,15 +311,17 @@ class SubexpressionEliminationSuite extends SparkFunSuite 
with ExpressionEvalHel
     val add1 = Add(Literal(1), Literal(2))
     val add2 = Add(Literal(2), Literal(3))
     val add3 = Add(add1, add2)
-    val condition = (GreaterThan(add3, Literal(3)), add3) :: Nil
+    val condition = (Literal(true), add3) :: Nil

Review comment:
       This is a long-standing issue, discussed in 
https://github.com/apache/spark/pull/32559/files#r633911273
   
   If the condition is `GreaterThan(add3, Literal(3))`, then `add1` and `add2` 
will be treated as common subexpressions, similar to 
https://github.com/apache/spark/pull/33142/files#diff-27f45b563969445dc46c8586c8f45c59cd1a583d400bc69457ea6f2e3c94f202R178
   
   This is because the first condition of `CaseWhen` is part of 
`childrenToRecurse`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to