cloud-fan commented on a change in pull request #33142:
URL: https://github.com/apache/spark/pull/33142#discussion_r660902531
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -159,35 +156,34 @@ class SubexpressionEliminationSuite extends SparkFunSuite
with ExpressionEvalHel
equivalence1.addExprTree(ifExpr1)
// `add` is in both two branches of `If` and predicate.
- assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
- assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head ==
Seq(add, add))
+ assert(equivalence1.getAllExprStates().count(_.useCount == 3) == 1)
Review comment:
3 is the right answer: `add` appears in the condition, the true branch,
and the false branch.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -159,35 +156,34 @@ class SubexpressionEliminationSuite extends SparkFunSuite
with ExpressionEvalHel
equivalence1.addExprTree(ifExpr1)
// `add` is in both two branches of `If` and predicate.
- assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
- assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head ==
Seq(add, add))
+ assert(equivalence1.getAllExprStates().count(_.useCount == 3) == 1)
+ assert(equivalence1.getAllExprStates().filter(_.useCount == 3).head.expr
eq add)
// one-time expressions: only ifExpr and its predicate expression
- assert(equivalence1.getAllEquivalentExprs().count(_.size == 1) == 2)
- assert(equivalence1.getAllEquivalentExprs().filter(_.size ==
1).contains(Seq(ifExpr1)))
- assert(equivalence1.getAllEquivalentExprs().filter(_.size ==
1).contains(Seq(condition)))
+ assert(equivalence1.getAllExprStates().count(_.useCount == 1) == 2)
+ assert(equivalence1.getAllExprStates().filter(_.useCount ==
1).exists(_.expr eq ifExpr1))
+ assert(equivalence1.getAllExprStates().filter(_.useCount ==
1).exists(_.expr eq condition))
// Repeated `add` is only in one branch, so we don't count it.
val ifExpr2 = If(condition, Add(Literal(1), Literal(3)), Add(add, add))
val equivalence2 = new EquivalentExpressions
equivalence2.addExprTree(ifExpr2)
- assert(equivalence2.getAllEquivalentExprs(1).size == 0)
- assert(equivalence2.getAllEquivalentExprs().count(_.size == 1) == 3)
+ assert(equivalence2.getAllExprStates(1).isEmpty)
+ assert(equivalence2.getAllExprStates().count(_.useCount == 1) == 3)
val ifExpr3 = If(condition, ifExpr1, ifExpr1)
val equivalence3 = new EquivalentExpressions
equivalence3.addExprTree(ifExpr3)
- // `add`: 2, `condition`: 2
- assert(equivalence3.getAllEquivalentExprs().count(_.size == 2) == 2)
- assert(equivalence3.getAllEquivalentExprs().filter(_.size ==
2).contains(Seq(add, add)))
- assert(
- equivalence3.getAllEquivalentExprs().filter(_.size ==
2).contains(Seq(condition, condition)))
+ // `ifExpr1`: 2, `condition`: 2, `add`: 2
Review comment:
See https://github.com/apache/spark/pull/33142/files#r660899263
`ifExpr1` is indeed a common subexpression.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -202,8 +198,8 @@ class SubexpressionEliminationSuite extends SparkFunSuite
with ExpressionEvalHel
equivalence1.addExprTree(caseWhenExpr1)
// `add2` is repeatedly in all conditions.
- assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
- assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head ==
Seq(add2, add2))
+ assert(equivalence1.getAllExprStates().count(_.useCount == 4) == 1)
Review comment:
The correct count is 3, but the first condition is counted twice (in both
`childrenToRecurse` and `commonChildrenToRecurse`), so the reported count is 4.
This is hard to fix but not a big deal.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -240,8 +236,8 @@ class SubexpressionEliminationSuite extends SparkFunSuite
with ExpressionEvalHel
equivalence1.addExprTree(coalesceExpr1)
// `add2` is repeatedly in all conditions.
- assert(equivalence1.getAllEquivalentExprs().count(_.size == 2) == 1)
- assert(equivalence1.getAllEquivalentExprs().filter(_.size == 2).head ==
Seq(add2, add2))
+ assert(equivalence1.getAllExprStates().count(_.useCount == 4) == 1)
Review comment:
Ditto: the actual count should be 3.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala
##########
@@ -315,15 +311,17 @@ class SubexpressionEliminationSuite extends SparkFunSuite
with ExpressionEvalHel
val add1 = Add(Literal(1), Literal(2))
val add2 = Add(Literal(2), Literal(3))
val add3 = Add(add1, add2)
- val condition = (GreaterThan(add3, Literal(3)), add3) :: Nil
+ val condition = (Literal(true), add3) :: Nil
Review comment:
This is a long-standing issue, discussed in
https://github.com/apache/spark/pull/32559/files#r633911273
If the condition is `GreaterThan(add3, Literal(3))`, then `add1` and `add2`
will be treated as common subexpressions, similar to
https://github.com/apache/spark/pull/33142/files#diff-27f45b563969445dc46c8586c8f45c59cd1a583d400bc69457ea6f2e3c94f202R178
This is because the first condition of `CaseWhen` is part of
`childrenToRecurse`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]