peter-toth commented on code in PR #41677:
URL: https://github.com/apache/spark/pull/41677#discussion_r1385001916
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala:
##########
@@ -243,28 +414,43 @@ class EquivalentExpressions(
/**
* Wrapper around an Expression that provides semantic equality.
*/
-case class ExpressionEquals(e: Expression) {
+case class ExpressionEquals(expr: Expression) {
private def getHeight(tree: Expression): Int = {
tree.children.map(getHeight).reduceOption(_ max _).getOrElse(0) + 1
}
// This is used to do a fast pre-check for child-parent relationship. For
example, expr1 can
// only be a parent of expr2 if expr1.height is larger than expr2.height.
- lazy val height = getHeight(e)
+ lazy val height = getHeight(expr)
override def equals(o: Any): Boolean = o match {
- case other: ExpressionEquals => e.semanticEquals(other.e) && height ==
other.height
+ case other: ExpressionEquals =>
+ expr.canonicalized == other.expr.canonicalized && height == other.height
case _ => false
}
- override def hashCode: Int = Objects.hash(e.semanticHash(): Integer, height:
Integer)
+ override def hashCode: Int = Objects.hash(expr.semanticHash(): Integer,
height: Integer)
}
/**
- * A wrapper in place of using Seq[Expression] to record a group of equivalent
expressions.
+ * This class stores the expected evaluation count of expressions split into
`evalCount` +
+ * `realEvalCount` that records sure evaluations and `condEvalCount` +
`realCondEvalCount` that
+ * records conditional evaluations. The `real...` fields are filled up during
`inflate()`.
Review Comment:
Btw `transitive` (currently called `real`) is actually a sum of the direct
additions and the transitive additions from parent expressions.
If we `addExprTree(a + b)` and `addExpr((a + b) + 1)` then the direct of `a
+ b` is 1 and the transitive of `a + b` is 2.
We wouldn't need the `transitive` fields if did recurse during
`addExprTree()`.
But you know, the previous version of `EquivalentExpressions` used
`useCount`. And with `useCount` when same or overlapping expressions were added
to the data structure the second `addExprTree()` didn't fully recurse, but it
stopped when the first common subexpression was found. Now with the new
structure, we just record the `direct` additions during `addExprTree()` and
fill the `transitives` during `inflate()` to be par on with the old version.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala:
##########
@@ -243,28 +414,43 @@ class EquivalentExpressions(
/**
* Wrapper around an Expression that provides semantic equality.
*/
-case class ExpressionEquals(e: Expression) {
+case class ExpressionEquals(expr: Expression) {
private def getHeight(tree: Expression): Int = {
tree.children.map(getHeight).reduceOption(_ max _).getOrElse(0) + 1
}
// This is used to do a fast pre-check for child-parent relationship. For
example, expr1 can
// only be a parent of expr2 if expr1.height is larger than expr2.height.
- lazy val height = getHeight(e)
+ lazy val height = getHeight(expr)
override def equals(o: Any): Boolean = o match {
- case other: ExpressionEquals => e.semanticEquals(other.e) && height ==
other.height
+ case other: ExpressionEquals =>
+ expr.canonicalized == other.expr.canonicalized && height == other.height
case _ => false
}
- override def hashCode: Int = Objects.hash(e.semanticHash(): Integer, height:
Integer)
+ override def hashCode: Int = Objects.hash(expr.semanticHash(): Integer,
height: Integer)
}
/**
- * A wrapper in place of using Seq[Expression] to record a group of equivalent
expressions.
+ * This class stores the expected evaluation count of expressions split into
`evalCount` +
+ * `realEvalCount` that records sure evaluations and `condEvalCount` +
`realCondEvalCount` that
+ * records conditional evaluations. The `real...` fields are filled up during
`inflate()`.
Review Comment:
Btw `transitive` (currently called `real`) is actually a sum of the direct
additions and the transitive additions from parent expressions.
If we `addExprTree(a + b)` and `addExpr((a + b) + 1)` then the direct of `a
+ b` is 1 and the transitive of `a + b` is 2.
We wouldn't need the `transitive` fields if we did recurse during
`addExprTree()`.
But you know, the previous version of `EquivalentExpressions` used
`useCount`. And with `useCount` when same or overlapping expressions were added
to the data structure the second `addExprTree()` didn't fully recurse, but it
stopped when the first common subexpression was found. Now with the new
structure, we just record the `direct` additions during `addExprTree()` and
fill the `transitives` during `inflate()` to be par on with the old version.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]