tanelk commented on pull request #31024:
URL: https://github.com/apache/spark/pull/31024#issuecomment-757121187
> It would be nice to do this recursively.
> Currently you split the outer `and` filters and sort them. each of these
can have an `or` filter as a child and it would be beneficial to split and sort
those too. Inside `or` the filter selectivity rule must be the other way around.
I was thinking something like this.
```diff
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
index 805f9afeba..345c30896f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
@@ -28,7 +28,10 @@ import org.apache.spark.sql.catalyst.rules.Rule
object PredicateReorder extends Rule[LogicalPlan] with PredicateHelper {
// Get priority from a Expression. Expressions with higher priority are
executed in preference
// to expressions with lower priority.
- private def getPriority(exp: Expression, filterEstimation:
Option[FilterEstimation]) = exp match {
+ private def getPriority(
+ exp: Expression,
+ filterEstimation: Option[FilterEstimation],
+ conjuction: Boolean) = exp match {
case e: Expression if
e.find(_.isInstanceOf[SubqueryExpression]).isDefined => 1.0
case e: Expression if
e.find(_.isInstanceOf[UserDefinedExpression]).isDefined => 2.0
case e: Expression if e.find(_.isInstanceOf[MultiLikeBase]).isDefined ||
@@ -38,8 +41,11 @@ object PredicateReorder extends Rule[LogicalPlan] with
PredicateHelper {
case c: CaseWhen => c.branches.size
}.max
3.0 + (1.0 / maxSize)
- case e =>
+ case e if conjuction =>
filterEstimation.flatMap(_.calculateFilterSelectivity(e,
false).map(5.0 - _)).getOrElse(4.0)
+ case e if !conjuction =>
+ filterEstimation.flatMap(_.calculateFilterSelectivity(e,
false).map(4.0 + _)).getOrElse(4.0)
+
}
private def reorderPredicates(e: Expression, estimation:
Option[FilterEstimation]): Expression = {
@@ -47,10 +53,16 @@ object PredicateReorder extends Rule[LogicalPlan] with
PredicateHelper {
case _: Or =>
splitDisjunctivePredicates(e)
.map(reorderPredicates(_, estimation))
+ .map(e => (e, getPriority(e, estimation, conjuction = false)))
+ .sortWith(_._2 > _._2)
+ .map(_._1)
.reduceLeft(Or)
case _: And =>
splitConjunctivePredicates(e)
- .map(e => (e, getPriority(e, estimation))).sortWith(_._2 >
_._2).map(_._1)
+ .map(reorderPredicates(_, estimation))
+ .map(e => (e, getPriority(e, estimation, conjuction = true)))
+ .sortWith(_._2 > _._2)
+ .map(_._1)
.reduceLeft(And)
case _ => e
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]