maryannxue commented on a change in pull request #34062:
URL: https://github.com/apache/spark/pull/34062#discussion_r780403353
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala
##########
@@ -216,11 +216,53 @@ object PartitionPruning extends Rule[LogicalPlan] with
PredicateHelper with Join
}
/**
- * Search a filtering predicate in a given logical plan
+ * Returns whether an expression is likely to be selective in dynamic
partition filtering.
+ * 1. the predicate is selective.
+ * 2. the filtering predicate must not be subset of join key. In case it is,
then partition
+ * filter can be inferred statically in optimization phase, hence return
false.
*/
- private def hasSelectivePredicate(plan: LogicalPlan): Boolean = {
+ private def isLikelySelectiveWithInferFiltersEnabled(
+ e: Expression,
+ joinKey: Expression): (Boolean, Boolean) = e match {
+ case Not(expr) => isLikelySelectiveWithInferFiltersEnabled(expr, joinKey)
+ case And(l, r) =>
+ val (isSelectiveLeft, notSubsetOfJoinKeyLeft) =
+ isLikelySelectiveWithInferFiltersEnabled(l, joinKey)
+ val (isSelectiveRight, notSubsetOfJoinKeyRight) =
+ isLikelySelectiveWithInferFiltersEnabled(r, joinKey)
+ (isSelectiveLeft || isSelectiveRight, notSubsetOfJoinKeyLeft ||
notSubsetOfJoinKeyRight)
+ case Or(l, r) =>
+ val (isSelectiveLeft, notSubsetOfJoinKeyLeft) =
+ isLikelySelectiveWithInferFiltersEnabled(l, joinKey)
+ val (isSelectiveRight, notSubsetOfJoinKeyRight) =
+ isLikelySelectiveWithInferFiltersEnabled(r, joinKey)
+ (isSelectiveLeft && isSelectiveRight && (notSubsetOfJoinKeyLeft ||
notSubsetOfJoinKeyRight),
+ notSubsetOfJoinKeyLeft || notSubsetOfJoinKeyRight)
+ case expr: StringRegexExpression => (true, !isSubsetOfJoinKey(expr,
joinKey))
+ case expr: BinaryComparison => (true, !isSubsetOfJoinKey(expr, joinKey))
+ case expr: In => (true, !isSubsetOfJoinKey(expr, joinKey))
+ case expr: InSet => (true, !isSubsetOfJoinKey(expr, joinKey))
+ case expr: StringPredicate => (true, !isSubsetOfJoinKey(expr, joinKey))
+ case expr: MultiLikeBase => (true, !isSubsetOfJoinKey(expr, joinKey))
+ case _ => (false, false)
+ }
+
+ private def isSubsetOfJoinKey(e: Expression, joinKey: Expression): Boolean =
+ e.references.subsetOf(joinKey.references)
+
+ /**
+ * Search a filtering predicate in a given logical plan.
+ */
+ private def hasSelectivePredicate(plan: LogicalPlan, joinKey: Expression):
Boolean = {
Review comment:
Do we need to make this so complicated? This is one of the last rules
applied, and by this time all static filters have already been inferred and
pushed down. Can't we simply test whether there are any static
equality-condition filters on the same partition column that the dynamic
filter would apply to?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]