Repository: spark Updated Branches: refs/heads/master 6b80ce4fb -> 13e489b67
[SPARK-22759][SQL] Filters can be combined iff both are deterministic ## What changes were proposed in this pull request? The query execution/optimization does not guarantee the expressions are evaluated in order. We can combine them if and only if both are deterministic. We need to update the optimizer rule: CombineFilters. ## How was this patch tested? Updated the existing tests. Author: gatorsmile <gatorsm...@gmail.com> Closes #19947 from gatorsmile/combineFilters. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/13e489b6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/13e489b6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/13e489b6 Branch: refs/heads/master Commit: 13e489b6754f4d3569dad99bf5be2d5b0914dd68 Parents: 6b80ce4 Author: gatorsmile <gatorsm...@gmail.com> Authored: Tue Dec 12 22:48:31 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Tue Dec 12 22:48:31 2017 -0800 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 +++- .../sql/catalyst/optimizer/FilterPushdownSuite.scala | 12 ++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/13e489b6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 484cd8c..5776935 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -695,7 +695,9 @@ object CombineUnions extends Rule[LogicalPlan] 
{ */ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Filter(fc, nf @ Filter(nc, grandChild)) => + // The query execution/optimization does not guarantee the expressions are evaluated in order. + // We only can combine them if and only if both are deterministic. + case Filter(fc, nf @ Filter(nc, grandChild)) if fc.deterministic && nc.deterministic => (ExpressionSet(splitConjunctivePredicates(fc)) -- ExpressionSet(splitConjunctivePredicates(nc))).reduceOption(And) match { case Some(ac) => http://git-wip-us.apache.org/repos/asf/spark/blob/13e489b6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index de0e7c7..641824e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -94,19 +94,15 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("combine redundant deterministic filters") { + test("do not combine non-deterministic filters even if they are identical") { val originalQuery = testRelation .where(Rand(0) > 0.1 && 'a === 1) - .where(Rand(0) > 0.1 && 'a === 1) + .where(Rand(0) > 0.1 && 'a === 1).analyze - val optimized = Optimize.execute(originalQuery.analyze) - val correctAnswer = - testRelation - .where(Rand(0) > 0.1 && 'a === 1 && Rand(0) > 0.1) - .analyze + val optimized = Optimize.execute(originalQuery) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, originalQuery) } test("SPARK-16164: Filter pushdown should keep the ordering in the logical plan") 
{ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org