Repository: spark Updated Branches: refs/heads/master 6b80ce4fb -> 13e489b67
[SPARK-22759][SQL] Filters can be combined iff both are deterministic ## What changes were proposed in this pull request? The query execution/optimization does not guarantee the expressions are evaluated in order. We can combine them if and only if both are deterministic. We need to update the optimizer rule: CombineFilters. ## How was this patch tested? Updated the existing tests. Author: gatorsmile <gatorsm...@gmail.com> Closes #19947 from gatorsmile/combineFilters. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/13e489b6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/13e489b6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/13e489b6 Branch: refs/heads/master Commit: 13e489b6754f4d3569dad99bf5be2d5b0914dd68 Parents: 6b80ce4 Author: gatorsmile <gatorsm...@gmail.com> Authored: Tue Dec 12 22:48:31 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Tue Dec 12 22:48:31 2017 -0800 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 +++- .../sql/catalyst/optimizer/FilterPushdownSuite.scala | 12 ++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/13e489b6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 484cd8c..5776935 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -695,7 +695,9 @@ object CombineUnions extends Rule[LogicalPlan] 
{ */ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Filter(fc, nf @ Filter(nc, grandChild)) => + // The query execution/optimization does not guarantee the expressions are evaluated in order. + // We only can combine them if and only if both are deterministic. + case Filter(fc, nf @ Filter(nc, grandChild)) if fc.deterministic && nc.deterministic => (ExpressionSet(splitConjunctivePredicates(fc)) -- ExpressionSet(splitConjunctivePredicates(nc))).reduceOption(And) match { case Some(ac) => http://git-wip-us.apache.org/repos/asf/spark/blob/13e489b6/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index de0e7c7..641824e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -94,19 +94,15 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("combine redundant deterministic filters") { + test("do not combine non-deterministic filters even if they are identical") { val originalQuery = testRelation .where(Rand(0) > 0.1 && 'a === 1) - .where(Rand(0) > 0.1 && 'a === 1) + .where(Rand(0) > 0.1 && 'a === 1).analyze - val optimized = Optimize.execute(originalQuery.analyze) - val correctAnswer = - testRelation - .where(Rand(0) > 0.1 && 'a === 1 && Rand(0) > 0.1) - .analyze + val optimized = Optimize.execute(originalQuery) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, originalQuery) } test("SPARK-16164: Filter pushdown should keep the ordering in the logical plan") 
{ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org