Repository: spark
Updated Branches:
  refs/heads/master c8706980a -> cd91f9671


[SPARK-20175][SQL] Exists should not be evaluated in Join operator

## What changes were proposed in this pull request?

Like `ListQuery`, `Exists` should not be evaluated in a `Join` operator either.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Liang-Chi Hsieh <[email protected]>

Closes #17491 from viirya/dont-push-exists-to-join.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd91f967
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd91f967
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd91f967

Branch: refs/heads/master
Commit: cd91f967145909852d9af09b10b80f86ed05edb5
Parents: c870698
Author: Liang-Chi Hsieh <[email protected]>
Authored: Tue Apr 11 20:33:10 2017 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Tue Apr 11 20:33:10 2017 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/predicates.scala       |  3 ++-
 .../test/scala/org/apache/spark/sql/SubquerySuite.scala   | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/cd91f967/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 8acb740..5034566 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -92,11 +92,12 @@ trait PredicateHelper {
   protected def canEvaluateWithinJoin(expr: Expression): Boolean = expr match {
     // Non-deterministic expressions are not allowed as join conditions.
     case e if !e.deterministic => false
-    case l: ListQuery =>
+    case _: ListQuery | _: Exists =>
       // A ListQuery defines the query which we want to search in an IN subquery expression.
       // Currently the only way to evaluate an IN subquery is to convert it to a
       // LeftSemi/LeftAnti/ExistenceJoin by `RewritePredicateSubquery` rule.
       // It cannot be evaluated as part of a Join operator.
+      // An Exists shouldn't be pushed into a Join operator either.
       false
     case e: SubqueryExpression =>
       // non-correlated subquery will be replaced as literal

http://git-wip-us.apache.org/repos/asf/spark/blob/cd91f967/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 5fe6667..0f0199c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -844,4 +844,14 @@ class SubquerySuite extends QueryTest with 
SharedSQLContext {
         Row(0) :: Row(1) :: Nil)
     }
   }
+
+  test("ListQuery and Exists should work even no correlated references") {
+    checkAnswer(
+      sql("select * from l, r where l.a = r.c AND (r.d in (select d from r) OR 
l.a >= 1)"),
+      Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) ::
+        Row(2, 1.0, 2, 3.0) :: Row(3.0, 3.0, 3, 2.0) :: Row(6, null, 6, null) 
:: Nil)
+    checkAnswer(
+      sql("select * from l, r where l.a = r.c + 1 AND (exists (select * from 
r) OR l.a = r.c)"),
+      Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to