Repository: spark
Updated Branches:
  refs/heads/master c0e333dbe -> 5596ce83c


[MINOR][SQL] Additional test case for CheckCartesianProducts rule

## What changes were proposed in this pull request?

While discovering optimization rules and their test coverage, I did not find 
any tests for `CheckCartesianProducts` in the Catalyst folder. So, I decided to 
create a new test suite. Once I finished, I found a test in `JoinSuite` for 
this functionality so feel free to discard this change if it does not make much 
sense. The proposed test suite covers a few additional use cases.

Author: aokolnychyi <anton.okolnyc...@sap.com>

Closes #18909 from aokolnychyi/check-cartesian-join-tests.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5596ce83
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5596ce83
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5596ce83

Branch: refs/heads/master
Commit: 5596ce83c4d1971510dacfee4c8b0cf438475135
Parents: c0e333d
Author: aokolnychyi <anton.okolnyc...@sap.com>
Authored: Sun Aug 13 21:33:16 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Sun Aug 13 21:33:16 2017 -0700

----------------------------------------------------------------------
 .../optimizer/CheckCartesianProductsSuite.scala | 91 ++++++++++++++++++++
 .../resources/sql-tests/inputs/cross-join.sql   |  3 +-
 .../sql-tests/results/cross-join.sql.out        | 12 ++-
 .../scala/org/apache/spark/sql/JoinSuite.scala  | 32 +++++++
 4 files changed, 136 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala
new file mode 100644
index 0000000..21220b3
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CheckCartesianProductsSuite.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.optimizer
+
+import org.scalatest.Matchers._
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.internal.SQLConf.CROSS_JOINS_ENABLED
+
+class CheckCartesianProductsSuite extends PlanTest {
+
+  object Optimize extends RuleExecutor[LogicalPlan] {
+    val batches = Batch("Check Cartesian Products", Once, 
CheckCartesianProducts) :: Nil
+  }
+
+  val testRelation1 = LocalRelation('a.int, 'b.int)
+  val testRelation2 = LocalRelation('c.int, 'd.int)
+
+  val joinTypesWithRequiredCondition = Seq(Inner, LeftOuter, RightOuter, 
FullOuter)
+  val joinTypesWithoutRequiredCondition = Seq(LeftSemi, LeftAnti, 
ExistenceJoin('exists))
+
+  test("CheckCartesianProducts doesn't throw an exception if cross joins are 
enabled)") {
+    withSQLConf(CROSS_JOINS_ENABLED.key -> "true") {
+      noException should be thrownBy {
+        for (joinType <- joinTypesWithRequiredCondition ++ 
joinTypesWithoutRequiredCondition) {
+          performCartesianProductCheck(joinType)
+        }
+      }
+    }
+  }
+
+  test("CheckCartesianProducts throws an exception for join types that require 
a join condition") {
+    withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
+      for (joinType <- joinTypesWithRequiredCondition) {
+        val thrownException = the [AnalysisException] thrownBy {
+          performCartesianProductCheck(joinType)
+        }
+        assert(thrownException.message.contains("Detected cartesian product"))
+      }
+    }
+  }
+
+  test("CheckCartesianProducts doesn't throw an exception if a join condition 
is present") {
+    withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
+      for (joinType <- joinTypesWithRequiredCondition) {
+        noException should be thrownBy {
+          performCartesianProductCheck(joinType, Some('a === 'd))
+        }
+      }
+    }
+  }
+
+  test("CheckCartesianProducts doesn't throw an exception if join types don't 
require conditions") {
+    withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
+      for (joinType <- joinTypesWithoutRequiredCondition) {
+        noException should be thrownBy {
+          performCartesianProductCheck(joinType)
+        }
+      }
+    }
+  }
+
+  private def performCartesianProductCheck(
+      joinType: JoinType,
+      condition: Option[Expression] = None): Unit = {
+    val analyzedPlan = testRelation1.join(testRelation2, joinType, 
condition).analyze
+    val optimizedPlan = Optimize.execute(analyzedPlan)
+    comparePlans(analyzedPlan, optimizedPlan)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql
index aa73124..b64197e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cross-join.sql
@@ -32,4 +32,5 @@ create temporary view D(d, vd) as select * from nt1;
 
 -- Allowed since cross join with C is explicit
 select * from ((A join B on (a = b)) cross join C) join D on (a = d);
-
+-- Cross joins with non-equal predicates
+SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k);

http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out 
b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
index 562e174..e75cc44 100644
--- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 12
+-- Number of queries: 13
 
 
 -- !query 0
@@ -127,3 +127,13 @@ three      3       three   3       two     2       three   
3
 two    2       two     2       one     1       two     2
 two    2       two     2       three   3       two     2
 two    2       two     2       two     2       two     2
+
+-- !query 12
+SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k)
+-- !query 12 schema
+struct<k:string,v1:int,k:string,v2:int>
+-- !query 12 output
+three  3       one     1
+three  3       one     5
+two    2       one     1
+two    2       one     5

http://git-wip-us.apache.org/repos/asf/spark/blob/5596ce83/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 0008d50..3f0c864 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -216,6 +216,9 @@ class JoinSuite extends QueryTest with SharedSQLContext {
           Row(1, null, 2, 2) ::
           Row(2, 2, 1, null) ::
           Row(2, 2, 2, 2) :: Nil)
+      checkAnswer(
+        testData3.as("x").join(testData3.as("y"), $"x.a" > $"y.a"),
+        Row(2, 2, 1, null) :: Nil)
     }
     withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
       val e = intercept[Exception] {
@@ -604,6 +607,35 @@ class JoinSuite extends QueryTest with SharedSQLContext {
     }
 
     cartesianQueries.foreach(checkCartesianDetection)
+
+    // Check that left_semi, left_anti, existence joins without conditions do 
not throw
+    // an exception if cross joins are disabled
+    withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
+      checkAnswer(
+        sql("SELECT * FROM testData3 LEFT SEMI JOIN testData2"),
+        Row(1, null) :: Row (2, 2) :: Nil)
+      checkAnswer(
+        sql("SELECT * FROM testData3 LEFT ANTI JOIN testData2"),
+        Nil)
+      checkAnswer(
+        sql(
+          """
+            |SELECT a FROM testData3
+            |WHERE
+            |  EXISTS (SELECT * FROM testData)
+            |OR
+            |  EXISTS (SELECT * FROM testData2)""".stripMargin),
+        Row(1) :: Row(2) :: Nil)
+      checkAnswer(
+        sql(
+          """
+            |SELECT key FROM testData
+            |WHERE
+            |  key IN (SELECT a FROM testData2)
+            |OR
+            |  key IN (SELECT a FROM testData3)""".stripMargin),
+        Row(1) :: Row(2) :: Row(3) :: Nil)
+    }
   }
 
   test("test SortMergeJoin (without spill)") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to