spark git commit: [SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down

marmbrus Tue, 15 Dec 2015 17:03:07 -0800

Repository: spark
Updated Branches:
  refs/heads/master 86ea64dd1 -> 28112657e



[SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed 
down

https://issues.apache.org/jira/browse/SPARK-12236
Currently JDBC filters are not tested properly. Because Spark filters the rows 
again on its own side, all the tests pass even if the filters are not pushed down.

In this PR,
Firstly, I corrected the tests to properly check the pushed down filters by 
removing Spark-side filtering.
Also, `!=` was being tested even though it is not actually pushed down, so I 
removed those tests.
Lastly, I moved the `stripSparkFilter()` function to `SQLTestUtils`, as this 
function will be shared by all tests for pushed-down filters. It will also be 
shared with the ORC datasource, whose filters are likewise not being tested 
properly.

Author: hyukjinkwon <gurwls...@gmail.com>

Closes #10221 from HyukjinKwon/SPARK-12236.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28112657
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28112657
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28112657

Branch: refs/heads/master
Commit: 28112657ea5919451291c21b4b8e1eb3db0ec8d4
Parents: 86ea64d
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Tue Dec 15 17:02:14 2015 -0800
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Tue Dec 15 17:02:14 2015 -0800

----------------------------------------------------------------------
 .../datasources/parquet/ParquetFilterSuite.scala     | 15 ---------------
 .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  | 10 ++++------
 .../org/apache/spark/sql/test/SQLTestUtils.scala     | 15 +++++++++++++++
 3 files changed, 19 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index daf41bc..6178e37 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -110,21 +110,6 @@ class ParquetFilterSuite extends QueryTest with 
ParquetTest with SharedSQLContex
     checkBinaryFilterPredicate(predicate, filterClass, Seq(Row(expected)))(df)
   }
 
-  /**
-   * Strip Spark-side filtering in order to check if a datasource filters rows 
correctly.
-   */
-  protected def stripSparkFilter(df: DataFrame): DataFrame = {
-    val schema = df.schema
-    val childRDD = df
-      .queryExecution
-      .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
-      .child
-      .execute()
-      .map(row => Row.fromSeq(row.toSeq(schema)))
-
-    sqlContext.createDataFrame(childRDD, schema)
-  }
-
   test("filter pushdown - boolean") {
     withParquetDataFrame((true :: false :: Nil).map(b => 
Tuple1.apply(Option(b)))) { implicit df =>
       checkFilterPredicate('_1.isNull, classOf[Eq[_]], Seq.empty[Row])

http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 8c24aa3..a360947 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -176,12 +176,10 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter 
with SharedSQLContext
   }
 
   test("SELECT * WHERE (simple predicates)") {
-    assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size === 0)
-    assert(sql("SELECT * FROM foobar WHERE THEID != 2").collect().size === 2)
-    assert(sql("SELECT * FROM foobar WHERE THEID = 1").collect().size === 1)
-    assert(sql("SELECT * FROM foobar WHERE NAME = 'fred'").collect().size === 
1)
-    assert(sql("SELECT * FROM foobar WHERE NAME > 'fred'").collect().size === 
2)
-    assert(sql("SELECT * FROM foobar WHERE NAME != 'fred'").collect().size === 
2)
+    assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 
1")).collect().size === 0)
+    assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 
1")).collect().size === 1)
+    assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size === 1)
+    assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size === 2)
   }
 
   test("SELECT * WHERE (quoted strings)") {

http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index 9214569..e87da15 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -180,6 +180,21 @@ private[sql] trait SQLTestUtils
   }
 
   /**
+   * Strip Spark-side filtering in order to check if a datasource filters rows 
correctly.
+   */
+  protected def stripSparkFilter(df: DataFrame): DataFrame = {
+    val schema = df.schema
+    val childRDD = df
+      .queryExecution
+      .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
+      .child
+      .execute()
+      .map(row => Row.fromSeq(row.toSeq(schema)))
+
+    sqlContext.createDataFrame(childRDD, schema)
+  }
+
+  /**
    * Turn a logical plan into a [[DataFrame]]. This should be removed once we 
have an easier
    * way to construct [[DataFrame]] directly out of local data without relying 
on implicits.
    */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down

Reply via email to