Repository: spark
Updated Branches:
  refs/heads/master 45c4ebc81 -> a7e5aa6cd


[SPARK-25406][SQL] For ParquetSchemaPruningSuite.scala, move calls to `withSQLConf` inside calls to `test`

(Link to Jira: https://issues.apache.org/jira/browse/SPARK-25406)

## What changes were proposed in this pull request?

The current use of `withSQLConf` in `ParquetSchemaPruningSuite.scala` is 
incorrect: `withSQLConf` wraps the calls to `test`, so the desired 
configuration settings are applied only while the tests are registered, not 
while their bodies actually run.

This PR fixes that defective usage and addresses the test failures that were 
previously masked by that defect.
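
To make the ordering issue concrete, here is a minimal sketch (not code from the patch); the suite name and the use of `SQLConf.CASE_SENSITIVE` are illustrative, and it assumes the usual Spark SQL test mix-ins:

```scala
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext

class WithSQLConfOrderingSuite extends QueryTest with SharedSQLContext {
  // Defective ordering: `test` only *registers* its body with ScalaTest.
  // `withSQLConf` sets and restores the config around that registration,
  // so the body later runs with the default value again.
  withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
    test("registered under the config, but not run under it") {
      // spark.conf.get(SQLConf.CASE_SENSITIVE.key) is the default "false" here.
    }
  }

  // Fixed ordering: the config is set inside the test body, i.e. while it runs.
  test("run under the config") {
    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
      // spark.conf.get(SQLConf.CASE_SENSITIVE.key) is "true" here.
    }
  }
}
```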

## How was this patch tested?

I added code to the relevant test cases to print the effective SQL 
configuration settings and found that they were not being set as expected. 
After swapping the order of the calls to `test` and `withSQLConf`, the 
settings were applied as expected.
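
A hypothetical sketch of that kind of check (not the exact code that was added), printing the effective value from inside one of the bodies registered by `testSchemaPruning`:

```scala
test(s"Spark vectorized reader - without partition data column - $testName") {
  // With the old ordering this printed the session default rather than the
  // value the surrounding withSQLConf was expected to set.
  println(spark.conf.get(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key))
  withContacts(testThunk)
}
```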

Closes #22394 from mallman/spark-25406-fix_broken_schema_pruning_tests.

Authored-by: Michael Allman <m...@allman.ms>
Signed-off-by: DB Tsai <d_t...@apple.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7e5aa6c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7e5aa6c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7e5aa6c

Branch: refs/heads/master
Commit: a7e5aa6cd430d0a49bb6dac92c007fab189db3a3
Parents: 45c4ebc
Author: Michael Allman <m...@allman.ms>
Authored: Thu Sep 13 17:08:45 2018 +0000
Committer: DB Tsai <d_t...@apple.com>
Committed: Thu Sep 13 17:08:45 2018 +0000

----------------------------------------------------------------------
 .../parquet/ParquetSchemaPruningSuite.scala     | 63 ++++++++++++--------
 1 file changed, 38 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a7e5aa6c/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
index 7b132af..434c441 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
@@ -218,20 +218,24 @@ class ParquetSchemaPruningSuite
   }
 
   private def testSchemaPruning(testName: String)(testThunk: => Unit) {
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
-      test(s"Spark vectorized reader - without partition data column - 
$testName") {
+    test(s"Spark vectorized reader - without partition data column - 
$testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
         withContacts(testThunk)
       }
-      test(s"Spark vectorized reader - with partition data column - 
$testName") {
+    }
+    test(s"Spark vectorized reader - with partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
         withContactsWithDataPartitionColumn(testThunk)
       }
     }
 
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
-      test(s"Parquet-mr reader - without partition data column - $testName") {
+    test(s"Parquet-mr reader - without partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
         withContacts(testThunk)
       }
-      test(s"Parquet-mr reader - with partition data column - $testName") {
+    }
+    test(s"Parquet-mr reader - with partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
         withContactsWithDataPartitionColumn(testThunk)
       }
     }
@@ -271,7 +275,7 @@ class ParquetSchemaPruningSuite
     MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) ::
     Nil
 
-  testMixedCasePruning("select with exact column names") {
+  testExactCaseQueryPruning("select with exact column names") {
     val query = sql("select CoL1, coL2.B from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -280,7 +284,7 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("select with lowercase column names") {
+  testMixedCaseQueryPruning("select with lowercase column names") {
     val query = sql("select col1, col2.b from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -289,7 +293,7 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("select with different-case column names") {
+  testMixedCaseQueryPruning("select with different-case column names") {
     val query = sql("select cOL1, cOl2.b from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -298,34 +302,43 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("filter with different-case column names") {
+  testMixedCaseQueryPruning("filter with different-case column names") {
     val query = sql("select id from mixedcase where Col2.b = 2")
     checkScan(query, "struct<id:int,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"), Row(1) :: Nil)
   }
 
-  private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
-      SQLConf.CASE_SENSITIVE.key -> "true") {
-      test(s"Spark vectorized reader - case-sensitive parser - mixed-case 
schema - $testName") {
-          withMixedCaseData(testThunk)
+  // Tests schema pruning for a query whose column and field names are exactly the same as the table
+  // schema's column and field names. N.B. this implies that `testThunk` should pass using either a
+  // case-sensitive or case-insensitive query parser
+  private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) {
+    test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema 
- $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
+        SQLConf.CASE_SENSITIVE.key -> "true") {
+        withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
-      SQLConf.CASE_SENSITIVE.key -> "false") {
-      test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - 
$testName") {
+    test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - 
$testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+        SQLConf.CASE_SENSITIVE.key -> "true") {
         withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
-      SQLConf.CASE_SENSITIVE.key -> "false") {
-      test(s"Spark vectorized reader - case-insensitive parser - mixed-case 
schema - $testName") {
-          withMixedCaseData(testThunk)
+    testMixedCaseQueryPruning(testName)(testThunk)
+  }
+
+  // Tests schema pruning for a query whose column and field names may differ in case from the table
+  // schema's column and field names
+  private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) {
+    test(s"Spark vectorized reader - case-insensitive parser - mixed-case 
schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
+        SQLConf.CASE_SENSITIVE.key -> "false") {
+        withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
-      SQLConf.CASE_SENSITIVE.key -> "true") {
-      test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - 
$testName") {
+    test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - 
$testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+        SQLConf.CASE_SENSITIVE.key -> "false") {
         withMixedCaseData(testThunk)
       }
     }

