spark git commit: [SPARK-15901][SQL][TEST] Verification of CONVERT_METASTORE_ORC and CONVERT_METASTORE_PARQUET

lian Wed, 15 Jun 2016 14:09:36 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f1e9d2d92 -> 4253ba71b



[SPARK-15901][SQL][TEST] Verification of CONVERT_METASTORE_ORC and 
CONVERT_METASTORE_PARQUET

#### What changes were proposed in this pull request?
So far, we do not have test cases verifying whether the external parameters 
`HiveUtils.CONVERT_METASTORE_ORC` and `HiveUtils.CONVERT_METASTORE_PARQUET` 
work properly when users set non-default values. This PR adds such test 
cases to guard against potential regressions.

#### How was this patch tested?
N/A

Author: gatorsmile <[email protected]>

Closes #13622 from gatorsmile/addTestCase4parquetOrcConversion.

(cherry picked from commit 09925735b5e53db61ed12abae58864670a3a5f98)
Signed-off-by: Cheng Lian <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4253ba71
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4253ba71
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4253ba71

Branch: refs/heads/branch-2.0
Commit: 4253ba71b6d291f0fcb3b67048ff915595c83c9e
Parents: f1e9d2d
Author: gatorsmile <[email protected]>
Authored: Wed Jun 15 14:08:55 2016 -0700
Committer: Cheng Lian <[email protected]>
Committed: Wed Jun 15 14:09:05 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/hive/orc/OrcQuerySuite.scala      | 75 +++++++++++---------
 .../apache/spark/sql/hive/parquetSuites.scala   | 40 +++++++++++
 2 files changed, 83 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4253ba71/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
index e6c9c5d..cd41da7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.hive.orc
 
-import java.io.File
 import java.nio.charset.StandardCharsets
 
 import org.scalatest.BeforeAndAfterAll
@@ -25,7 +24,7 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.execution.datasources.LogicalRelation
-import org.apache.spark.sql.hive.HiveUtils
+import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
 import org.apache.spark.sql.internal.SQLConf
@@ -401,36 +400,48 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
     }
   }
 
-  test("SPARK-14070 Use ORC data source for SQL queries on ORC tables") {
-    withTempPath { dir =>
-      withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true",
-        HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
-        val path = dir.getCanonicalPath
-
-        withTable("dummy_orc") {
-          withTempTable("single") {
-            spark.sql(
-              s"""CREATE TABLE dummy_orc(key INT, value STRING)
-                  |STORED AS ORC
-                  |LOCATION '$path'
-               """.stripMargin)
-
-            val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1)
-            singleRowDF.createOrReplaceTempView("single")
-
-            spark.sql(
-              s"""INSERT INTO TABLE dummy_orc
-                  |SELECT key, value FROM single
-               """.stripMargin)
-
-            val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0")
-            checkAnswer(df, singleRowDF)
-
-            val queryExecution = df.queryExecution
-            queryExecution.analyzed.collectFirst {
-              case _: LogicalRelation => ()
-            }.getOrElse {
-              fail(s"Expecting the query plan to have LogicalRelation, but got:\n$queryExecution")
+  test("Verify the ORC conversion parameter: CONVERT_METASTORE_ORC") {
+    withTempTable("single") {
+      val singleRowDF = Seq((0, "foo")).toDF("key", "value")
+      singleRowDF.createOrReplaceTempView("single")
+
+      Seq("true", "false").foreach { orcConversion =>
+        withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> orcConversion) {
+          withTable("dummy_orc") {
+            withTempPath { dir =>
+              val path = dir.getCanonicalPath
+              spark.sql(
+                s"""
+                   |CREATE TABLE dummy_orc(key INT, value STRING)
+                   |STORED AS ORC
+                   |LOCATION '$path'
+                 """.stripMargin)
+
+              spark.sql(
+                s"""
+                   |INSERT INTO TABLE dummy_orc
+                   |SELECT key, value FROM single
+                 """.stripMargin)
+
+              val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0")
+              checkAnswer(df, singleRowDF)
+
+              val queryExecution = df.queryExecution
+              if (orcConversion == "true") {
+                queryExecution.analyzed.collectFirst {
+                  case _: LogicalRelation => ()
+                }.getOrElse {
+                  fail(s"Expecting the query plan to convert orc to data sources, " +
+                    s"but got:\n$queryExecution")
+                }
+              } else {
+                queryExecution.analyzed.collectFirst {
+                  case _: MetastoreRelation => ()
+                }.getOrElse {
+                  fail(s"Expecting no conversion from orc to data sources, " +
+                    s"but got:\n$queryExecution")
+                }
+              }
             }
           }
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/4253ba71/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 06b74da..6af9976 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -676,6 +676,46 @@ class ParquetSourceSuite extends ParquetPartitioningTest {
     }
   }
 
+  test("Verify the PARQUET conversion parameter: CONVERT_METASTORE_PARQUET") {
+    withTempTable("single") {
+      val singleRowDF = Seq((0, "foo")).toDF("key", "value")
+      singleRowDF.createOrReplaceTempView("single")
+
+      Seq("true", "false").foreach { parquetConversion =>
+        withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
+          val tableName = "test_parquet_ctas"
+          withTable(tableName) {
+            sql(
+              s"""
+                 |CREATE TABLE $tableName STORED AS PARQUET
+                 |AS SELECT tmp.key, tmp.value FROM single tmp
+               """.stripMargin)
+
+            val df = spark.sql(s"SELECT * FROM $tableName WHERE key=0")
+            checkAnswer(df, singleRowDF)
+
+            val queryExecution = df.queryExecution
+            if (parquetConversion == "true") {
+              queryExecution.analyzed.collectFirst {
+                case _: LogicalRelation =>
+              }.getOrElse {
+                fail(s"Expecting the query plan to convert parquet to data sources, " +
+                  s"but got:\n$queryExecution")
+              }
+            } else {
+              queryExecution.analyzed.collectFirst {
+                case _: MetastoreRelation =>
+              }.getOrElse {
+                fail(s"Expecting no conversion from parquet to data sources, " +
+                  s"but got:\n$queryExecution")
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
   test("values in arrays and maps stored in parquet are always nullable") {
     val df = createDataFrame(Tuple2(Map(2 -> 3), Seq(4, 5, 6)) :: Nil).toDF("m", "a")
     val mapType1 = MapType(IntegerType, IntegerType, valueContainsNull = false)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-15901][SQL][TEST] Verification of CONVERT_METASTORE_ORC and CONVERT_METASTORE_PARQUET

Reply via email to