Repository: spark Updated Branches: refs/heads/branch-2.0 f1e9d2d92 -> 4253ba71b
[SPARK-15901][SQL][TEST] Verification of CONVERT_METASTORE_ORC and CONVERT_METASTORE_PARQUET #### What changes were proposed in this pull request? So far, we do not have test cases for verifying whether the external parameters `HiveUtils .CONVERT_METASTORE_ORC` and `HiveUtils.CONVERT_METASTORE_PARQUET` properly works when users use non-default values. This PR is to add such test cases for avoiding potential regression. #### How was this patch tested? N/A Author: gatorsmile <gatorsm...@gmail.com> Closes #13622 from gatorsmile/addTestCase4parquetOrcConversion. (cherry picked from commit 09925735b5e53db61ed12abae58864670a3a5f98) Signed-off-by: Cheng Lian <l...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4253ba71 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4253ba71 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4253ba71 Branch: refs/heads/branch-2.0 Commit: 4253ba71b6d291f0fcb3b67048ff915595c83c9e Parents: f1e9d2d Author: gatorsmile <gatorsm...@gmail.com> Authored: Wed Jun 15 14:08:55 2016 -0700 Committer: Cheng Lian <l...@databricks.com> Committed: Wed Jun 15 14:09:05 2016 -0700 ---------------------------------------------------------------------- .../spark/sql/hive/orc/OrcQuerySuite.scala | 75 +++++++++++--------- .../apache/spark/sql/hive/parquetSuites.scala | 40 +++++++++++ 2 files changed, 83 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4253ba71/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala index e6c9c5d..cd41da7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.orc -import java.io.File import java.nio.charset.StandardCharsets import org.scalatest.BeforeAndAfterAll @@ -25,7 +24,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.internal.SQLConf @@ -401,36 +400,48 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } } - test("SPARK-14070 Use ORC data source for SQL queries on ORC tables") { - withTempPath { dir => - withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> "true", - HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { - val path = dir.getCanonicalPath - - withTable("dummy_orc") { - withTempTable("single") { - spark.sql( - s"""CREATE TABLE dummy_orc(key INT, value STRING) - |STORED AS ORC - |LOCATION '$path' - """.stripMargin) - - val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1) - singleRowDF.createOrReplaceTempView("single") - - spark.sql( - s"""INSERT INTO TABLE dummy_orc - |SELECT key, value FROM single - """.stripMargin) - - val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0") - checkAnswer(df, singleRowDF) - - val queryExecution = df.queryExecution - queryExecution.analyzed.collectFirst { - case _: LogicalRelation => () - }.getOrElse { - fail(s"Expecting the query plan to have LogicalRelation, but got:\n$queryExecution") + test("Verify the ORC conversion parameter: CONVERT_METASTORE_ORC") { + withTempTable("single") { + val singleRowDF = Seq((0, "foo")).toDF("key", "value") + singleRowDF.createOrReplaceTempView("single") + + Seq("true", "false").foreach { orcConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> orcConversion) { + withTable("dummy_orc") { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.sql( + s""" + |CREATE TABLE dummy_orc(key INT, value STRING) + |STORED AS ORC + |LOCATION '$path' + """.stripMargin) + + spark.sql( + s""" + |INSERT INTO TABLE dummy_orc + |SELECT key, value FROM single + """.stripMargin) + + val df = spark.sql("SELECT * FROM dummy_orc WHERE key=0") + checkAnswer(df, singleRowDF) + + val queryExecution = df.queryExecution + if (orcConversion == "true") { + queryExecution.analyzed.collectFirst { + case _: LogicalRelation => () + }.getOrElse { + fail(s"Expecting the query plan to convert orc to data sources, " + + s"but got:\n$queryExecution") + } + } else { + queryExecution.analyzed.collectFirst { + case _: MetastoreRelation => () + }.getOrElse { + fail(s"Expecting no conversion from orc to data sources, " + + s"but got:\n$queryExecution") + } + } } } } http://git-wip-us.apache.org/repos/asf/spark/blob/4253ba71/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 06b74da..6af9976 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -676,6 +676,46 @@ class ParquetSourceSuite extends ParquetPartitioningTest { } } + test("Verify the PARQUET conversion parameter: CONVERT_METASTORE_PARQUET") { + withTempTable("single") { + val singleRowDF = Seq((0, "foo")).toDF("key", "value") + singleRowDF.createOrReplaceTempView("single") + + Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { + val tableName = "test_parquet_ctas" + withTable(tableName) { + sql( + s""" + |CREATE TABLE $tableName STORED AS PARQUET + |AS SELECT tmp.key, tmp.value FROM single tmp + """.stripMargin) + + val df = spark.sql(s"SELECT * FROM $tableName WHERE key=0") + checkAnswer(df, singleRowDF) + + val queryExecution = df.queryExecution + if (parquetConversion == "true") { + queryExecution.analyzed.collectFirst { + case _: LogicalRelation => + }.getOrElse { + fail(s"Expecting the query plan to convert parquet to data sources, " + + s"but got:\n$queryExecution") + } + } else { + queryExecution.analyzed.collectFirst { + case _: MetastoreRelation => + }.getOrElse { + fail(s"Expecting no conversion from parquet to data sources, " + + s"but got:\n$queryExecution") + } + } + } + } + } + } + } + test("values in arrays and maps stored in parquet are always nullable") { val df = createDataFrame(Tuple2(Map(2 -> 3), Seq(4, 5, 6)) :: Nil).toDF("m", "a") val mapType1 = MapType(IntegerType, IntegerType, valueContainsNull = false) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org