Repository: spark
Updated Branches:
  refs/heads/master 170eeb345 -> 07b3f045c


[SPARK-18464][SQL] support old table which doesn't store schema in metastore

## What changes were proposed in this pull request?

Before Spark 2.1, users can create an external data source table without 
schema, and we will infer the table schema at runtime. In Spark 2.1, we decided 
to infer the schema when the table was created, so that we don't need to infer 
it again and again at runtime.

This is a good improvement, but we should still respect and support old tables 
which doesn't store table schema in metastore.

## How was this patch tested?

regression test.

Author: Wenchen Fan <wenc...@databricks.com>

Closes #15900 from cloud-fan/hive-catalog.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/07b3f045
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/07b3f045
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/07b3f045

Branch: refs/heads/master
Commit: 07b3f045cd6f79b92bc86b3b1b51d3d5e6bd37ce
Parents: 170eeb3
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Thu Nov 17 00:00:38 2016 -0800
Committer: Reynold Xin <r...@databricks.com>
Committed: Thu Nov 17 00:00:38 2016 -0800

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    |  8 ++++++-
 .../spark/sql/hive/HiveExternalCatalog.scala    |  5 +++++
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  4 +++-
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 22 ++++++++++++++++++++
 4 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/07b3f045/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 119e732..7049e53 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -431,7 +431,13 @@ case class DescribeTableCommand(
       describeSchema(catalog.lookupRelation(table).schema, result)
     } else {
       val metadata = catalog.getTableMetadata(table)
-      describeSchema(metadata.schema, result)
+      if (metadata.schema.isEmpty) {
+        // In older version(prior to 2.1) of Spark, the table schema can be 
empty and should be
+        // inferred at runtime. We should still support it.
+        describeSchema(catalog.lookupRelation(metadata.identifier).schema, 
result)
+      } else {
+        describeSchema(metadata.schema, result)
+      }
 
       describePartitionInfo(metadata, result)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/07b3f045/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index cbd00da..8433058 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -1023,6 +1023,11 @@ object HiveExternalCatalog {
       // After SPARK-6024, we removed this flag.
       // Although we are not using `spark.sql.sources.schema` any more, we 
need to still support.
       DataType.fromJson(schema.get).asInstanceOf[StructType]
+    } else if 
(props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) {
+      // If there is no schema information in table properties, it means the 
schema of this table
+      // was empty when saving into metastore, which is possible in older 
version(prior to 2.1) of
+      // Spark. We should respect it.
+      new StructType()
     } else {
       val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS)
       if (numSchemaParts.isDefined) {

http://git-wip-us.apache.org/repos/asf/spark/blob/07b3f045/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 8e5fc88..edbde5d 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -64,7 +64,9 @@ private[hive] class HiveMetastoreCatalog(sparkSession: 
SparkSession) extends Log
         val dataSource =
           DataSource(
             sparkSession,
-            userSpecifiedSchema = Some(table.schema),
+            // In older version(prior to 2.1) of Spark, the table schema can 
be empty and should be
+            // inferred at runtime. We should still support it.
+            userSpecifiedSchema = if (table.schema.isEmpty) None else 
Some(table.schema),
             partitionColumns = table.partitionColumnNames,
             bucketSpec = table.bucketSpec,
             className = table.provider.get,

http://git-wip-us.apache.org/repos/asf/spark/blob/07b3f045/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index c50f92e..4ab1a54 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -1371,4 +1371,26 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
       }
     }
   }
+
+  test("SPARK-18464: support old table which doesn't store schema in table 
properties") {
+    withTable("old") {
+      withTempPath { path =>
+        Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath)
+        val tableDesc = CatalogTable(
+          identifier = TableIdentifier("old", Some("default")),
+          tableType = CatalogTableType.EXTERNAL,
+          storage = CatalogStorageFormat.empty.copy(
+            properties = Map("path" -> path.getAbsolutePath)
+          ),
+          schema = new StructType(),
+          properties = Map(
+            HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet"))
+        hiveClient.createTable(tableDesc, ignoreIfExists = false)
+
+        checkAnswer(spark.table("old"), Row(1, "a"))
+
+        checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", 
"string", null) :: Nil)
+      }
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to