Repository: spark Updated Branches: refs/heads/master 9deddbb13 -> a81ef9e1f
[SPARK-25418][SQL] The metadata of DataSource table should not include Hive-generated storage properties. ## What changes were proposed in this pull request? When Hive support enabled, Hive catalog puts extra storage properties into table metadata even for DataSource tables, but we should not have them. ## How was this patch tested? Modified a test. Closes #22410 from ueshin/issues/SPARK-25418/hive_metadata. Authored-by: Takuya UESHIN <ues...@databricks.com> Signed-off-by: gatorsmile <gatorsm...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a81ef9e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a81ef9e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a81ef9e1 Branch: refs/heads/master Commit: a81ef9e1f9bea79aab4a72a5efff69193ee386de Parents: 9deddbb Author: Takuya UESHIN <ues...@databricks.com> Authored: Thu Sep 13 22:22:00 2018 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Thu Sep 13 22:22:00 2018 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 7 ++++++- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a81ef9e1/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 5cc1047..505124a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -28,6 +28,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.ql.metadata.HiveException +import org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT import org.apache.thrift.TException import org.apache.spark.{SparkConf, SparkException} @@ -806,6 +807,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat updateLocationInStorageProps(table, newPath = None).copy( locationUri = tableLocation.map(CatalogUtils.stringToURI(_))) } + val storageWithoutHiveGeneratedProperties = storageWithLocation.copy( + properties = storageWithLocation.properties.filterKeys(!HIVE_GENERATED_STORAGE_PROPERTIES(_))) val partitionProvider = table.properties.get(TABLE_PARTITION_PROVIDER) val schemaFromTableProps = getSchemaFromTableProperties(table) @@ -814,7 +817,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table.copy( provider = Some(provider), - storage = storageWithLocation, + storage = storageWithoutHiveGeneratedProperties, schema = reorderedSchema, partitionColumnNames = partColumnNames, bucketSpec = getBucketSpecFromTableProperties(table), @@ -1309,6 +1312,8 @@ object HiveExternalCatalog { val CREATED_SPARK_VERSION = SPARK_SQL_PREFIX + "create.version" + val HIVE_GENERATED_STORAGE_PROPERTIES = Set(SERIALIZATION_FORMAT) + // When storing data source tables in hive metastore, we need to set data schema to empty if the // schema is hive-incompatible. However we need a hack to preserve existing behavior. Before // Spark 2.0, we do not set a default serde here (this was done in Hive), and so if the user http://git-wip-us.apache.org/repos/asf/spark/blob/a81ef9e1/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 69ee2bb..be1aa83 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -72,7 +72,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA outputFormat = serde.get.outputFormat, serde = serde.get.serde, compressed = false, - properties = Map("serialization.format" -> "1")) + properties = Map.empty) } else { CatalogStorageFormat( locationUri = Some(catalog.defaultTablePath(name)), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org