Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/15797#discussion_r86806933
--- Diff:
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala ---
@@ -226,106 +208,139 @@ private[spark] class HiveExternalCatalog(conf:
SparkConf, hadoopConf: Configurat
} else {
tableDefinition.storage.locationUri
}
- // Ideally we should also put `locationUri` in table properties like
provider, schema, etc.
- // However, in older version of Spark we already store table
location in storage properties
- // with key "path". Here we keep this behaviour for backward
compatibility.
- val storagePropsWithLocation = tableDefinition.storage.properties ++
- tableLocation.map("path" -> _)
-
- // converts the table metadata to Spark SQL specific format, i.e.
set data schema, names and
- // bucket specification to empty. Note that partition columns are
retained, so that we can
- // call partition-related Hive API later.
- def newSparkSQLSpecificMetastoreTable(): CatalogTable = {
- tableDefinition.copy(
- // Hive only allows directory paths as location URIs while Spark
SQL data source tables
- // also allow file paths. For non-hive-compatible format, we
should not set location URI
- // to avoid hive metastore to throw exception.
- storage = tableDefinition.storage.copy(
- locationUri = None,
- properties = storagePropsWithLocation),
- schema = tableDefinition.partitionSchema,
- bucketSpec = None,
- properties = tableDefinition.properties ++ tableProperties)
+
+ if (tableDefinition.provider.get == DDLUtils.HIVE_PROVIDER) {
+ val tableWithDataSourceProps = tableDefinition.copy(
+ // We can't leave `locationUri` empty and count on Hive
metastore to set a default table
+ // location, because Hive metastore is not case preserving and
the table name is always
+ // lower cased when appear in the default table path, which is
not expected.
+ storage = tableDefinition.storage.copy(locationUri =
tableLocation),
+ // Here we follow data source tables and put table metadata like
provider, schema, etc. in
+ // table properties, so that we can work around the Hive
metastore issue about not case
+ // preserving and make Hive serde table support mixed-case
column names.
+ properties = tableDefinition.properties ++
tableMetaToTableProps(tableDefinition))
+ client.createTable(tableWithDataSourceProps, ignoreIfExists)
+ } else {
+ createDataSourceTable(
+ tableDefinition.withNewStorage(locationUri = tableLocation),
+ ignoreIfExists)
}
+ }
+ }
- // converts the table metadata to Hive compatible format, i.e. set
the serde information.
- def newHiveCompatibleMetastoreTable(serde: HiveSerDe): CatalogTable
= {
- val location = if (tableDefinition.tableType == EXTERNAL) {
- // When we hit this branch, we are saving an external data
source table with hive
- // compatible format, which means the data source is file-based
and must have a `path`.
- require(tableDefinition.storage.locationUri.isDefined,
- "External file-based data source table must have a `path`
entry in storage properties.")
- Some(new
Path(tableDefinition.storage.locationUri.get).toUri.toString)
- } else {
- None
- }
+ private def createDataSourceTable(
--- End diff --
This method just duplicates the code from the `if` branch above:
https://github.com/apache/spark/pull/15797/files#diff-159191585e10542f013cb3a714f26075R199
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]