This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8319cec9869 [SPARK-41355][SQL] Workaround hive table name validation issue 8319cec9869 is described below commit 8319cec9869ffece624ad4117fea31d452c45e5b Author: Kun Wan <wan...@apache.org> AuthorDate: Mon Dec 5 16:19:21 2022 +0800 [SPARK-41355][SQL] Workaround hive table name validation issue ### What changes were proposed in this pull request? Restore dbName and tableName in `HiveShim.getTable()` method. When we create a hive table, hive will convert the dbName and tableName to lower case: https://github.com/apache/hive/blob/release-2.3.9-rc0/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1446-L1482 And then throw an exception in `Hive.alterTable()`: https://github.com/apache/hive/blob/release-2.3.9-rc0/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java#L623 For example: * We want to create a table called `tAb_I` * Hive metastore will check if the table name is valid by `MetaStoreUtils.validateName(tbl.getTableName())` * Hive will call `HiveStringUtils.normalizeIdentifier(tbl.getTableName())` and then save the table name in lower case, **but after setting the locale to "tr", it will be `tab_ı`, which is not a valid table name** * When we run an alter table command, we will first get the hive table from the hive metastore; its stored lower-cased name is not a valid table name. * We update some properties or other metadata, and then try to save the table back to the hive metastore. * Hive metastore will check if the table name is valid and then throw the exception `org.apache.hadoop.hive.ql.metadata.HiveException: [tab_ı]: is not a valid table name` ### Why are the changes needed? Bug fix for alter table command. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add UT Closes #38765 from wankunde/write_stats_directly2. 
Authored-by: Kun Wan <wan...@apache.org> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/execution/command/DDLSuite.scala | 19 +++++++++++++++++++ .../org/apache/spark/sql/hive/client/HiveShim.scala | 7 ++++++- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 6d4e907835e..1e12340d983 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1963,6 +1963,25 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { } } + test(s"Support alter table command with CASE_SENSITIVE is true") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"true") { + withLocale("tr") { + val dbName = "DaTaBaSe_I" + withDatabase(dbName) { + sql(s"CREATE DATABASE $dbName") + sql(s"USE $dbName") + + val tabName = "tAb_I" + withTable(tabName) { + sql(s"CREATE TABLE $tabName(col_I int) USING PARQUET") + sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('foo' = 'a')") + checkAnswer(sql(s"SELECT col_I FROM $tabName"), Nil) + } + } + } + } + } + test("set command rejects SparkConf entries") { val ex = intercept[AnalysisException] { sql(s"SET ${config.CPUS_PER_TASK.key} = 4") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 95e5582cb8c..5e5d2757e9d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -620,7 +620,12 @@ private[client] class Shim_v0_12 extends Shim with Logging { tableName: String, throwException: Boolean): Table = { recordHiveCall() - hive.getTable(dbName, tableName, 
throwException) + val table = hive.getTable(dbName, tableName, throwException) + if (table != null) { + table.getTTable.setTableName(tableName) + table.getTTable.setDbName(dbName) + } + table } override def getTablesByPattern(hive: Hive, dbName: String, pattern: String): Seq[String] = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index a1f34945868..4b69a01834d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -537,7 +537,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto }.getMessage assert(message.contains( "DS is not a valid partition column in table " + - s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`")) + s"`$SESSION_CATALOG_NAME`.`default`.`$tableName`")) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org