This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8319cec9869 [SPARK-41355][SQL] Workaround hive table name validation 
issue
8319cec9869 is described below

commit 8319cec9869ffece624ad4117fea31d452c45e5b
Author: Kun Wan <wan...@apache.org>
AuthorDate: Mon Dec 5 16:19:21 2022 +0800

    [SPARK-41355][SQL] Workaround hive table name validation issue
    
    ### What changes were proposed in this pull request?
    
    Restore dbName and tableName in `HiveShim.getTable()` method.
    
    When we create a hive table, hive will convert the dbName and tableName to 
lower case: 
https://github.com/apache/hive/blob/release-2.3.9-rc0/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1446-L1482
    
    And then throw an exception in `Hive.alterTable()`:
    
    
https://github.com/apache/hive/blob/release-2.3.9-rc0/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java#L623
    
    For example:
    * We want to create a table called `tAb_I`
    * Hive metastore will check if the table name is valid by 
`MetaStoreUtils.validateName(tbl.getTableName())`
    * Hive will call `HiveStringUtils.normalizeIdentifier(tbl.getTableName())` 
and then save the table name in lower case, **but after setting the 
locale to "tr", it will be `tab_ı` which is not a valid table name**
    * When we run an alter table command, we will first get the hive table from 
the hive metastore; its stored table name is no longer valid.
    * Update some properties or other, and then try to save it to hive 
metastore.
    * Hive metastore will check if the table name is valid and then throw 
exception `org.apache.hadoop.hive.ql.metadata.HiveException: [tab_ı]: is not a 
valid table name`
    
    ### Why are the changes needed?
    
    Bug fix for alter table command.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Add UT
    
    Closes #38765 from wankunde/write_stats_directly2.
    
    Authored-by: Kun Wan <wan...@apache.org>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/execution/command/DDLSuite.scala | 19 +++++++++++++++++++
 .../org/apache/spark/sql/hive/client/HiveShim.scala   |  7 ++++++-
 .../org/apache/spark/sql/hive/StatisticsSuite.scala   |  2 +-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 6d4e907835e..1e12340d983 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1963,6 +1963,25 @@ abstract class DDLSuite extends QueryTest with 
DDLSuiteBase {
     }
   }
 
+  test(s"Support alter table command with CASE_SENSITIVE is true") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"true") {
+      withLocale("tr") {
+        val dbName = "DaTaBaSe_I"
+        withDatabase(dbName) {
+          sql(s"CREATE DATABASE $dbName")
+          sql(s"USE $dbName")
+
+          val tabName = "tAb_I"
+          withTable(tabName) {
+            sql(s"CREATE TABLE $tabName(col_I int) USING PARQUET")
+            sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('foo' = 'a')")
+            checkAnswer(sql(s"SELECT col_I FROM $tabName"), Nil)
+          }
+        }
+      }
+    }
+  }
+
   test("set command rejects SparkConf entries") {
     val ex = intercept[AnalysisException] {
       sql(s"SET ${config.CPUS_PER_TASK.key} = 4")
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 95e5582cb8c..5e5d2757e9d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -620,7 +620,12 @@ private[client] class Shim_v0_12 extends Shim with Logging 
{
       tableName: String,
       throwException: Boolean): Table = {
     recordHiveCall()
-    hive.getTable(dbName, tableName, throwException)
+    val table = hive.getTable(dbName, tableName, throwException)
+    if (table != null) {
+      table.getTTable.setTableName(tableName)
+      table.getTTable.setDbName(dbName)
+    }
+    table
   }
 
   override def getTablesByPattern(hive: Hive, dbName: String, pattern: 
String): Seq[String] = {
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index a1f34945868..4b69a01834d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -537,7 +537,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase 
with TestHiveSingleto
         }.getMessage
         assert(message.contains(
           "DS is not a valid partition column in table " +
-            
s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`"))
+            s"`$SESSION_CATALOG_NAME`.`default`.`$tableName`"))
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to