This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 76576b6  [SPARK-30050][SQL] analyze table and rename table should not erase hive table bucketing info
76576b6 is described below

commit 76576b649ae086e3d063d4e711400a5326e0177d
Author: Liang-Chi Hsieh <vii...@gmail.com>
AuthorDate: Mon Dec 2 13:40:11 2019 +0800

    [SPARK-30050][SQL] analyze table and rename table should not erase hive table bucketing info

    ### What changes were proposed in this pull request?

    This patch adds the Hive provider to the table metadata in
    `HiveExternalCatalog.alterTableStats`. When we call `HiveClient.alterTable`,
    `alterTable` erases the table's bucketing info if it cannot find the Hive
    provider in the given table metadata. Renaming a table has the same issue.

    ### Why are the changes needed?

    Because running `ANALYZE TABLE` on a Hive table that has bucketing info
    erases the existing bucketing info.

    ### Does this PR introduce any user-facing change?

    Yes. After this PR, running `ANALYZE TABLE` on a Hive table no longer
    erases its existing bucketing info.

    ### How was this patch tested?

    Unit test.

    Closes #26685 from viirya/fix-hive-bucket.

    Lead-authored-by: Liang-Chi Hsieh <vii...@gmail.com>
    Co-authored-by: Liang-Chi Hsieh <liang...@uber.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 85cb388ae3f25b0e6a7fc1a2d78fd1c3ec03f341)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/hive/client/HiveClientImpl.scala    |  4 ++--
 .../spark/sql/hive/HiveExternalCatalogSuite.scala | 26 ++++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 8b33ff2..701f108 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -49,7 +49,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.execution.QueryExecutionException
-import org.apache.spark.sql.execution.command.DDLUtils
+import org.apache.spark.sql.hive.HiveExternalCatalog
 import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX}
 import org.apache.spark.sql.hive.client.HiveClientImpl._
 import org.apache.spark.sql.internal.SQLConf
@@ -961,7 +961,7 @@ private[hive] object HiveClientImpl {
     }
 
     table.bucketSpec match {
-      case Some(bucketSpec) if DDLUtils.isHiveTable(table) =>
+      case Some(bucketSpec) if !HiveExternalCatalog.isDatasourceTable(table) =>
         hiveTable.setNumBuckets(bucketSpec.numBuckets)
         hiveTable.setBucketCols(bucketSpec.bucketColumnNames.toList.asJava)
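For context on the guard change above: `DDLUtils.isHiveTable` keys off
`CatalogTable.provider`, which is not always populated on the metadata that
the analyze/rename paths hand to `HiveClient.alterTable`, whereas
`HiveExternalCatalog.isDatasourceTable` consults the persisted table
properties, which do survive those paths. Below is a minimal, self-contained
sketch of that difference; the helper bodies are simplified stand-ins rather
than the real implementations, and the property key
`spark.sql.sources.provider` is assumed to match Spark's
`DATASOURCE_PROVIDER` constant.

    // Simplified stand-ins for illustration only; the real implementations
    // live in DDLUtils and HiveExternalCatalog.
    object BucketGuardSketch extends App {
      case class BucketSpec(numBuckets: Int, bucketColumnNames: Seq[String])
      case class CatalogTable(
          provider: Option[String],        // e.g. Some("hive"); may be absent
          properties: Map[String, String], // persisted table properties
          bucketSpec: Option[BucketSpec])

      // Old guard: keyed off `provider`, which the ANALYZE TABLE / RENAME
      // TABLE paths did not always carry on the metadata passed to alterTable.
      def isHiveTable(t: CatalogTable): Boolean =
        t.provider.exists(_.equalsIgnoreCase("hive"))

      // New guard (used negated in the match): the table is a data source
      // table only if the persisted provider property says so.
      def isDatasourceTable(t: CatalogTable): Boolean = {
        val p = t.properties.get("spark.sql.sources.provider")
        p.isDefined && p != Some("hive")
      }

      // A Hive table whose in-memory metadata lost its provider:
      val hiveTable = CatalogTable(
        provider = None,
        properties = Map.empty,
        bucketSpec = Some(BucketSpec(10, Seq("a", "b"))))

      assert(!isHiveTable(hiveTable))       // old guard false: bucketing dropped
      assert(!isDatasourceTable(hiveTable)) // negated new guard true: bucketing kept
    }

As a rough end-to-end reproduction from a Hive-enabled Spark session (a
sketch; the table names are illustrative, and the new test below exercises
the same sequence through the external catalog API):

    // Run in spark-shell with Hive support enabled.
    spark.sql(
      """CREATE TABLE t (a STRING, b STRING)
        |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS
        |STORED AS PARQUET""".stripMargin)

    spark.sql("ANALYZE TABLE t COMPUTE STATISTICS") // pre-fix: dropped the bucketing info
    spark.sql("ALTER TABLE t RENAME TO t2")         // pre-fix: a rename dropped it too
    spark.sql("DESC FORMATTED t2").show(100, false) // with the fix: still reports 10 buckets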
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 4662300..79e569b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -108,6 +108,32 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite {
     assert(bucketString.contains("10"))
   }
 
+  test("SPARK-30050: analyze/rename table should not erase the bucketing metadata at hive side") {
+    val catalog = newBasicCatalog()
+    externalCatalog.client.runSqlHive(
+      """
+        |CREATE TABLE db1.t(a string, b string)
+        |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS
+        |STORED AS PARQUET
+      """.stripMargin)
+
+    val bucketString1 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t")
+      .filter(_.contains("Num Buckets")).head
+    assert(bucketString1.contains("10"))
+
+    catalog.alterTableStats("db1", "t", None)
+
+    val bucketString2 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t")
+      .filter(_.contains("Num Buckets")).head
+    assert(bucketString2.contains("10"))
+
+    catalog.renameTable("db1", "t", "t2")
+
+    val bucketString3 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t2")
+      .filter(_.contains("Num Buckets")).head
+    assert(bucketString3.contains("10"))
+  }
+
   test("SPARK-23001: NullPointerException when running desc database") {
     val catalog = newBasicCatalog()
     catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org