[
https://issues.apache.org/jira/browse/HUDI-2520?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Raymond Xu updated HUDI-2520:
-----------------------------
Status: In Progress (was: Open)
> Certify sync with Hive 3
> ------------------------
>
> Key: HUDI-2520
> URL: https://issues.apache.org/jira/browse/HUDI-2520
> Project: Apache Hudi
> Issue Type: Task
> Components: hive, meta-sync
> Reporter: Sagar Sumit
> Assignee: rex xiong
> Priority: Blocker
> Fix For: 0.11.0
>
> Attachments: image-2022-03-14-15-52-02-021.png
>
>
> # When executing a CTAS statement, the query failed due to a duplicate meta-sync problem:
> HoodieSparkSqlWriter synced the meta a first time, and then
> HoodieCatalog.createHoodieTable synced it a second time when
> HoodieStagedTable.commitStagedChanges was invoked
> {code:java}
> create table if not exists h3_cow using hudi partitioned by (dt) options
> (type = 'cow', primaryKey = 'id,name') as select 1 as id, 'a1' as name, 20 as
> price, '2021-01-03' as dt;
> 22/03/14 14:26:21 ERROR [main] Utils: Aborting task
> org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or
> view 'h3_cow' already exists in database 'default'
> at
> org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createHiveDataSourceTable(CreateHoodieTableCommand.scala:172)
> at
> org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createTableInCatalog(CreateHoodieTableCommand.scala:148)
> at
> org.apache.spark.sql.hudi.catalog.HoodieCatalog.createHoodieTable(HoodieCatalog.scala:254)
> at
> org.apache.spark.sql.hudi.catalog.HoodieStagedTable.commitStagedChanges(HoodieStagedTable.scala:62)
> at
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:484)
> at
> org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1496)
> at
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable(WriteToDataSourceV2Exec.scala:468)
> at
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable$(WriteToDataSourceV2Exec.scala:463)
> at
> org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:106)
> at
> org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:127)
> at
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
> at
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
> at
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> at
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
> at
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
> at
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481){code}
> 2. When truncating a partitioned table, neither metadata nor data is truncated, and
> truncating a partitioned table with a partition spec fails
> {code:java}
> // truncate the partitioned table without a partition spec: the query succeeds but
> never deletes any data
> spark-sql> truncate table mor_partition_table_0314;
> Time taken: 0.256 seconds
> // truncate partition table with partition spec,
> spark-sql> truncate table mor_partition_table_0314 partition(dt=3);
> Error in query: Table spark_catalog.default.mor_partition_table_0314 does not
> support partition management.;
> 'TruncatePartition unresolvedpartitionspec((dt,3), None)
> +- ResolvedTable org.apache.spark.sql.hudi.catalog.HoodieCatalog@63f609a4,
> default.mor_partition_table_0314,
> {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)