[jira] [Updated] (HUDI-2520) Certify sync with Hive 3

rex xiong (Jira) Sun, 13 Mar 2022 23:34:03 -0700


     [ 
https://issues.apache.org/jira/browse/HUDI-2520?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


rex xiong updated HUDI-2520:
----------------------------
    Description: 
# when execute CTAS statment，the query failed due to twice sync meta problem: 
HoodieSparkSqlWriter synced meta first time, followed by 
HoodieCatalog.createHoodieTable synced the second time when 
HoodieStagedTable.commitStagedChanges
{code:java}
create table if not exists h3_cow using hudi partitioned by (dt) options (type 
= 'cow', primaryKey = 'id,name') as select 1 as id, 'a1' as name, 20 as price, 
'2021-01-03' as dt;

22/03/14 14:26:21 ERROR [main] Utils: Aborting task
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
view 'h3_cow' already exists in database 'default'
        at 
org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createHiveDataSourceTable(CreateHoodieTableCommand.scala:172)
        at 
org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createTableInCatalog(CreateHoodieTableCommand.scala:148)
        at 
org.apache.spark.sql.hudi.catalog.HoodieCatalog.createHoodieTable(HoodieCatalog.scala:254)
        at 
org.apache.spark.sql.hudi.catalog.HoodieStagedTable.commitStagedChanges(HoodieStagedTable.scala:62)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:484)
        at 
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1496)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable(WriteToDataSourceV2Exec.scala:468)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable$(WriteToDataSourceV2Exec.scala:463)
        at 
org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:106)
        at 
org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:127)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
        at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)

{code}

  was:
# when execute CTAS statment，the query failed due to twice sync meta problem: 
HoodieSparkSqlWriter synced meta first time, followed by 
HoodieCatalog.createHoodieTable synced the second time when 
HoodieStagedTable.commitStagedChanges
{code:java}
create table if not exists h3_cow using hudi partitioned by (dt) options (type 
= 'cow', primaryKey = 'id,name') as select 1 as id, 'a1' as name, 20 as price, 
'2021-01-03' as dt;

22/03/14 14:26:21 ERROR [main] Utils: Aborting task
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
view 'h3_cow' already exists in database 'default'
        at 
org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createHiveDataSourceTable(CreateHoodieTableCommand.scala:172)
        at 
org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createTableInCatalog(CreateHoodieTableCommand.scala:148)
        at 
org.apache.spark.sql.hudi.catalog.HoodieCatalog.createHoodieTable(HoodieCatalog.scala:254)
        at 
org.apache.spark.sql.hudi.catalog.HoodieStagedTable.commitStagedChanges(HoodieStagedTable.scala:62)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:484)
        at 
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1496)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable(WriteToDataSourceV2Exec.scala:468)
        at 
org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable$(WriteToDataSourceV2Exec.scala:463)
        at 
org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:106)
        at 
org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:127)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
        at 
org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
        at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
        at 
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
        at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
        at 
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
        at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
        at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)

{code}

 #  


> Certify sync with Hive 3
> ------------------------
>
>                 Key: HUDI-2520
>                 URL: https://issues.apache.org/jira/browse/HUDI-2520
>             Project: Apache Hudi
>          Issue Type: Task
>          Components: hive, meta-sync
>            Reporter: Sagar Sumit
>            Assignee: rex xiong
>            Priority: Blocker
>             Fix For: 0.11.0
>
>
> # when execute CTAS statment，the query failed due to twice sync meta problem: 
> HoodieSparkSqlWriter synced meta first time, followed by 
> HoodieCatalog.createHoodieTable synced the second time when 
> HoodieStagedTable.commitStagedChanges
> {code:java}
> create table if not exists h3_cow using hudi partitioned by (dt) options 
> (type = 'cow', primaryKey = 'id,name') as select 1 as id, 'a1' as name, 20 as 
> price, '2021-01-03' as dt;
> 22/03/14 14:26:21 ERROR [main] Utils: Aborting task
> org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
> view 'h3_cow' already exists in database 'default'
>         at 
> org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createHiveDataSourceTable(CreateHoodieTableCommand.scala:172)
>         at 
> org.apache.spark.sql.hudi.command.CreateHoodieTableCommand$.createTableInCatalog(CreateHoodieTableCommand.scala:148)
>         at 
> org.apache.spark.sql.hudi.catalog.HoodieCatalog.createHoodieTable(HoodieCatalog.scala:254)
>         at 
> org.apache.spark.sql.hudi.catalog.HoodieStagedTable.commitStagedChanges(HoodieStagedTable.scala:62)
>         at 
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.$anonfun$writeToTable$1(WriteToDataSourceV2Exec.scala:484)
>         at 
> org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1496)
>         at 
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable(WriteToDataSourceV2Exec.scala:468)
>         at 
> org.apache.spark.sql.execution.datasources.v2.TableWriteExecHelper.writeToTable$(WriteToDataSourceV2Exec.scala:463)
>         at 
> org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.writeToTable(WriteToDataSourceV2Exec.scala:106)
>         at 
> org.apache.spark.sql.execution.datasources.v2.AtomicCreateTableAsSelectExec.run(WriteToDataSourceV2Exec.scala:127)
>         at 
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
>         at 
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
>         at 
> org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
>         at 
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
>         at 
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>         at 
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>         at 
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>         at 
> org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>         at 
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>         at 
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
>         at 
> org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
>         at 
> org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
>         at 
> org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
>         at 
> org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
> {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

[jira] [Updated] (HUDI-2520) Certify sync with Hive 3

Reply via email to