[ 
https://issues.apache.org/jira/browse/HUDI-7205?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Y Ethan Guo updated HUDI-7205:
------------------------------
    Fix Version/s: 1.1.0

> Optimize MDT table deletion
> ---------------------------
>
>                 Key: HUDI-7205
>                 URL: https://issues.apache.org/jira/browse/HUDI-7205
>             Project: Apache Hudi
>          Issue Type: Improvement
>          Components: metadata
>            Reporter: sivabalan narayanan
>            Priority: Major
>             Fix For: 1.1.0
>
>
> Hudi tries to honor MDT disablement as part of every write. But the deletion 
> is triggered every time, even if the table does not exist and all configs are 
> already disabled. 
>  
> This results in updating hoodie.properties repeatedly and can run into 
> concurrency issues. 
>  
> {code:java}
> 23/12/07 04:34:32 ERROR DagScheduler: Exception executing node
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.modify(HoodieTableConfig.java:445)
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.update(HoodieTableConfig.java:454)
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.setMetadataPartitionState(HoodieTableConfig.java:780)
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.clearMetadataPartitions(HoodieTableConfig.java:811)
>         at 
> org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable(HoodieTableMetadataUtil.java:1412)
>         at 
> org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable(HoodieTableMetadataUtil.java:289)
>         at 
> org.apache.hudi.table.HoodieTable.maybeDeleteMetadataTable(HoodieTable.java:953)
>         at 
> org.apache.hudi.table.HoodieSparkTable.getMetadataWriter(HoodieSparkTable.java:116)
>         at 
> org.apache.hudi.table.HoodieTable.getMetadataWriter(HoodieTable.java:905)
>         at 
> org.apache.hudi.client.BaseHoodieWriteClient.writeTableMetadata(BaseHoodieWriteClient.java:360)
>         at 
> org.apache.hudi.client.BaseHoodieWriteClient.commit(BaseHoodieWriteClient.java:286)
>         at 
> org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:236)
>         at 
> org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:104){code}
> {code:java}
>         at 
> org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:826)
>         at 
> org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:450)
>         at 
> org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.upsert(HoodieDeltaStreamerWrapper.java:48)
>         at 
> org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.insert(HoodieDeltaStreamerWrapper.java:52)
>         at 
> org.apache.hudi.integ.testsuite.HoodieInlineTestSuiteWriter.insert(HoodieInlineTestSuiteWriter.java:111)
>         at 
> org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.ingest(InsertNode.java:70)
>         at 
> org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.execute(InsertNode.java:53)
>         at 
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:135)
>         at 
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:104)
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:750)
> Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: 
> s3a://jenkins-infra-hudi/hudi/job-run/HudiIntegTestsDeltastreamerAsyncManualEKS/data/2023-12-07/30/MERGE_ON_READdeltastreamer-non-partitioned.yamltest-nonpartitioned.properties/91/output/.hoodie/hoodie.properties
>  already exists
>         at 
> org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:813)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195){code}
> {code:java}
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
>         at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1064)
>         at 
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$create$2(HoodieWrapperFileSystem.java:238)
>         at 
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:114)
>         at 
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.create(HoodieWrapperFileSystem.java:237)
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.recoverIfNeeded(HoodieTableConfig.java:389)
>         at 
> org.apache.hudi.common.table.HoodieTableConfig.modify(HoodieTableConfig.java:410)
>         ... 26 more
> 23/12/07 04:34:32 INFO DagScheduler: Forcing shutdown of executor service, 
> this might kill running tasks
> 23/12/07 04:34:32 ERROR HoodieTestSuiteJob: Failed to run Test Suite 
> java.util.concurrent.ExecutionException: 
> org.apache.hudi.exception.HoodieException: 
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
>         at java.util.concurrent.FutureTask.report(FutureTask.java:122)
>         at java.util.concurrent.FutureTask.get(FutureTask.java:206)
>         at 
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:112)
>         at 
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:67){code}
> {code:java}
> )
>         at 
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:
> 218)
>         at 
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:184)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:498)
>         at 
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
>         at 
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
>         at 
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
>         at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
>         at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
>         at 
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
>         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
>         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: org.apache.hudi.exception.HoodieException: 
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
>         at 
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:142)
>  {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to