[
https://issues.apache.org/jira/browse/HUDI-7205?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Ethan Guo updated HUDI-7205:
------------------------------
Fix Version/s: 1.1.0
> Optimize MDT table deletion
> ---------------------------
>
> Key: HUDI-7205
> URL: https://issues.apache.org/jira/browse/HUDI-7205
> Project: Apache Hudi
> Issue Type: Improvement
> Components: metadata
> Reporter: sivabalan narayanan
> Priority: Major
> Fix For: 1.1.0
>
>
> Hudi tries to honor MDT disablement as part of every write. But the deletion
> is triggered every time, even if the table does not exist and all configs are
> already disabled.
>
> This results in updating hoodie.properties repeatedly and can run into
> concurrency issues.
>
> {code:java}
> 23/12/07 04:34:32 ERROR DagScheduler: Exception executing node
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
> at
> org.apache.hudi.common.table.HoodieTableConfig.modify(HoodieTableConfig.java:445)
> at
> org.apache.hudi.common.table.HoodieTableConfig.update(HoodieTableConfig.java:454)
> at
> org.apache.hudi.common.table.HoodieTableConfig.setMetadataPartitionState(HoodieTableConfig.java:780)
> at
> org.apache.hudi.common.table.HoodieTableConfig.clearMetadataPartitions(HoodieTableConfig.java:811)
> at
> org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable(HoodieTableMetadataUtil.java:1412)
> at
> org.apache.hudi.metadata.HoodieTableMetadataUtil.deleteMetadataTable(HoodieTableMetadataUtil.java:289)
> at
> org.apache.hudi.table.HoodieTable.maybeDeleteMetadataTable(HoodieTable.java:953)
> at
> org.apache.hudi.table.HoodieSparkTable.getMetadataWriter(HoodieSparkTable.java:116)
> at
> org.apache.hudi.table.HoodieTable.getMetadataWriter(HoodieTable.java:905)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.writeTableMetadata(BaseHoodieWriteClient.java:360)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.commit(BaseHoodieWriteClient.java:286)
> at
> org.apache.hudi.client.BaseHoodieWriteClient.commitStats(BaseHoodieWriteClient.java:236)
> at
> org.apache.hudi.client.SparkRDDWriteClient.commit(SparkRDDWriteClient.java:104){code}
> {code:java}
> at
> org.apache.hudi.utilities.streamer.StreamSync.writeToSinkAndDoMetaSync(StreamSync.java:826)
> at
> org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:450)
> at
> org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.upsert(HoodieDeltaStreamerWrapper.java:48)
> at
> org.apache.hudi.integ.testsuite.HoodieDeltaStreamerWrapper.insert(HoodieDeltaStreamerWrapper.java:52)
> at
> org.apache.hudi.integ.testsuite.HoodieInlineTestSuiteWriter.insert(HoodieInlineTestSuiteWriter.java:111)
> at
> org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.ingest(InsertNode.java:70)
> at
> org.apache.hudi.integ.testsuite.dag.nodes.InsertNode.execute(InsertNode.java:53)
> at
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:135)
> at
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.lambda$execute$0(DagScheduler.java:104)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: org.apache.hadoop.fs.FileAlreadyExistsException:
> s3a://jenkins-infra-hudi/hudi/job-run/HudiIntegTestsDeltastreamerAsyncManualEKS/data/2023-12-07/30/MERGE_ON_READdeltastreamer-non-partitioned.yamltest-nonpartitioned.properties/91/output/.hoodie/hoodie.properties
> already exists
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:813)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1195){code}
> {code:java}
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1175)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1064)
> at
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$create$2(HoodieWrapperFileSystem.java:238)
> at
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:114)
> at
> org.apache.hudi.common.fs.HoodieWrapperFileSystem.create(HoodieWrapperFileSystem.java:237)
> at
> org.apache.hudi.common.table.HoodieTableConfig.recoverIfNeeded(HoodieTableConfig.java:389)
> at
> org.apache.hudi.common.table.HoodieTableConfig.modify(HoodieTableConfig.java:410)
> ... 26 more
> 23/12/07 04:34:32 INFO DagScheduler: Forcing shutdown of executor service,
> this might kill running tasks
> 23/12/07 04:34:32 ERROR HoodieTestSuiteJob: Failed to run Test Suite
> java.util.concurrent.ExecutionException:
> org.apache.hudi.exception.HoodieException:
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
> at java.util.concurrent.FutureTask.report(FutureTask.java:122)
> at java.util.concurrent.FutureTask.get(FutureTask.java:206)
> at
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.execute(DagScheduler.java:112)
> at
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.schedule(DagScheduler.java:67){code}
> {code:java}
> )
> at
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.runTestSuite(HoodieTestSuiteJob.java:
> 218)
> at
> org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.main(HoodieTestSuiteJob.java:184)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
> at
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1043)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1052)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: org.apache.hudi.exception.HoodieException:
> org.apache.hudi.exception.HoodieIOException: Error updating table configs.
> at
> org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler.executeNode(DagScheduler.java:142)
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)