[ 
https://issues.apache.org/jira/browse/HUDI-6679?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ethan Guo updated HUDI-6679:
----------------------------
    Description: 
When both files and record_index partitions are enabled, for the first commit 
in the data table, the transaction fails when initializing the second partition 
in the MDT.  In this case, the timelines look like below.  In this case, when 
restarting the pipeline, the rollback triggers irrelevant bootstrap rollback 
logic causing MDT to be corrupted.

DT
{code:java}
<commit_time>.commit.requested
<commit_time>.commit.inflight {code}
MDT
{code:java}
00000000000000010.deltacommit.requested
00000000000000010.deltacommit.inflight
00000000000000010.deltacommit
00000000000000011.deltacommit.requested
00000000000000011.deltacommit.inflight{code}
{code:java}
org.apache.hudi.exception.HoodieRollbackException: Failed to rollback 
s3a://<base_path>/hoodie_table commits 20230807064006967
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:918)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:865)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:739)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:723)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:718)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.lambda$startCommitWithTime$97cdbdca$1(BaseHoodieWriteClient.java:928)
    at 
org.apache.hudi.common.util.CleanerUtils.rollbackFailedWrites(CleanerUtils.java:222)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:927)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:920)
    at 
org.apache.hudi.utilities.streamer.StreamSync.startCommit(StreamSync.java:890)
    at 
org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:767)
    at 
org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:445)
    at 
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:767)
    at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.IllegalArgumentException: FileGroup count for MDT 
partition files should be >0
    at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:42)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.prepRecords(HoodieBackedTableMetadataWriter.java:1098)
    at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commitInternal(SparkHoodieBackedTableMetadataWriter.java:135)
    at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:122)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:837)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1013)
    at 
org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$2(BaseActionExecutor.java:77)
    at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
    at 
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:77)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.finishRollback(BaseRollbackActionExecutor.java:264)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.runRollback(BaseRollbackActionExecutor.java:120)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.execute(BaseRollbackActionExecutor.java:141)
    at 
org.apache.hudi.table.HoodieSparkMergeOnReadTable.rollback(HoodieSparkMergeOnReadTable.java:218)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:901)
    ... 16 more {code}

  was:
 

 
{code:java}
org.apache.hudi.exception.HoodieRollbackException: Failed to rollback 
s3a://<base_path>/hoodie_table commits 20230807064006967
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:918)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:865)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:739)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:723)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:718)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.lambda$startCommitWithTime$97cdbdca$1(BaseHoodieWriteClient.java:928)
    at 
org.apache.hudi.common.util.CleanerUtils.rollbackFailedWrites(CleanerUtils.java:222)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:927)
    at 
org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:920)
    at 
org.apache.hudi.utilities.streamer.StreamSync.startCommit(StreamSync.java:890)
    at 
org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:767)
    at 
org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:445)
    at 
org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:767)
    at 
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
    at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.IllegalArgumentException: FileGroup count for MDT 
partition files should be >0
    at 
org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:42)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.prepRecords(HoodieBackedTableMetadataWriter.java:1098)
    at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commitInternal(SparkHoodieBackedTableMetadataWriter.java:135)
    at 
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:122)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:837)
    at 
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1013)
    at 
org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$2(BaseActionExecutor.java:77)
    at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
    at 
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:77)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.finishRollback(BaseRollbackActionExecutor.java:264)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.runRollback(BaseRollbackActionExecutor.java:120)
    at 
org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.execute(BaseRollbackActionExecutor.java:141)
    at 
org.apache.hudi.table.HoodieSparkMergeOnReadTable.rollback(HoodieSparkMergeOnReadTable.java:218)
    at 
org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:901)
    ... 16 more {code}


> Fix initialization of metadata table partitions upon failure
> ------------------------------------------------------------
>
>                 Key: HUDI-6679
>                 URL: https://issues.apache.org/jira/browse/HUDI-6679
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: Ethan Guo
>            Assignee: Ethan Guo
>            Priority: Blocker
>             Fix For: 0.14.0
>
>
> When both files and record_index partitions are enabled, for the first commit 
> in the data table, the transaction fails when initializing the second 
> partition in the MDT.  In this case, the timelines look like below.  In this 
> case, when restarting the pipeline, the rollback triggers irrelevant 
> bootstrap rollback logic causing MDT to be corrupted.
> DT
> {code:java}
> <commit_time>.commit.requested
> <commit_time>.commit.inflight {code}
> MDT
> {code:java}
> 00000000000000010.deltacommit.requested
> 00000000000000010.deltacommit.inflight
> 00000000000000010.deltacommit
> 00000000000000011.deltacommit.requested
> 00000000000000011.deltacommit.inflight{code}
> {code:java}
> org.apache.hudi.exception.HoodieRollbackException: Failed to rollback 
> s3a://<base_path>/hoodie_table commits 20230807064006967
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:918)
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:865)
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:739)
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:723)
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollbackFailedWrites(BaseHoodieTableServiceClient.java:718)
>     at 
> org.apache.hudi.client.BaseHoodieWriteClient.lambda$startCommitWithTime$97cdbdca$1(BaseHoodieWriteClient.java:928)
>     at 
> org.apache.hudi.common.util.CleanerUtils.rollbackFailedWrites(CleanerUtils.java:222)
>     at 
> org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:927)
>     at 
> org.apache.hudi.client.BaseHoodieWriteClient.startCommitWithTime(BaseHoodieWriteClient.java:920)
>     at 
> org.apache.hudi.utilities.streamer.StreamSync.startCommit(StreamSync.java:890)
>     at 
> org.apache.hudi.utilities.streamer.StreamSync.writeToSink(StreamSync.java:767)
>     at 
> org.apache.hudi.utilities.streamer.StreamSync.syncOnce(StreamSync.java:445)
>     at 
> org.apache.hudi.utilities.streamer.HoodieStreamer$StreamSyncService.lambda$startService$1(HoodieStreamer.java:767)
>     at 
> java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750)
> Caused by: java.lang.IllegalArgumentException: FileGroup count for MDT 
> partition files should be >0
>     at 
> org.apache.hudi.common.util.ValidationUtils.checkArgument(ValidationUtils.java:42)
>     at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.prepRecords(HoodieBackedTableMetadataWriter.java:1098)
>     at 
> org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commitInternal(SparkHoodieBackedTableMetadataWriter.java:135)
>     at 
> org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:122)
>     at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:837)
>     at 
> org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:1013)
>     at 
> org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$2(BaseActionExecutor.java:77)
>     at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
>     at 
> org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:77)
>     at 
> org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.finishRollback(BaseRollbackActionExecutor.java:264)
>     at 
> org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.runRollback(BaseRollbackActionExecutor.java:120)
>     at 
> org.apache.hudi.table.action.rollback.BaseRollbackActionExecutor.execute(BaseRollbackActionExecutor.java:141)
>     at 
> org.apache.hudi.table.HoodieSparkMergeOnReadTable.rollback(HoodieSparkMergeOnReadTable.java:218)
>     at 
> org.apache.hudi.client.BaseHoodieTableServiceClient.rollback(BaseHoodieTableServiceClient.java:901)
>     ... 16 more {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to