This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 3770c20a9e26 fix: fix the issue where lsm writer could not write again 
after failure (#17472)
3770c20a9e26 is described below

commit 3770c20a9e265884afcbeb67bec98bb21109cdcd
Author: chaoyang <[email protected]>
AuthorDate: Mon Dec 8 18:52:22 2025 +0800

    fix: fix the issue where lsm writer could not write again after failure 
(#17472)
    
    * fix corrupt files for archived timeline
    
    ---------
    
    Signed-off-by: TheR1sing3un <[email protected]>
    Co-authored-by: danny0405 <[email protected]>
---
 .../timeline/versioning/v2/LSMTimelineWriter.java      | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
index c8bb499dbd9b..ffaf6f20bcfc 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
@@ -208,6 +208,9 @@ public class LSMTimelineWriter {
     int newVersion = currentVersion < 0 ? 1 : currentVersion + 1;
     // create manifest file
     final StoragePath manifestFilePath = 
LSMTimeline.getManifestFilePath(newVersion, archivePath);
+    // the version is basically the latest version plus 1. If the preceding 
write succeeded
+    // in writing a manifest file but failed to write the version file, a 
corrupt manifest file is left behind with exactly this `newVersion`.
+    deleteIfExists(manifestFilePath);
     metaClient.getStorage().createImmutableFileInPath(manifestFilePath, 
Option.of(HoodieInstantWriter.convertByteArrayToWriter(content)));
     // update version file
     updateVersionFile(newVersion);
@@ -217,6 +220,7 @@ public class LSMTimelineWriter {
     byte[] content = getUTF8Bytes(String.valueOf(newVersion));
     final StoragePath versionFilePath = 
LSMTimeline.getVersionFilePath(archivePath);
     metaClient.getStorage().deleteFile(versionFilePath);
+    // if the step fails here, the version file is missing and both the writer and the reader would list the 
manifest files to find the latest snapshot version.
     metaClient.getStorage().createImmutableFileInPath(versionFilePath, 
Option.of(HoodieInstantWriter.convertByteArrayToWriter(content)));
   }
 
@@ -295,9 +299,11 @@ public class LSMTimelineWriter {
     return Option.empty();
   }
 
-  public void compactFiles(List<String> candidateFiles, String 
compactedFileName) {
+  public void compactFiles(List<String> candidateFiles, String 
compactedFileName) throws IOException {
     LOG.info("Starting to compact source files.");
-    try (HoodieFileWriter writer = openWriter(new StoragePath(archivePath, 
compactedFileName))) {
+    StoragePath compactedFilePath = new StoragePath(archivePath, 
compactedFileName);
+    deleteIfExists(compactedFilePath);
+    try (HoodieFileWriter writer = openWriter(compactedFilePath)) {
       for (String fileName : candidateFiles) {
         // Read the input source file
         try (HoodieAvroParquetReader reader = (HoodieAvroParquetReader) 
HoodieIOFactory.getIOFactory(metaClient.getStorage())
@@ -411,6 +417,14 @@ public class LSMTimelineWriter {
     return newFileName(minInstant, maxInstant, currentLayer + 1);
   }
 
+  private void deleteIfExists(StoragePath filePath) throws IOException {
+    if (metaClient.getStorage().exists(filePath)) {
+      // a file already present at the target path is treated as a leftover of a failed write; delete it so the path can be written again
+      metaClient.getStorage().deleteFile(filePath);
+      LOG.info("Delete corrupt file: {} left by failed write", filePath);
+    }
+  }
+
   /**
    * Get or create a writer config for parquet writer.
    */

Reply via email to