This is an automated email from the ASF dual-hosted git repository. sivabalan pushed a commit to branch release-1.1.1-rc2-prep in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 8cb746ad898acf22c9c62b6fa87b0a8b90212a6f Author: chaoyang <[email protected]> AuthorDate: Mon Dec 8 18:52:22 2025 +0800 fix: fix the issue where lsm writer could not write again after failure (#17472) * fix corrupt files for archived timeline --------- Signed-off-by: TheR1sing3un <[email protected]> Co-authored-by: danny0405 <[email protected]> --- .../timeline/versioning/v2/LSMTimelineWriter.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java index 321035d0774f..66fb64b87b3a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java @@ -206,6 +206,9 @@ public class LSMTimelineWriter { int newVersion = currentVersion < 0 ? 1 : currentVersion + 1; // create manifest file final StoragePath manifestFilePath = LSMTimeline.getManifestFilePath(newVersion, archivePath); + // the version is basically the latest version plus 1, if the preceding failed write succeed + // to write a manifest file but failed to write the version file, a corrupt manifest file was left with just the `newVersion`. 
+ deleteIfExists(manifestFilePath); metaClient.getStorage().createImmutableFileInPath(manifestFilePath, Option.of(HoodieInstantWriter.convertByteArrayToWriter(content))); // update version file updateVersionFile(newVersion); @@ -215,6 +218,7 @@ public class LSMTimelineWriter { byte[] content = getUTF8Bytes(String.valueOf(newVersion)); final StoragePath versionFilePath = LSMTimeline.getVersionFilePath(archivePath); metaClient.getStorage().deleteFile(versionFilePath); + // if the step fails here, either the writer or reader would list the manifest files to find the latest snapshot version. metaClient.getStorage().createImmutableFileInPath(versionFilePath, Option.of(HoodieInstantWriter.convertByteArrayToWriter(content))); } @@ -293,9 +297,11 @@ public class LSMTimelineWriter { return Option.empty(); } - public void compactFiles(List<String> candidateFiles, String compactedFileName) { + public void compactFiles(List<String> candidateFiles, String compactedFileName) throws IOException { LOG.info("Starting to compact source files."); - try (HoodieFileWriter writer = openWriter(new StoragePath(archivePath, compactedFileName))) { + StoragePath compactedFilePath = new StoragePath(archivePath, compactedFileName); + deleteIfExists(compactedFilePath); + try (HoodieFileWriter writer = openWriter(compactedFilePath)) { for (String fileName : candidateFiles) { // Read the input source file try (HoodieAvroParquetReader reader = (HoodieAvroParquetReader) HoodieIOFactory.getIOFactory(metaClient.getStorage()) @@ -406,6 +412,14 @@ public class LSMTimelineWriter { return newFileName(minInstant, maxInstant, currentLayer + 1); } + private void deleteIfExists(StoragePath filePath) throws IOException { + if (metaClient.getStorage().exists(filePath)) { + // delete file if exists when try to overwrite file + metaClient.getStorage().deleteFile(filePath); + LOG.info("Delete corrupt file: {} left by failed write", filePath); + } + } + /** * Get or create a writer config for parquet writer. */
