This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 3770c20a9e26 fix: fix the issue where lsm writer could not write again
after failure (#17472)
3770c20a9e26 is described below
commit 3770c20a9e265884afcbeb67bec98bb21109cdcd
Author: chaoyang <[email protected]>
AuthorDate: Mon Dec 8 18:52:22 2025 +0800
fix: fix the issue where lsm writer could not write again after failure
(#17472)
* fix corrupt files for archived timeline
---------
Signed-off-by: TheR1sing3un <[email protected]>
Co-authored-by: danny0405 <[email protected]>
---
.../timeline/versioning/v2/LSMTimelineWriter.java | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
index c8bb499dbd9b..ffaf6f20bcfc 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/LSMTimelineWriter.java
@@ -208,6 +208,9 @@ public class LSMTimelineWriter {
int newVersion = currentVersion < 0 ? 1 : currentVersion + 1;
// create manifest file
final StoragePath manifestFilePath =
LSMTimeline.getManifestFilePath(newVersion, archivePath);
+ // the version is basically the latest version plus 1; if the preceding
failed write succeeded
+ // in writing a manifest file but failed to write the version file, a
corrupt manifest file was left with just the `newVersion`.
+ deleteIfExists(manifestFilePath);
metaClient.getStorage().createImmutableFileInPath(manifestFilePath,
Option.of(HoodieInstantWriter.convertByteArrayToWriter(content)));
// update version file
updateVersionFile(newVersion);
@@ -217,6 +220,7 @@ public class LSMTimelineWriter {
byte[] content = getUTF8Bytes(String.valueOf(newVersion));
final StoragePath versionFilePath =
LSMTimeline.getVersionFilePath(archivePath);
metaClient.getStorage().deleteFile(versionFilePath);
+ // if the step fails here, either the writer or reader would list the
manifest files to find the latest snapshot version.
metaClient.getStorage().createImmutableFileInPath(versionFilePath,
Option.of(HoodieInstantWriter.convertByteArrayToWriter(content)));
}
@@ -295,9 +299,11 @@ public class LSMTimelineWriter {
return Option.empty();
}
- public void compactFiles(List<String> candidateFiles, String
compactedFileName) {
+ public void compactFiles(List<String> candidateFiles, String
compactedFileName) throws IOException {
LOG.info("Starting to compact source files.");
- try (HoodieFileWriter writer = openWriter(new StoragePath(archivePath,
compactedFileName))) {
+ StoragePath compactedFilePath = new StoragePath(archivePath,
compactedFileName);
+ deleteIfExists(compactedFilePath);
+ try (HoodieFileWriter writer = openWriter(compactedFilePath)) {
for (String fileName : candidateFiles) {
// Read the input source file
try (HoodieAvroParquetReader reader = (HoodieAvroParquetReader)
HoodieIOFactory.getIOFactory(metaClient.getStorage())
@@ -411,6 +417,14 @@ public class LSMTimelineWriter {
return newFileName(minInstant, maxInstant, currentLayer + 1);
}
+ private void deleteIfExists(StoragePath filePath) throws IOException {
+ if (metaClient.getStorage().exists(filePath)) {
+ // delete file if exists when try to overwrite file
+ metaClient.getStorage().deleteFile(filePath);
+ LOG.info("Delete corrupt file: {} left by failed write", filePath);
+ }
+ }
+
/**
* Get or create a writer config for parquet writer.
*/