This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 3468a173c2634a2cd8fd2927a65ae859c241f89c
Author: Shuo Cheng <[email protected]>
AuthorDate: Fri Oct 24 11:53:39 2025 +0800

    fix: Fix cleaning of historical internal schema files (#14126)
---
 .../internal/schema/io/FileBasedInternalSchemaStorageManager.java     | 4 ++--
 .../internal/schema/io/TestFileBasedInternalSchemaStorageManager.java | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
index 633b1b9c5d6f..a436f14c1312 100644
--- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
+++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
@@ -107,7 +107,7 @@ public class FileBasedInternalSchemaStorageManager extends AbstractInternalSchem
           .filter(f -> f.isFile())
           .map(file -> file.getPath().getName()).collect(Collectors.toList());
       List<String> residualSchemaFiles =
-          candidateSchemaFiles.stream().filter(f -> !validateCommits.contains(f.split("\\.")[0]))
+          candidateSchemaFiles.stream().filter(f -> !validateCommits.contains(getMetaClient().getInstantFileNameParser().extractTimestamp(f)))
               .collect(Collectors.toList());
       // clean residual files
       residualSchemaFiles.forEach(f -> {
@@ -130,7 +130,7 @@ public class FileBasedInternalSchemaStorageManager extends AbstractInternalSchem
           .filter(f -> f.isFile())
           .map(file -> file.getPath().getName()).collect(Collectors.toList());
       List<String> validateSchemaFiles =
-          candidateSchemaFiles.stream().filter(f -> validateCommits.contains(f.split("\\.")[0]))
+          candidateSchemaFiles.stream().filter(f -> validateCommits.contains(getMetaClient().getInstantFileNameParser().extractTimestamp(f)))
               .collect(Collectors.toList());
       for (int i = 0; i < validateSchemaFiles.size(); i++) {
         storage.deleteFile(new StoragePath(validateSchemaFiles.get(i)));
diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
index fe53e575fd80..9c92c107815e 100644
--- a/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
+++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
@@ -94,6 +94,8 @@ public class TestFileBasedInternalSchemaStorageManager extends HoodieCommonTestH
     // now the residual file created by 3st persist should be removed.
     File f = new File(metaClient.getSchemaFolderName() + File.separator + "0002.schemacommit");
     assertTrue(!f.exists());
+    assertEquals(currentSchema, fm.getSchemaByKey("0").get());
+    assertEquals(secondSchema, fm.getSchemaByKey("1").get());
     assertEquals(lastSchema, fm.getSchemaByKey("3").get());
   }
