This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.1.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 3468a173c2634a2cd8fd2927a65ae859c241f89c
Author: Shuo Cheng <[email protected]>
AuthorDate: Fri Oct 24 11:53:39 2025 +0800

    fix: Fix cleaning of historical internal schema files (#14126)
---
 .../internal/schema/io/FileBasedInternalSchemaStorageManager.java     | 4 ++--
 .../internal/schema/io/TestFileBasedInternalSchemaStorageManager.java | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
index 633b1b9c5d6f..a436f14c1312 100644
--- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
+++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java
@@ -107,7 +107,7 @@ public class FileBasedInternalSchemaStorageManager extends AbstractInternalSchem
             .filter(f -> f.isFile())
             .map(file -> file.getPath().getName()).collect(Collectors.toList());
         List<String> residualSchemaFiles =
-            candidateSchemaFiles.stream().filter(f -> !validateCommits.contains(f.split("\\.")[0]))
+            candidateSchemaFiles.stream().filter(f -> !validateCommits.contains(getMetaClient().getInstantFileNameParser().extractTimestamp(f)))
                 .collect(Collectors.toList());
         // clean residual files
         residualSchemaFiles.forEach(f -> {
@@ -130,7 +130,7 @@ public class FileBasedInternalSchemaStorageManager extends AbstractInternalSchem
             .filter(f -> f.isFile())
             .map(file -> file.getPath().getName()).collect(Collectors.toList());
         List<String> validateSchemaFiles =
-            candidateSchemaFiles.stream().filter(f -> validateCommits.contains(f.split("\\.")[0]))
+            candidateSchemaFiles.stream().filter(f -> validateCommits.contains(getMetaClient().getInstantFileNameParser().extractTimestamp(f)))
                 .collect(Collectors.toList());
         for (int i = 0; i < validateSchemaFiles.size(); i++) {
           storage.deleteFile(new StoragePath(validateSchemaFiles.get(i)));
diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
index fe53e575fd80..9c92c107815e 100644
--- a/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
+++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/internal/schema/io/TestFileBasedInternalSchemaStorageManager.java
@@ -94,6 +94,8 @@ public class TestFileBasedInternalSchemaStorageManager extends HoodieCommonTestH
     // now the residual file created by 3st persist should be removed.
     File f = new File(metaClient.getSchemaFolderName() + File.separator + "0002.schemacommit");
     assertTrue(!f.exists());
+    assertEquals(currentSchema, fm.getSchemaByKey("0").get());
+    assertEquals(secondSchema, fm.getSchemaByKey("1").get());
     assertEquals(lastSchema, fm.getSchemaByKey("3").get());
   }
 

Reply via email to