This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 8972856ed08 [HUDI-6847] Improve the incremental clean fallback logic 
(#9681)
8972856ed08 is described below

commit 8972856ed08cfe067617985436df73298d6bbf84
Author: Bingeng Huang <[email protected]>
AuthorDate: Fri Sep 15 12:40:36 2023 +0800

    [HUDI-6847] Improve the incremental clean fallback logic (#9681)
    
    The current incremental clean includes clean instants when deciding whether 
it should fall back to a full clean. This commit changes it to consider commits 
only, because incremental clean uses only commits to decide which partitions 
should be cleaned.
    
    Co-authored-by: hbg <[email protected]>
---
 .../hudi/table/action/clean/CleanPlanner.java      |  2 +-
 .../java/org/apache/hudi/table/TestCleaner.java    | 37 +++++++++++-----------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index d89c876bdfc..86070844701 100644
--- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -171,7 +171,7 @@ public class CleanPlanner<T, I, K, O> implements 
Serializable {
                   
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
           if ((cleanMetadata.getEarliestCommitToRetain() != null)
                   && (cleanMetadata.getEarliestCommitToRetain().length() > 0)
-                  && 
!hoodieTable.getActiveTimeline().isBeforeTimelineStarts(cleanMetadata.getEarliestCommitToRetain()))
 {
+                  && 
!hoodieTable.getActiveTimeline().getCommitsTimeline().isBeforeTimelineStarts(cleanMetadata.getEarliestCommitToRetain()))
 {
             return getPartitionPathsForIncrementalCleaning(cleanMetadata, 
instantToRetain);
           }
         }
diff --git 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
index 24ced3456f6..37d86587f91 100644
--- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
+++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
@@ -1124,15 +1124,16 @@ public class TestCleaner extends HoodieCleanerTestBase {
         put(p1, CollectionUtils.createImmutableList(file1P1, file2P1));
       }
     });
-    commitWithMdt("1", part1ToFileId, testTable, metadataWriter);
-    commitWithMdt("2", part1ToFileId, testTable, metadataWriter);
+    commitWithMdt("10", part1ToFileId, testTable, metadataWriter);
+    testTable.addClean("15");
+    commitWithMdt("20", part1ToFileId, testTable, metadataWriter);
 
     // add clean instant
     HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new 
HoodieActionInstant("", "", ""),
         "", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new 
HashMap<>(), new ArrayList<>());
     HoodieCleanMetadata cleanMeta = new HoodieCleanMetadata("", 0L, 0,
-        "2", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new 
HashMap<>());
-    testTable.addClean("3", cleanerPlan, cleanMeta);
+        "20", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new 
HashMap<>());
+    testTable.addClean("30", cleanerPlan, cleanMeta);
 
     // add file in partition "part_2"
     String file3P2 = UUID.randomUUID().toString();
@@ -1142,8 +1143,8 @@ public class TestCleaner extends HoodieCleanerTestBase {
         put(p2, CollectionUtils.createImmutableList(file3P2, file4P2));
       }
     });
-    commitWithMdt("3", part2ToFileId, testTable, metadataWriter);
-    commitWithMdt("4", part2ToFileId, testTable, metadataWriter);
+    commitWithMdt("30", part2ToFileId, testTable, metadataWriter);
+    commitWithMdt("40", part2ToFileId, testTable, metadataWriter);
 
     // empty commits
     String file5P2 = UUID.randomUUID().toString();
@@ -1153,25 +1154,25 @@ public class TestCleaner extends HoodieCleanerTestBase {
         put(p2, CollectionUtils.createImmutableList(file5P2, file6P2));
       }
     });
-    commitWithMdt("5", part2ToFileId, testTable, metadataWriter);
-    commitWithMdt("6", part2ToFileId, testTable, metadataWriter);
+    commitWithMdt("50", part2ToFileId, testTable, metadataWriter);
+    commitWithMdt("60", part2ToFileId, testTable, metadataWriter);
 
     // archive commit 1, 2
     new HoodieTimelineArchiver<>(config, HoodieSparkTable.create(config, 
context, metaClient))
         .archiveIfRequired(context, false);
     metaClient = HoodieTableMetaClient.reload(metaClient);
-    assertFalse(metaClient.getActiveTimeline().containsInstant("1"));
-    assertFalse(metaClient.getActiveTimeline().containsInstant("2"));
+    assertFalse(metaClient.getActiveTimeline().containsInstant("10"));
+    assertFalse(metaClient.getActiveTimeline().containsInstant("20"));
 
     runCleaner(config);
-    assertFalse(testTable.baseFileExists(p1, "1", file1P1), "Clean old 
FileSlice in p1 by fallback to full clean");
-    assertFalse(testTable.baseFileExists(p1, "1", file2P1), "Clean old 
FileSlice in p1 by fallback to full clean");
-    assertFalse(testTable.baseFileExists(p2, "3", file3P2), "Clean old 
FileSlice in p2");
-    assertFalse(testTable.baseFileExists(p2, "3", file4P2), "Clean old 
FileSlice in p2");
-    assertTrue(testTable.baseFileExists(p1, "2", file1P1), "Latest FileSlice 
exists");
-    assertTrue(testTable.baseFileExists(p1, "2", file2P1), "Latest FileSlice 
exists");
-    assertTrue(testTable.baseFileExists(p2, "4", file3P2), "Latest FileSlice 
exists");
-    assertTrue(testTable.baseFileExists(p2, "4", file4P2), "Latest FileSlice 
exists");
+    assertFalse(testTable.baseFileExists(p1, "10", file1P1), "Clean old 
FileSlice in p1 by fallback to full clean");
+    assertFalse(testTable.baseFileExists(p1, "10", file2P1), "Clean old 
FileSlice in p1 by fallback to full clean");
+    assertFalse(testTable.baseFileExists(p2, "30", file3P2), "Clean old 
FileSlice in p2");
+    assertFalse(testTable.baseFileExists(p2, "30", file4P2), "Clean old 
FileSlice in p2");
+    assertTrue(testTable.baseFileExists(p1, "20", file1P1), "Latest FileSlice 
exists");
+    assertTrue(testTable.baseFileExists(p1, "20", file2P1), "Latest FileSlice 
exists");
+    assertTrue(testTable.baseFileExists(p2, "40", file3P2), "Latest FileSlice 
exists");
+    assertTrue(testTable.baseFileExists(p2, "40", file4P2), "Latest FileSlice 
exists");
   }
 
   /**

Reply via email to