This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 8972856ed08 [HUDI-6847] Improve the incremental clean fallback logic
(#9681)
8972856ed08 is described below
commit 8972856ed08cfe067617985436df73298d6bbf84
Author: Bingeng Huang <[email protected]>
AuthorDate: Fri Sep 15 12:40:36 2023 +0800
[HUDI-6847] Improve the incremental clean fallback logic (#9681)
Current incremental clean includes clean instants when deciding if should
fallback to full clean. This commit changes to only include commits only,
because incremental clean only use commits to decide which partition should
clean.
Co-authored-by: hbg <[email protected]>
---
.../hudi/table/action/clean/CleanPlanner.java | 2 +-
.../java/org/apache/hudi/table/TestCleaner.java | 37 +++++++++++-----------
2 files changed, 20 insertions(+), 19 deletions(-)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index d89c876bdfc..86070844701 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -171,7 +171,7 @@ public class CleanPlanner<T, I, K, O> implements
Serializable {
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
if ((cleanMetadata.getEarliestCommitToRetain() != null)
&& (cleanMetadata.getEarliestCommitToRetain().length() > 0)
- &&
!hoodieTable.getActiveTimeline().isBeforeTimelineStarts(cleanMetadata.getEarliestCommitToRetain()))
{
+ &&
!hoodieTable.getActiveTimeline().getCommitsTimeline().isBeforeTimelineStarts(cleanMetadata.getEarliestCommitToRetain()))
{
return getPartitionPathsForIncrementalCleaning(cleanMetadata,
instantToRetain);
}
}
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
index 24ced3456f6..37d86587f91 100644
---
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
@@ -1124,15 +1124,16 @@ public class TestCleaner extends HoodieCleanerTestBase {
put(p1, CollectionUtils.createImmutableList(file1P1, file2P1));
}
});
- commitWithMdt("1", part1ToFileId, testTable, metadataWriter);
- commitWithMdt("2", part1ToFileId, testTable, metadataWriter);
+ commitWithMdt("10", part1ToFileId, testTable, metadataWriter);
+ testTable.addClean("15");
+ commitWithMdt("20", part1ToFileId, testTable, metadataWriter);
// add clean instant
HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new
HoodieActionInstant("", "", ""),
"", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new
HashMap<>(), new ArrayList<>());
HoodieCleanMetadata cleanMeta = new HoodieCleanMetadata("", 0L, 0,
- "2", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new
HashMap<>());
- testTable.addClean("3", cleanerPlan, cleanMeta);
+ "20", "", new HashMap<>(), CleanPlanV2MigrationHandler.VERSION, new
HashMap<>());
+ testTable.addClean("30", cleanerPlan, cleanMeta);
// add file in partition "part_2"
String file3P2 = UUID.randomUUID().toString();
@@ -1142,8 +1143,8 @@ public class TestCleaner extends HoodieCleanerTestBase {
put(p2, CollectionUtils.createImmutableList(file3P2, file4P2));
}
});
- commitWithMdt("3", part2ToFileId, testTable, metadataWriter);
- commitWithMdt("4", part2ToFileId, testTable, metadataWriter);
+ commitWithMdt("30", part2ToFileId, testTable, metadataWriter);
+ commitWithMdt("40", part2ToFileId, testTable, metadataWriter);
// empty commits
String file5P2 = UUID.randomUUID().toString();
@@ -1153,25 +1154,25 @@ public class TestCleaner extends HoodieCleanerTestBase {
put(p2, CollectionUtils.createImmutableList(file5P2, file6P2));
}
});
- commitWithMdt("5", part2ToFileId, testTable, metadataWriter);
- commitWithMdt("6", part2ToFileId, testTable, metadataWriter);
+ commitWithMdt("50", part2ToFileId, testTable, metadataWriter);
+ commitWithMdt("60", part2ToFileId, testTable, metadataWriter);
// archive commit 1, 2
new HoodieTimelineArchiver<>(config, HoodieSparkTable.create(config,
context, metaClient))
.archiveIfRequired(context, false);
metaClient = HoodieTableMetaClient.reload(metaClient);
- assertFalse(metaClient.getActiveTimeline().containsInstant("1"));
- assertFalse(metaClient.getActiveTimeline().containsInstant("2"));
+ assertFalse(metaClient.getActiveTimeline().containsInstant("10"));
+ assertFalse(metaClient.getActiveTimeline().containsInstant("20"));
runCleaner(config);
- assertFalse(testTable.baseFileExists(p1, "1", file1P1), "Clean old
FileSlice in p1 by fallback to full clean");
- assertFalse(testTable.baseFileExists(p1, "1", file2P1), "Clean old
FileSlice in p1 by fallback to full clean");
- assertFalse(testTable.baseFileExists(p2, "3", file3P2), "Clean old
FileSlice in p2");
- assertFalse(testTable.baseFileExists(p2, "3", file4P2), "Clean old
FileSlice in p2");
- assertTrue(testTable.baseFileExists(p1, "2", file1P1), "Latest FileSlice
exists");
- assertTrue(testTable.baseFileExists(p1, "2", file2P1), "Latest FileSlice
exists");
- assertTrue(testTable.baseFileExists(p2, "4", file3P2), "Latest FileSlice
exists");
- assertTrue(testTable.baseFileExists(p2, "4", file4P2), "Latest FileSlice
exists");
+ assertFalse(testTable.baseFileExists(p1, "10", file1P1), "Clean old
FileSlice in p1 by fallback to full clean");
+ assertFalse(testTable.baseFileExists(p1, "10", file2P1), "Clean old
FileSlice in p1 by fallback to full clean");
+ assertFalse(testTable.baseFileExists(p2, "30", file3P2), "Clean old
FileSlice in p2");
+ assertFalse(testTable.baseFileExists(p2, "30", file4P2), "Clean old
FileSlice in p2");
+ assertTrue(testTable.baseFileExists(p1, "20", file1P1), "Latest FileSlice
exists");
+ assertTrue(testTable.baseFileExists(p1, "20", file2P1), "Latest FileSlice
exists");
+ assertTrue(testTable.baseFileExists(p2, "40", file3P2), "Latest FileSlice
exists");
+ assertTrue(testTable.baseFileExists(p2, "40", file4P2), "Latest FileSlice
exists");
}
/**