This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-0.13.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 281b29eb7b3e6a48a0598f6b01b5148fe4afc541 Author: Yann Byron <[email protected]> AuthorDate: Sat Jan 28 14:24:33 2023 +0800 [HUDI-5629] Clean CDC log files for enable/disable scenario (#7767) --- .../hudi/table/action/clean/CleanPlanner.java | 26 +++++----------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java index 982800cc246..c6ff62ee764 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java @@ -29,10 +29,8 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.model.HoodieFileGroupId; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.cdc.HoodieCDCUtils; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -62,7 +60,6 @@ import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -436,23 +433,12 @@ public class CleanPlanner<T, I, K, O> implements Serializable { cleanPaths.add(new CleanFileInfo(dataFile.getBootstrapBaseFile().get().getPath(), true)); } } - if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) { - // If merge on read, then clean the log files for the commits as well - 
Predicate<HoodieLogFile> notCDCLogFile = - hoodieLogFile -> !hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX); - cleanPaths.addAll( - nextSlice.getLogFiles().filter(notCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false)) - .collect(Collectors.toList())); - } - if (hoodieTable.getMetaClient().getTableConfig().isCDCEnabled()) { - // The cdc log files will be written out in cdc scenario, no matter the table type is mor or cow. - // Here we need to clean up these cdc log files. - Predicate<HoodieLogFile> isCDCLogFile = - hoodieLogFile -> hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX); - cleanPaths.addAll( - nextSlice.getLogFiles().filter(isCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false)) - .collect(Collectors.toList())); - } + + // clean the log files for the commits, which contain cdc log files in cdc scenario + // and normal log files for mor tables. + cleanPaths.addAll( + nextSlice.getLogFiles().map(lf -> new CleanFileInfo(lf.getPath().toString(), false)) + .collect(Collectors.toList())); return cleanPaths; }
