This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 281b29eb7b3e6a48a0598f6b01b5148fe4afc541
Author: Yann Byron <[email protected]>
AuthorDate: Sat Jan 28 14:24:33 2023 +0800

    [HUDI-5629] Clean CDC log files for enable/disable scenario (#7767)
---
 .../hudi/table/action/clean/CleanPlanner.java      | 26 +++++-----------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
index 982800cc246..c6ff62ee764 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java
@@ -29,10 +29,8 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieFileGroup;
 import org.apache.hudi.common.model.HoodieFileGroupId;
-import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
 import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
@@ -62,7 +60,6 @@ import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -436,23 +433,12 @@ public class CleanPlanner<T, I, K, O> implements Serializable {
         cleanPaths.add(new CleanFileInfo(dataFile.getBootstrapBaseFile().get().getPath(), true));
       }
     }
-    if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
-      // If merge on read, then clean the log files for the commits as well
-      Predicate<HoodieLogFile> notCDCLogFile =
-          hoodieLogFile -> !hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
-      cleanPaths.addAll(
-          nextSlice.getLogFiles().filter(notCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
-              .collect(Collectors.toList()));
-    }
-    if (hoodieTable.getMetaClient().getTableConfig().isCDCEnabled()) {
-      // The cdc log files will be written out in cdc scenario, no matter the table type is mor or cow.
-      // Here we need to clean uo these cdc log files.
-      Predicate<HoodieLogFile> isCDCLogFile =
-          hoodieLogFile -> hoodieLogFile.getFileName().endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX);
-      cleanPaths.addAll(
-          nextSlice.getLogFiles().filter(isCDCLogFile).map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
-              .collect(Collectors.toList()));
-    }
+
+    // clean the log files for the commits, which contain cdc log files in cdc scenario
+    // and normal log files for mor tables.
+    cleanPaths.addAll(
+        nextSlice.getLogFiles().map(lf -> new CleanFileInfo(lf.getPath().toString(), false))
+            .collect(Collectors.toList()));
     return cleanPaths;
   }
 

Reply via email to