nsivabalan commented on code in PR #6498:
URL: https://github.com/apache/hudi/pull/6498#discussion_r980376754


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String> 
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
    * @return list of partitions
    * @throws IOException
    */
-  private List<String> 
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain) 
throws IOException {
-    if (!instantToRetain.isPresent()) {
-      LOG.info("No earliest commit to retain. No need to scan partitions !!");
-      return Collections.emptyList();
-    }
+  private List<String> 
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain, 
HoodieCleaningPolicy cleaningPolicy) throws IOException {
+    if (cleaningPolicy != HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
+      if (!instantToRetain.isPresent()) {
+        LOG.info("No earliest commit to retain. No need to scan partitions 
!!");
+        return Collections.emptyList();
+      }
 
-    if (config.incrementalCleanerModeEnabled()) {
-      Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
-      if (lastClean.isPresent()) {
-        if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
-          
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
-        } else {
-          HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+      if (config.incrementalCleanerModeEnabled()) {
+        Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+        if (lastClean.isPresent()) {
+          if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+            
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+          } else {
+            HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+                
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
+            if ((cleanMetadata.getEarliestCommitToRetain() != null)
+                && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
+              return 
getPartitionPathsForIncrementalCleaning(cleanMetadata.getEarliestCommitToRetain(),
 instantToRetain);
+            }
+          }
+        }
+      }
+    } else {
+      // FILE_VERSIONS_RETAINED
+      if (config.incrementalCleanerModeEnabled()) {
+        // find the last completed commit from last clean and mark that as 
earliest commit to retain.
+        Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+        if (lastClean.isPresent()) {
+          if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+            
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+          } else {
+            HoodieCleanMetadata cleanMetadata = null;
+            try {
+              cleanMetadata = TimelineMetadataUtils
                   
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
-          if ((cleanMetadata.getEarliestCommitToRetain() != null)
-                  && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) 
{
-            return getPartitionPathsForIncrementalCleaning(cleanMetadata, 
instantToRetain);
+              if 
(!StringUtils.isNullOrEmpty(cleanMetadata.getLastCompletedCommitTimestamp())) {

Review Comment:
   thats a valid point. lets take this as a separate bug fix. 



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String> 
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
    * @return list of partitions
    * @throws IOException
    */
-  private List<String> 
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain) 
throws IOException {
-    if (!instantToRetain.isPresent()) {
-      LOG.info("No earliest commit to retain. No need to scan partitions !!");
-      return Collections.emptyList();
-    }
+  private List<String> 
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain, 
HoodieCleaningPolicy cleaningPolicy) throws IOException {
+    if (cleaningPolicy != HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
+      if (!instantToRetain.isPresent()) {
+        LOG.info("No earliest commit to retain. No need to scan partitions 
!!");
+        return Collections.emptyList();
+      }
 
-    if (config.incrementalCleanerModeEnabled()) {
-      Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
-      if (lastClean.isPresent()) {
-        if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
-          
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
-        } else {
-          HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+      if (config.incrementalCleanerModeEnabled()) {
+        Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+        if (lastClean.isPresent()) {
+          if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+            
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+          } else {
+            HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+                
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
+            if ((cleanMetadata.getEarliestCommitToRetain() != null)
+                && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
+              return 
getPartitionPathsForIncrementalCleaning(cleanMetadata.getEarliestCommitToRetain(),
 instantToRetain);
+            }
+          }
+        }
+      }
+    } else {
+      // FILE_VERSIONS_RETAINED
+      if (config.incrementalCleanerModeEnabled()) {
+        // find the last completed commit from last clean and mark that as 
earliest commit to retain.
+        Option<HoodieInstant> lastClean = 
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+        if (lastClean.isPresent()) {
+          if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+            
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+          } else {
+            HoodieCleanMetadata cleanMetadata = null;
+            try {
+              cleanMetadata = TimelineMetadataUtils
                   
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
-          if ((cleanMetadata.getEarliestCommitToRetain() != null)
-                  && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) 
{
-            return getPartitionPathsForIncrementalCleaning(cleanMetadata, 
instantToRetain);
+              if 
(!StringUtils.isNullOrEmpty(cleanMetadata.getLastCompletedCommitTimestamp())) {

Review Comment:
   https://issues.apache.org/jira/browse/HUDI-4921



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to