pratyakshsharma commented on a change in pull request #3646:
URL: https://github.com/apache/hudi/pull/3646#discussion_r801451351



##########
File path: 
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
##########
@@ -1267,6 +1271,154 @@ public void testKeepLatestCommits(boolean 
simulateFailureRetry, boolean enableIn
     assertTrue(testTable.baseFileExists(p0, "00000000000005", file3P0C2));
   }
 
+  /**
+   * Test cleaning policy based on number of hours retained policy. This test 
case covers the case when files will not be cleaned.
+   */
+  @ParameterizedTest
+  @MethodSource("argumentsForTestKeepLatestCommits")
+  public void testKeepXHoursNoCleaning(boolean simulateFailureRetry, boolean 
enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
+    HoodieWriteConfig config = 
HoodieWriteConfig.newBuilder().withPath(basePath)
+            
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
+            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+                    .withIncrementalCleaningMode(enableIncrementalClean)
+                    
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
+                    
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
+                    
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).retainNumberOfHours(2).build())
+            .build();
+
+    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
+    String p0 = "2020/01/01";
+    String p1 = "2020/01/02";
+    Map<String, List<BootstrapFileMapping>> bootstrapMapping = 
enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
+
+    String file1P0C0 = enableBootstrapSourceClean ? 
bootstrapMapping.get(p0).get(0).getFileId()
+            : UUID.randomUUID().toString();
+    String file1P1C0 = enableBootstrapSourceClean ? 
bootstrapMapping.get(p1).get(0).getFileId()
+            : UUID.randomUUID().toString();
+    Instant instant = Instant.now();
+    ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, 
ZoneId.systemDefault());
+    int minutesForFirstCommit = 90;
+    String firstCommitTs = 
HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
+    testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, 
file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
+
+    HoodieCommitMetadata commitMetadata = generateCommitMetadata(
+            Collections.unmodifiableMap(new HashMap<String, List<String>>() {
+              {
+                put(p0, CollectionUtils.createImmutableList(file1P0C0));
+                put(p1, CollectionUtils.createImmutableList(file1P1C0));
+              }
+            })
+    );
+    metaClient.getActiveTimeline().saveAsComplete(
+            new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, 
firstCommitTs),
+            
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
+
+    metaClient = HoodieTableMetaClient.reload(metaClient);
+
+    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, 
simulateFailureRetry);
+    assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions 
and clean any files");
+    assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
+    assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
+
+    // make next commit, with 1 insert & 1 update per partition
+    int minutesForSecondCommit = 40;
+    String secondCommitTs = 
HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
+    Map<String, String> partitionAndFileId002 = 
testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0,
 p1);
+    String file2P0C1 = partitionAndFileId002.get(p0);
+    String file2P1C1 = partitionAndFileId002.get(p1);
+    testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, 
file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
+    commitMetadata = generateCommitMetadata(new HashMap<String, 
List<String>>() {
+      {
+        put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
+        put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
+      }
+    });
+    metaClient.getActiveTimeline().saveAsComplete(
+            new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, 
secondCommitTs),
+            
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
+    List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, 
simulateFailureRetry);
+    assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions 
and clean any files");
+    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
+    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
+    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
+    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
+  }

Review comment:
       I can actually remove this test itself; the next test already covers 
the entire scenario. Doing that. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to