pratyakshsharma commented on a change in pull request #3646:
URL: https://github.com/apache/hudi/pull/3646#discussion_r801451351
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
##########
@@ -1267,6 +1271,154 @@ public void testKeepLatestCommits(boolean
simulateFailureRetry, boolean enableIn
assertTrue(testTable.baseFileExists(p0, "00000000000005", file3P0C2));
}
+ /**
+ * Tests the KEEP_LATEST_BY_HOURS cleaning policy, which retains files by number of hours. This test
case covers the scenario in which no files are cleaned.
+ */
+ @ParameterizedTest
+ @MethodSource("argumentsForTestKeepLatestCommits")
+ public void testKeepXHoursNoCleaning(boolean simulateFailureRetry, boolean
enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
+ HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath)
+
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withIncrementalCleaningMode(enableIncrementalClean)
+
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
+
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
+
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).retainNumberOfHours(2).build())
+ .build();
+
+ HoodieTestTable testTable = HoodieTestTable.of(metaClient);
+ String p0 = "2020/01/01";
+ String p1 = "2020/01/02";
+ Map<String, List<BootstrapFileMapping>> bootstrapMapping =
enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
+
+ String file1P0C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p0).get(0).getFileId()
+ : UUID.randomUUID().toString();
+ String file1P1C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p1).get(0).getFileId()
+ : UUID.randomUUID().toString();
+ Instant instant = Instant.now();
+ ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant,
ZoneId.systemDefault());
+ int minutesForFirstCommit = 90;
+ String firstCommitTs =
HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
+ testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0,
file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
+
+ HoodieCommitMetadata commitMetadata = generateCommitMetadata(
+ Collections.unmodifiableMap(new HashMap<String, List<String>>() {
+ {
+ put(p0, CollectionUtils.createImmutableList(file1P0C0));
+ put(p1, CollectionUtils.createImmutableList(file1P1C0));
+ }
+ })
+ );
+ metaClient.getActiveTimeline().saveAsComplete(
+ new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION,
firstCommitTs),
+
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
+
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+
+ List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config,
simulateFailureRetry);
+ assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions
and clean any files");
+ assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
+ assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
+
+ // make next commit, with 1 insert & 1 update per partition
+ int minutesForSecondCommit = 40;
+ String secondCommitTs =
HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
+ Map<String, String> partitionAndFileId002 =
testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0,
p1);
+ String file2P0C1 = partitionAndFileId002.get(p0);
+ String file2P1C1 = partitionAndFileId002.get(p1);
+ testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0,
file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
+ commitMetadata = generateCommitMetadata(new HashMap<String,
List<String>>() {
+ {
+ put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
+ put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
+ }
+ });
+ metaClient.getActiveTimeline().saveAsComplete(
+ new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION,
secondCommitTs),
+
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
+ List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config,
simulateFailureRetry);
+ assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions
and clean any files");
+ assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
+ assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
+ assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
+ assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
+ }
Review comment:
This test can be removed entirely, since the next test already covers
the whole scenario. I will remove it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]