manojpec commented on a change in pull request #3825:
URL: https://github.com/apache/hudi/pull/3825#discussion_r732976497
##########
File path:
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java
##########
@@ -635,47 +638,61 @@ private void testFailedInsertAndCleanByCommits(
}
/**
- * Test HoodieTable.clean() Cleaning by versions logic.
+ * Test Hudi COW Table Cleaner - Keep the latest file versions policy.
*/
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean)
throws Exception {
HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath)
-
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(false).build())
+
.withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(true).build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
- HoodieTestTable testTable = HoodieTestTable.of(metaClient);
- String p0 = "2020/01/01";
- String p1 = "2020/01/02";
- Map<String, List<BootstrapFileMapping>> bootstrapMapping =
enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
+
+ HoodieTableMetadataWriter metadataWriter =
SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
+ HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient,
metadataWriter);
+
+ final String p0 = "2020/01/01";
+ final String p1 = "2020/01/02";
+ final Map<String, List<BootstrapFileMapping>> bootstrapMapping =
enableBootstrapSourceClean
+ ? generateBootstrapIndexAndSourceData(p0, p1) : null;
// make 1 commit, with 1 file per partition
- String file1P0C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p0).get(0).getFileId()
+ final String file1P0C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p0).get(0).getFileId()
: UUID.randomUUID().toString();
- String file1P1C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p1).get(0).getFileId()
+ final String file1P1C0 = enableBootstrapSourceClean ?
bootstrapMapping.get(p1).get(0).getFileId()
: UUID.randomUUID().toString();
- testTable.addCommit("00000000000001").withBaseFilesInPartition(p0,
file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
+
+ Map<String, List<Pair<String, Integer>>> c1PartitionToFilesNameLengthMap =
new HashMap<>();
+ c1PartitionToFilesNameLengthMap.put(p0,
Collections.singletonList(Pair.of(file1P0C0, 100)));
+ c1PartitionToFilesNameLengthMap.put(p1,
Collections.singletonList(Pair.of(file1P1C0, 200)));
+ testTable.doWriteOperation("00000000000001", WriteOperationType.INSERT,
Arrays.asList(p0, p1),
+ c1PartitionToFilesNameLengthMap, false, false);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertEquals(0, hoodieCleanStatsOne.size(), "Must not clean any files");
assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next commit, with 1 insert & 1 update per partition
- Map<String, String> partitionAndFileId002 =
testTable.addCommit("00000000000002")
- .withBaseFilesInPartition(p0, file1P0C0)
- .withBaseFilesInPartition(p1, file1P1C0)
- .getFileIdsWithBaseFilesInPartitions(p0, p1);
+ final String file2P0C1 = UUID.randomUUID().toString();
+ final String file2P1C1 = UUID.randomUUID().toString();
+ Map<String, List<Pair<String, Integer>>> c2PartitionToFilesNameLengthMap =
new HashMap<>();
+ c2PartitionToFilesNameLengthMap.put(p0, Arrays.asList(Pair.of(file1P0C0,
101), Pair.of(file2P0C1, 100)));
+ c2PartitionToFilesNameLengthMap.put(p1, Arrays.asList(Pair.of(file1P1C0,
201), Pair.of(file2P1C1, 200)));
+ testTable.doWriteOperation("00000000000002", WriteOperationType.UPSERT,
Collections.emptyList(),
+ c2PartitionToFilesNameLengthMap, false, false);
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, 1);
- // enableBootstrapSourceClean would delete the bootstrap base file as the same time
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsTwo, p0);
+
+ // enableBootstrapSourceClean would delete the bootstrap base file at the same time
Review comment:
Fixed this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]