bvaradar commented on a change in pull request #1870:
URL: https://github.com/apache/hudi/pull/1870#discussion_r468375136
##########
File path:
hudi-client/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java
##########
@@ -82,40 +83,45 @@ HoodieCleanerPlan requestClean(JavaSparkContext jsc) {
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
jsc.setJobGroup(this.getClass().getSimpleName(), "Generates list of file
slices to be cleaned");
- Map<String, List<String>> cleanOps = jsc
+ Map<String, List<HoodieCleanFileInfo>> cleanOps = jsc
.parallelize(partitionsToClean, cleanerParallelism)
.map(partitionPathToClean -> Pair.of(partitionPathToClean,
planner.getDeletePaths(partitionPathToClean)))
.collect().stream()
- .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
+ .collect(Collectors.toMap(Pair::getKey,
+ (y) ->
y.getValue().stream().map(CleanFileInfo::toHoodieFileCleanInfo).collect(Collectors.toList())));
return new HoodieCleanerPlan(earliestInstant
.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(),
x.getState().name())).orElse(null),
- config.getCleanerPolicy().name(), cleanOps, 1);
+ config.getCleanerPolicy().name(), null,
CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps);
} catch (IOException e) {
throw new HoodieIOException("Failed to schedule clean operation", e);
}
}
- private static PairFlatMapFunction<Iterator<Tuple2<String, String>>, String,
PartitionCleanStat> deleteFilesFunc(
- HoodieTable table) {
- return (PairFlatMapFunction<Iterator<Tuple2<String, String>>, String,
PartitionCleanStat>) iter -> {
+ private static PairFlatMapFunction<Iterator<Tuple2<String, CleanFileInfo>>,
String, PartitionCleanStat>
+ deleteFilesFunc(HoodieTable table) {
+ return (PairFlatMapFunction<Iterator<Tuple2<String, CleanFileInfo>>,
String, PartitionCleanStat>) iter -> {
Map<String, PartitionCleanStat> partitionCleanStatMap = new HashMap<>();
-
FileSystem fs = table.getMetaClient().getFs();
- Path basePath = new Path(table.getMetaClient().getBasePath());
while (iter.hasNext()) {
- Tuple2<String, String> partitionDelFileTuple = iter.next();
+ Tuple2<String, CleanFileInfo> partitionDelFileTuple = iter.next();
String partitionPath = partitionDelFileTuple._1();
- String delFileName = partitionDelFileTuple._2();
- Path deletePath =
FSUtils.getPartitionPath(FSUtils.getPartitionPath(basePath, partitionPath),
delFileName);
+ Path deletePath = new Path(partitionDelFileTuple._2().getFilePath());
String deletePathStr = deletePath.toString();
Boolean deletedFileResult = deleteFileAndGetResult(fs, deletePathStr);
if (!partitionCleanStatMap.containsKey(partitionPath)) {
partitionCleanStatMap.put(partitionPath, new
PartitionCleanStat(partitionPath));
}
+ boolean isBootstrapBasePathFile =
partitionDelFileTuple._2().isBootstrapBaseFile();
PartitionCleanStat partitionCleanStat =
partitionCleanStatMap.get(partitionPath);
- partitionCleanStat.addDeleteFilePatterns(deletePath.getName());
- partitionCleanStat.addDeletedFileResult(deletePath.getName(),
deletedFileResult);
+ if (isBootstrapBasePathFile) {
Review comment:
deletePath.toString() returns the full path, whereas deletePath.getName()
returns only the file name. That is why these calls were in an if-else block.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org