bvaradar commented on code in PR #10242:
URL: https://github.com/apache/hudi/pull/10242#discussion_r1421181574
##########
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineUtils.java:
##########
@@ -82,22 +84,40 @@ public static List<String>
getWrittenPartitions(HoodieTimeline timeline) {
* Does not include internal operations such as clean in the timeline.
*/
public static List<String> getDroppedPartitions(HoodieTimeline timeline) {
- HoodieTimeline replaceCommitTimeline =
timeline.getWriteTimeline().filterCompletedInstants().getCompletedReplaceTimeline();
+ HoodieTimeline completedTimeline =
timeline.getWriteTimeline().filterCompletedInstants();
+ HoodieTimeline replaceCommitTimeline =
completedTimeline.getCompletedReplaceTimeline();
- return replaceCommitTimeline.getInstantsAsStream().flatMap(instant -> {
- try {
- HoodieReplaceCommitMetadata commitMetadata =
HoodieReplaceCommitMetadata.fromBytes(
- replaceCommitTimeline.getInstantDetails(instant).get(),
HoodieReplaceCommitMetadata.class);
- if
(WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType()))
{
- Map<String, List<String>> partitionToReplaceFileIds =
commitMetadata.getPartitionToReplaceFileIds();
- return partitionToReplaceFileIds.keySet().stream();
- } else {
- return Stream.empty();
- }
- } catch (IOException e) {
- throw new HoodieIOException("Failed to get partitions modified at " +
instant, e);
- }
- }).distinct().filter(partition ->
!partition.isEmpty()).collect(Collectors.toList());
+ Map<String, String> partitionToLatestDeleteTimestamp =
replaceCommitTimeline.getInstants().stream()
+ .map(instant -> {
+ try {
+ HoodieReplaceCommitMetadata commitMetadata =
HoodieReplaceCommitMetadata.fromBytes(
+ replaceCommitTimeline.getInstantDetails(instant).get(),
HoodieReplaceCommitMetadata.class);
+ return Pair.of(instant, commitMetadata);
+ } catch (IOException e) {
+ throw new HoodieIOException("Failed to get partitions modified at
" + instant, e);
+ }
+ })
+ .filter(pair -> isDeletePartition(pair.getRight().getOperationType()))
+ .flatMap(pair ->
pair.getRight().getPartitionToReplaceFileIds().keySet().stream()
+ .map(partition -> new AbstractMap.SimpleEntry<>(partition,
pair.getLeft().getTimestamp()))
+ ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
(existing, replace) -> replace));
+
+ Map<String, String> partitionToLatestWriteTimestamp =
completedTimeline.getInstants().stream()
Review Comment:
@wecharyu : Instead of reading the entire commit timeline, can you remove
the completed instants whose timestamps are <= min(timestamps in
partitionToLatestDeleteTimestamp)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]