danny0405 commented on code in PR #9879:
URL: https://github.com/apache/hudi/pull/9879#discussion_r1365304176
##########
hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java:
##########
@@ -873,23 +883,39 @@ public final Stream<FileSlice>
getLatestFileSlicesBeforeOrOn(String partitionStr
.filter(slice ->
!isFileGroupReplacedBeforeOrOn(slice.getFileGroupId(), maxCommitTime))
.map(fg -> fg.getAllFileSlicesBeforeOn(maxCommitTime));
if (includeFileSlicesInPendingCompaction) {
- return allFileSliceStream.map(sliceStream -> sliceStream.flatMap(slice
-> this.filterBaseFileAfterPendingCompaction(slice, false)))
- .map(sliceStream ->
Option.fromJavaOptional(sliceStream.findFirst())).filter(Option::isPresent).map(Option::get)
+ return
allFileSliceStream.map(this::getLatestFileSliceFilteringUncommittedFiles)
+ .filter(Option::isPresent).map(Option::get)
.map(this::addBootstrapBaseFileIfPresent);
} else {
return allFileSliceStream
.map(sliceStream ->
- Option.fromJavaOptional(sliceStream
+ getLatestFileSliceFilteringUncommittedFiles(sliceStream
.filter(slice ->
!isPendingCompactionScheduledForFileId(slice.getFileGroupId()))
- .filter(slice -> !slice.isEmpty())
- .findFirst()))
+ .filter(slice -> !slice.isEmpty())))
.filter(Option::isPresent).map(Option::get).map(this::addBootstrapBaseFileIfPresent);
}
} finally {
readLock.unlock();
}
}
+ /**
+ * Looks for the latest file slice that is not empty after filtering out the
uncommitted files.
+ *
+ * <p>Note: Checks from the latest file slice first to improve the
efficiency. There is no need to check
+ * every file slice, the uncommitted files only exist in the latest file
slice basically.
+ */
+ private Option<FileSlice>
getLatestFileSliceFilteringUncommittedFiles(Stream<FileSlice> fileSlices) {
Review Comment:
Yeah, `TestHoodieTableFileSystemView` has several test cases that can cover
this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]