danny0405 commented on code in PR #13653:
URL: https://github.com/apache/hudi/pull/13653#discussion_r2246928295
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java:
##########
@@ -317,56 +293,45 @@ private static String formatDeletePath(String path) {
return path.substring(path.indexOf(":") + 1);
}
- private List<StoragePathInfo> listBaseFilesToBeDeleted(String commit,
- String
basefileExtension,
- String partitionPath,
- HoodieStorage
storage) throws IOException {
- LOG.info("Collecting files to be cleaned/rolledback up for path " +
partitionPath + " and commit " + commit);
+ private List<StoragePath> listBaseFilesToBeDeleted(String commit,
+ String basefileExtension,
+ String partitionPath,
+ HoodieStorage storage)
throws IOException {
+ LOG.info("Collecting files to be cleaned/rolledback up for path {} and
commit {}", partitionPath, commit);
StoragePathFilter filter = (path) -> {
if (path.toString().contains(basefileExtension)) {
String fileCommitTime = FSUtils.getCommitTime(path.getName());
return commit.equals(fileCommitTime);
}
return false;
};
- return
storage.listDirectEntries(FSUtils.constructAbsolutePath(config.getBasePath(),
partitionPath), filter);
+ return
storage.listDirectEntries(FSUtils.constructAbsolutePath(config.getBasePath(),
partitionPath),
filter).stream().map(StoragePathInfo::getPath).collect(Collectors.toList());
}
- private List<StoragePathInfo> fetchFilesFromInstant(HoodieInstant
instantToRollback,
- String partitionPath,
String basePath,
- String
baseFileExtension, HoodieStorage storage,
-
Option<HoodieCommitMetadata> commitMetadataOptional,
- Boolean
isCommitMetadataCompleted,
- HoodieTableType
tableType) throws IOException {
- // go w/ commit metadata only for COW table. for MOR, we need to get
associated log files when commit corresponding to base file is rolledback.
- if (isCommitMetadataCompleted && tableType ==
HoodieTableType.COPY_ON_WRITE) {
- return fetchFilesFromCommitMetadata(instantToRollback, partitionPath,
basePath, commitMetadataOptional.get(),
- baseFileExtension, storage);
+ private List<StoragePath> fetchFilesFromInstant(HoodieInstant
instantToRollback,
+ String partitionPath, String
basePath,
+ String baseFileExtension,
HoodieStorage storage,
+ Option<HoodieCommitMetadata>
commitMetadataOptional,
+ boolean
isCommitMetadataCompleted,
+ HoodieTableType tableType,
+ HoodieTableVersion
tableVersion) throws IOException {
+ // for MOR tables with version < 8, listing is required to fetch the log
files associated with base files added by this commit.
+ if (isCommitMetadataCompleted && (tableType ==
HoodieTableType.COPY_ON_WRITE ||
tableVersion.greaterThanOrEquals(HoodieTableVersion.EIGHT))) {
+ return fetchFilesFromCommitMetadata(instantToRollback, partitionPath,
basePath, commitMetadataOptional.get(), baseFileExtension);
} else {
return fetchFilesFromListFiles(instantToRollback, partitionPath,
basePath, baseFileExtension, storage);
}
}
- private List<StoragePathInfo> fetchFilesFromCommitMetadata(HoodieInstant
instantToRollback,
- String
partitionPath,
- String basePath,
-
HoodieCommitMetadata commitMetadata,
- String
baseFileExtension,
- HoodieStorage
storage) throws IOException {
+ private List<StoragePath> fetchFilesFromCommitMetadata(HoodieInstant
instantToRollback,
+ String partitionPath,
+ String basePath,
+ HoodieCommitMetadata
commitMetadata,
+ String
baseFileExtension) {
StoragePathFilter pathFilter = getPathFilter(baseFileExtension,
instantToRollback.requestedTime());
- List<StoragePath> filePaths = getFilesFromCommitMetadata(basePath,
commitMetadata, partitionPath)
- .filter(entry -> {
- try {
- return storage.exists(entry);
- } catch (Exception e) {
- LOG.error("Exists check failed for " + entry.toString(), e);
- }
- // if any Exception is thrown, do not ignore. let's try to add the
file of interest to be deleted. we can't miss any files to be rolled back.
- return true;
- }).collect(Collectors.toList());
-
- return storage.listDirectEntries(filePaths, pathFilter);
+ return getFilesFromCommitMetadata(basePath, commitMetadata, partitionPath)
Review Comment:
The file existence check seems unnecessary because we can delete a file that
does not exist, cc @nsivabalan for the background of this check.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]