danny0405 commented on code in PR #13653:
URL: https://github.com/apache/hudi/pull/13653#discussion_r2246928295


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java:
##########
@@ -317,56 +293,45 @@ private static String formatDeletePath(String path) {
     return path.substring(path.indexOf(":") + 1);
   }
 
-  private List<StoragePathInfo> listBaseFilesToBeDeleted(String commit,
-                                                         String 
basefileExtension,
-                                                         String partitionPath,
-                                                         HoodieStorage 
storage) throws IOException {
-    LOG.info("Collecting files to be cleaned/rolledback up for path " + 
partitionPath + " and commit " + commit);
+  private List<StoragePath> listBaseFilesToBeDeleted(String commit,
+                                                     String basefileExtension,
+                                                     String partitionPath,
+                                                     HoodieStorage storage) 
throws IOException {
+    LOG.info("Collecting files to be cleaned/rolledback up for path {} and 
commit {}", partitionPath, commit);
     StoragePathFilter filter = (path) -> {
       if (path.toString().contains(basefileExtension)) {
         String fileCommitTime = FSUtils.getCommitTime(path.getName());
         return commit.equals(fileCommitTime);
       }
       return false;
     };
-    return 
storage.listDirectEntries(FSUtils.constructAbsolutePath(config.getBasePath(), 
partitionPath), filter);
+    return 
storage.listDirectEntries(FSUtils.constructAbsolutePath(config.getBasePath(), 
partitionPath), 
filter).stream().map(StoragePathInfo::getPath).collect(Collectors.toList());
   }
 
-  private List<StoragePathInfo> fetchFilesFromInstant(HoodieInstant 
instantToRollback,
-                                                      String partitionPath, 
String basePath,
-                                                      String 
baseFileExtension, HoodieStorage storage,
-                                                      
Option<HoodieCommitMetadata> commitMetadataOptional,
-                                                      Boolean 
isCommitMetadataCompleted,
-                                                      HoodieTableType 
tableType) throws IOException {
-    // go w/ commit metadata only for COW table. for MOR, we need to get 
associated log files when commit corresponding to base file is rolledback.
-    if (isCommitMetadataCompleted && tableType == 
HoodieTableType.COPY_ON_WRITE) {
-      return fetchFilesFromCommitMetadata(instantToRollback, partitionPath, 
basePath, commitMetadataOptional.get(),
-          baseFileExtension, storage);
+  private List<StoragePath> fetchFilesFromInstant(HoodieInstant 
instantToRollback,
+                                                  String partitionPath, String 
basePath,
+                                                  String baseFileExtension, 
HoodieStorage storage,
+                                                  Option<HoodieCommitMetadata> 
commitMetadataOptional,
+                                                  boolean 
isCommitMetadataCompleted,
+                                                  HoodieTableType tableType,
+                                                  HoodieTableVersion 
tableVersion) throws IOException {
+    // for MOR tables with version < 8, listing is required to fetch the log 
files associated with base files added by this commit.
+    if (isCommitMetadataCompleted && (tableType == 
HoodieTableType.COPY_ON_WRITE || 
tableVersion.greaterThanOrEquals(HoodieTableVersion.EIGHT))) {
+      return fetchFilesFromCommitMetadata(instantToRollback, partitionPath, 
basePath, commitMetadataOptional.get(), baseFileExtension);
     } else {
       return fetchFilesFromListFiles(instantToRollback, partitionPath, 
basePath, baseFileExtension, storage);
     }
   }
 
-  private List<StoragePathInfo> fetchFilesFromCommitMetadata(HoodieInstant 
instantToRollback,
-                                                             String 
partitionPath,
-                                                             String basePath,
-                                                             
HoodieCommitMetadata commitMetadata,
-                                                             String 
baseFileExtension,
-                                                             HoodieStorage 
storage) throws IOException {
+  private List<StoragePath> fetchFilesFromCommitMetadata(HoodieInstant 
instantToRollback,
+                                                         String partitionPath,
+                                                         String basePath,
+                                                         HoodieCommitMetadata 
commitMetadata,
+                                                         String 
baseFileExtension) {
     StoragePathFilter pathFilter = getPathFilter(baseFileExtension,
         instantToRollback.requestedTime());
-    List<StoragePath> filePaths = getFilesFromCommitMetadata(basePath, 
commitMetadata, partitionPath)
-        .filter(entry -> {
-          try {
-            return storage.exists(entry);
-          } catch (Exception e) {
-            LOG.error("Exists check failed for " + entry.toString(), e);
-          }
-          // if any Exception is thrown, do not ignore. let's try to add the 
file of interest to be deleted. we can't miss any files to be rolled back.
-          return true;
-        }).collect(Collectors.toList());
-
-    return storage.listDirectEntries(filePaths, pathFilter);
+    return getFilesFromCommitMetadata(basePath, commitMetadata, partitionPath)

Review Comment:
   The file existence check seems unnecessary because we can delete a file that 
does not exist, cc @nsivabalan for the background of this check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to