lokeshj1703 commented on code in PR #13007:
URL: https://github.com/apache/hudi/pull/13007#discussion_r2013575672


##########
hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java:
##########
@@ -110,6 +115,10 @@ public boolean isAtomicCreationSupported() {
     return supportAtomicCreation != null && supportAtomicCreation;
   }
 
+  public boolean getListStatusFriendly() {
+    return listStatusFriendly != null && listStatusFriendly;

Review Comment:
   Addressed



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java:
##########
@@ -96,6 +96,21 @@ static HoodieRollbackStat 
mergeRollbackStat(HoodieRollbackStat stat1, HoodieRoll
     return new HoodieRollbackStat(stat1.getPartitionPath(), 
successDeleteFiles, failedDeleteFiles, commandBlocksCount, 
logFilesFromFailedCommit);
   }
 
+  static HoodieRollbackRequest mergeRollbackRequest(HoodieRollbackRequest 
rollbackRequest1, HoodieRollbackRequest rollbackRequest2) {

Review Comment:
   Addressed. Removed the tests around it, since the method has no production use.



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java:
##########
@@ -115,15 +108,64 @@ public Set<String> 
createdAndMergedDataPaths(HoodieEngineContext context, int pa
         HoodieStorage storage = HoodieStorageUtils.getStorage(path, 
storageConf);
         return storage.listFiles(path).stream()
             .map(pathInfo -> pathInfo.getPath().toString())
-            .filter(pathStr -> 
pathStr.contains(HoodieTableMetaClient.MARKER_EXTN)
-                && !pathStr.endsWith(IOType.APPEND.name()))
+            .filter(pathStr -> NOT_APPEND_MARKER_PREDICATE.test(pathStr))
             .map(this::translateMarkerToDataPath);
       }, parallelism));
     }
 
     return dataFiles;
   }
 
+  public Set<String> getAppendedLogPaths(HoodieEngineContext context, int 
parallelism) throws IOException {
+    Set<String> logFiles = new HashSet<>();
+    List<String> subDirectories = 
getSubDirectoriesByMarkerCondition(storage.listDirectEntries(markerDirPath), 
logFiles, APPEND_MARKER_PREDICATE);
+
+    if (subDirectories.size() > 0) {
+      parallelism = Math.min(subDirectories.size(), parallelism);
+      StorageConfiguration<?> storageConf = storage.getConf();
+      context.setJobStatus(this.getClass().getSimpleName(), "Obtaining marker 
files for all created, merged paths");
+      logFiles.addAll(context.flatMap(subDirectories, directory -> {
+        Queue<StoragePath> candidatesDirs = new LinkedList<>();
+        candidatesDirs.add(new StoragePath(directory));
+        List<String> result = new ArrayList<>();
+        while (!candidatesDirs.isEmpty()) {
+          StoragePath path = candidatesDirs.remove();
+          HoodieStorage storage = HoodieStorageUtils.getStorage(path, 
storageConf);
+          List<StoragePathInfo> storagePathInfos = 
storage.listDirectEntries(path);
+          for (StoragePathInfo pathInfo : storagePathInfos) {
+            if (pathInfo.isDirectory()) {
+              candidatesDirs.add(pathInfo.getPath());
+            } else {
+              String pathStr = pathInfo.getPath().toString();
+              if (APPEND_MARKER_PREDICATE.test(pathStr)) {
+                result.add(translateMarkerToDataPath(pathStr));
+              }
+            }
+          }
+        }
+        return result.stream();
+      }, parallelism));
+    }
+
+    return logFiles;
+  }
+
+  private List<String> 
getSubDirectoriesByMarkerCondition(List<StoragePathInfo> topLevelInfoList, 
Set<String> dataFiles, Predicate<String> pathCondition) {
+    List<String> subDirectories = new ArrayList<>();
+    for (StoragePathInfo topLevelInfo: topLevelInfoList) {
+      if (topLevelInfo.isFile()) {
+        String pathStr = topLevelInfo.getPath().toString();
+        if (pathCondition.test(pathStr)) {
+          dataFiles.add(translateMarkerToDataPath(pathStr));

Review Comment:
   Addressed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to