anmolanmol1234 commented on code in PR #7421:
URL: https://github.com/apache/hadoop/pull/7421#discussion_r1982792319


##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java:
##########
@@ -1903,39 +1916,57 @@ private List<AbfsHttpHeader> 
getMetadataHeadersList(final Hashtable<String, Stri
    * This is to handle duplicate listing entries returned by Blob Endpoint for
    * implicit paths that also has a marker file created for them.
    * This will retain entry corresponding to marker file and remove the 
BlobPrefix entry.
+   * This will also filter out all the rename pending json files in listing 
output.
    * @param listResultSchema List of entries returned by Blob Endpoint.
+   * @param uri URI to be used for path conversion.
    * @return List of entries after removing duplicates.
    */
-  private BlobListResultSchema removeDuplicateEntries(BlobListResultSchema 
listResultSchema) {
-    List<BlobListResultEntrySchema> uniqueEntries = new ArrayList<>();
+  private ListResponseData filterDuplicateEntriesAndRenamePendingFiles(
+      BlobListResultSchema listResultSchema, URI uri) throws IOException {
+    List<FileStatus> fileStatuses = new ArrayList<>();
+    Map<Path, Integer> renamePendingJsonPaths = new HashMap<>();
     TreeMap<String, BlobListResultEntrySchema> nameToEntryMap = new 
TreeMap<>();
 
     for (BlobListResultEntrySchema entry : listResultSchema.paths()) {
       if (StringUtils.isNotEmpty(entry.eTag())) {
         // This is a blob entry. It is either a file or a marker blob.
         // In both cases we will add this.
         nameToEntryMap.put(entry.name(), entry);
+        fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri));
+
+        if (isRenamePendingJsonPathEntry(entry)) {
+          renamePendingJsonPaths.put(entry.path(), 
entry.contentLength().intValue());
+        }
       } else {
         // This is a BlobPrefix entry. It is a directory with file inside
         // This might have already been added as a marker blob.
         if (!nameToEntryMap.containsKey(entry.name())) {
           nameToEntryMap.put(entry.name(), entry);
+          fileStatuses.add(getVersionedFileStatusFromEntry(entry, uri));
         }
       }
     }
 
-    uniqueEntries.addAll(nameToEntryMap.values());
-    listResultSchema.withPaths(uniqueEntries);
-    return listResultSchema;
+    ListResponseData listResponseData = new ListResponseData();
+    listResponseData.setFileStatusList(fileStatuses);
+    listResponseData.setRenamePendingJsonPaths(renamePendingJsonPaths);
+    listResponseData.setContinuationToken(listResultSchema.getNextMarker());
+    return listResponseData;
+  }
+
+  private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema 
entry) {

Review Comment:
   This can be simplified to:

   private boolean isRenamePendingJsonPathEntry(BlobListResultEntrySchema entry) {
       String path = entry.path() != null ? entry.path().toUri().getPath() : null;
       return path != null && !entry.path().isRoot() && isAtomicRenameKey(path)
           && path.endsWith(RenameAtomicity.SUFFIX);
   }
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to