bhattmanish98 commented on code in PR #7614: URL: https://github.com/apache/hadoop/pull/7614#discussion_r2044568686
########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListResponseData.java: ########## @@ -40,15 +40,15 @@ public class ListResponseData { * Returns the list of FileStatus objects. * @return the list of FileStatus objects */ - public List<FileStatus> getFileStatusList() { + public List<VersionedFileStatus> getFileStatusList() { Review Comment: The Javadoc needs to be updated to match the new return type, e.g. `Returns the list of VersionedFileStatus objects`. ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java: ########## @@ -1299,6 +1310,38 @@ public String listStatus(final Path path, final String startFrom, return continuation; } + /** + * This is to handle duplicate listing entries returned by Blob Endpoint for + * implicit paths that also has a marker file created for them. + * This will retain the entry corresponding to the marker file + * and remove the BlobPrefix entry corresponding to implicit directory. + * @param nameToEntryMap to keep track of paths already added to the list. + * @param fileStatusListInCurrItr the list of file statuses returned in the current iteration. + * @param fileStatuses the final list of file statuses to be returned. + */ + private void filterDuplicateEntriesForBlobClient( + TreeMap<String, VersionedFileStatus> nameToEntryMap, + List<VersionedFileStatus> fileStatusListInCurrItr, + List<FileStatus> fileStatuses) { + for (VersionedFileStatus fileStatus : fileStatusListInCurrItr) { + String entryName = fileStatus.getPath().getName(); + if (StringUtils.isNotEmpty(fileStatus.getEtag())) { + // This is a blob entry. It is either a file or a marker blob. + // In both cases, we will add this. + nameToEntryMap.put(entryName, fileStatus); + fileStatuses.add(fileStatus); + } else { Review Comment: The `if` nested inside this `else` block can be replaced with a single `else if`. 
########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListResponseData.java: ########## @@ -40,15 +40,15 @@ public class ListResponseData { * Returns the list of FileStatus objects. * @return the list of FileStatus objects */ - public List<FileStatus> getFileStatusList() { + public List<VersionedFileStatus> getFileStatusList() { return fileStatusList; } /** * Sets the list of FileStatus objects. * @param fileStatusList the list of FileStatus objects */ - public void setFileStatusList(final List<FileStatus> fileStatusList) { + public void setFileStatusList(final List<VersionedFileStatus> fileStatusList) { Review Comment: As with the getter, the Javadoc needs to be updated to refer to VersionedFileStatus objects. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org