felipecrv commented on code in PR #39361:
URL: https://github.com/apache/arrow/pull/39361#discussion_r1442187077


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1033,47 +1042,88 @@ class AzureFileSystem::Impl {
       return info;
     } catch (const Storage::StorageException& exception) {
       if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
-        ARROW_ASSIGN_OR_RAISE(auto hns_support,
-                              HierarchicalNamespaceSupport(adlfs_client));
-        if (hns_support == HNSSupport::kContainerNotFound ||
-            hns_support == HNSSupport::kEnabled) {
-          // If the hierarchical namespace is enabled, then the storage 
account will
-          // have explicit directories. Neither a file nor a directory was 
found.
-          info.set_type(FileType::NotFound);
+        return FileInfo{location.all, FileType::NotFound};
+      }
+      return ExceptionToStatus(
+          exception, "GetProperties for '", file_client.GetUrl(),
+          "' failed. GetFileInfo is unable to determine whether the path 
exists.");
+    }
+  }
+
+  /// On flat namespace accounts there are no real directories. Directories are
+  /// implied by empty directory marker blobs with names ending in "/" or there
+  /// being blobs with names starting with the directory path.
+  ///
+  /// \pre location.path is not empty.
+  Result<FileInfo> GetFileInfo(const Blobs::BlobContainerClient& 
container_client,
+                               const AzureLocation& location) {
+    DCHECK(!location.path.empty());
+    Blobs::ListBlobsOptions options;
+    options.Prefix = internal::RemoveTrailingSlash(location.path);
+    options.PageSizeHint = 1;
+
+    try {
+      FileInfo info{location.all};
+      auto list_response = container_client.ListBlobsByHierarchy(kDelimiter, 
options);
+      // Since PageSizeHint=1, we expect at most one entry in either Blobs or
+      // BlobPrefixes. A BlobPrefix always ends with kDelimiter ("/"), so we 
can
+      // distinguish between a directory and a file by checking if we received 
a
+      // prefix or a blob.
+      if (!list_response.BlobPrefixes.empty()) {
+        // Ensure the returned BlobPrefixes[0] string doesn't contain more 
characters than
+        // the requested Prefix. For instance, if we request with 
Prefix="dir/abra" and
+        // the container contains "dir/abracadabra/" but not "dir/abra/", we 
will get back
+        // "dir/abracadabra/" in the BlobPrefixes list. If "dir/abra/" existed,
+        // it would be returned instead because it comes before 
"dir/abracadabra/" in the
+        // lexicographic order guaranteed by ListBlobsByHierarchy.
+        const auto& blob_prefix = list_response.BlobPrefixes[0];
+        if (blob_prefix == internal::EnsureTrailingSlash(location.path)) {
+          info.set_type(FileType::Directory);
           return info;
         }
-        // On flat namespace accounts there are no real directories. 
Directories are only
-        // implied by using `/` in the blob name.
-        Blobs::ListBlobsOptions list_blob_options;
-        // If listing the prefix `path.path_to_file` with trailing slash 
returns at least
-        // one result then `path` refers to an implied directory.
-        list_blob_options.Prefix = 
internal::EnsureTrailingSlash(location.path);
-        // We only need to know if there is at least one result, so minimise 
page size
-        // for efficiency.
-        list_blob_options.PageSizeHint = 1;
-
-        try {
-          auto paged_list_result =
-              blob_service_client_->GetBlobContainerClient(location.container)
-                  .ListBlobs(list_blob_options);
-          auto file_type = paged_list_result.Blobs.size() > 0 ? 
FileType::Directory
-                                                              : 
FileType::NotFound;
-          info.set_type(file_type);
+      }
+      if (!list_response.Blobs.empty()) {
+        const auto& blob = list_response.Blobs[0];
+        if (blob.Name == location.path) {

Review Comment:
   It's necessary as @Tom-Newton explained above. And a bunch of tests fail if 
I comment it out.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to