felipecrv commented on code in PR #39009:
URL: https://github.com/apache/arrow/pull/39009#discussion_r1419687092


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -815,6 +815,233 @@ class AzureFileSystem::Impl {
     }
   }
 
+ private:
+  template <typename OnContainer>
+  Status ListContainers(const Azure::Core::Context& context,
+                        OnContainer&& on_container) const {
+    Azure::Storage::Blobs::ListBlobContainersOptions options;
+    // Deleted containers are not returned.
+    options.Include = 
Azure::Storage::Blobs::Models::ListBlobContainersIncludeFlags::None;
+    try {
+      auto container_list_response =
+          blob_service_client_->ListBlobContainers(options, context);
+      for (; container_list_response.HasPage();
+           container_list_response.MoveToNextPage(context)) {
+        for (const auto& container : container_list_response.BlobContainers) {
+          RETURN_NOT_OK(on_container(container));
+        }
+      }
+    } catch (const Azure::Storage::StorageException& exception) {
+      return internal::ExceptionToStatus("Failed to list account containers.", 
exception);
+    }
+    return Status::OK();
+  }
+
+  static FileInfo FileInfoFromBlob(const std::string& container,
+                                   const 
Azure::Storage::Blobs::Models::BlobItem& blob) {
+    if (blob.Name.back() == internal::kSep) {
+      return DirectoryFileInfoFromPath(container + internal::kSep + blob.Name);
+    }
+    std::string path;
+    path.reserve(container.size() + 1 + blob.Name.size());
+    path += container;
+    path += internal::kSep;
+    path += blob.Name;
+    FileInfo info{std::move(path), FileType::File};
+    info.set_size(blob.BlobSize);
+    
info.set_mtime(std::chrono::system_clock::time_point{blob.Details.LastModified});
+    return info;
+  }
+
+  static FileInfo DirectoryFileInfoFromPath(const std::string& path) {
+    return FileInfo{std::string{internal::RemoveTrailingSlash(path)},
+                    FileType::Directory};
+  }
+
+  static std::string_view BasenameView(std::string_view s) {
+    auto offset = s.find_last_of(internal::kSep);
+    auto tail = (offset == std::string_view::npos) ? s : s.substr(offset);
+    return internal::RemoveTrailingSlash(tail, /*preserve_root=*/false);
+  }
+
+  /// \brief List the blobs at the root of a container or some dir in a 
container.
+  ///
+  /// \pre container_client is the client for the container named like the 
first
+  /// segment of select.base_dir.
+  Status GetFileInfoWithSelectorFromContainer(
+      const Azure::Storage::Blobs::BlobContainerClient& container_client,
+      const Azure::Core::Context& context, Azure::Nullable<int32_t> 
page_size_hint,
+      const FileSelector& select, FileInfoVector* acc_results) {
+    ARROW_ASSIGN_OR_RAISE(auto base_location, 
AzureLocation::FromString(select.base_dir));
+
+    bool found = false;
+    Azure::Storage::Blobs::ListBlobsOptions options;
+    if (internal::GetAbstractPathDepth(base_location.path) == 0) {
+      // If the base_dir is the root of the container, then we want to list 
all blobs in
+      // the container and the Prefix should be empty and not even include the 
trailing
+      // slash because the container itself represents the `<container>/` 
directory.
+      options.Prefix = {};
+      found = true;  // Unless the container itself is not found later!
+    } else {
+      options.Prefix = internal::EnsureTrailingSlash(base_location.path);
+    }
+    options.PageSizeHint = page_size_hint;
+    options.Include = 
Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata;
+
+    // When Prefix.Value() contains a trailing slash and we find a blob that
+    // matches it completely, it is an empty directory marker blob for the
+    // directory we're listing from, and we should skip it.
+    auto is_empty_dir_marker =
+        [&options](const Azure::Storage::Blobs::Models::BlobItem& blob) 
noexcept -> bool {
+      return options.Prefix.HasValue() && blob.Name == options.Prefix.Value();

Review Comment:
   I can see why your check works, but this is a very indirect way to go about 
   this. I will implement it, but with a comment explaining why it works.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to