This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 06d4495173 GH-41034: [C++][FS][Azure] Adjust 
DeleteDir/DeleteDirContents/GetFileInfoSelector behaviors against Azure for 
generic filesystem tests (#41068)
06d4495173 is described below

commit 06d44951736b5a6e5836f2ea2fc988d935ea7f32
Author: Sutou Kouhei <[email protected]>
AuthorDate: Thu Apr 11 08:59:58 2024 +0900

    GH-41034: [C++][FS][Azure] Adjust 
DeleteDir/DeleteDirContents/GetFileInfoSelector behaviors against Azure for 
generic filesystem tests (#41068)
    
    ### Rationale for this change
    
    They are failing:
    
    ```text
    [  FAILED  ] TestAzureHierarchicalNSGeneric.DeleteDir
    [  FAILED  ] TestAzureHierarchicalNSGeneric.DeleteDirContents
    [  FAILED  ] TestAzureHierarchicalNSGeneric.GetFileInfoSelector
    ```
    
    ### What changes are included in this PR?
    
    * `DeleteDir()`: Check not a directory case
    * `DeleteDirContents()`: Check not a directory case
    * `GetFileInfoSelector()`:
      * Add not a directory check for input
      * Add support for returning metadata for directory
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #41034
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/filesystem/azurefs.cc      | 83 +++++++++++++++++++++++++++++++-
 cpp/src/arrow/filesystem/azurefs_test.cc |  6 ++-
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index 84733a824e..bb8309a247 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -1179,6 +1179,15 @@ Status CreateContainerIfNotExists(const std::string& 
container_name,
   }
 }
 
+FileInfo FileInfoFromPath(std::string_view container,
+                          const DataLake::Models::PathItem& path) {
+  FileInfo info{internal::ConcatAbstractPath(container, path.Name),
+                path.IsDirectory ? FileType::Directory : FileType::File};
+  info.set_size(path.FileSize);
+  info.set_mtime(std::chrono::system_clock::time_point{path.LastModified});
+  return info;
+}
+
 FileInfo DirectoryFileInfoFromPath(std::string_view path) {
   return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, 
FileType::Directory};
 }
@@ -1576,7 +1585,7 @@ class AzureFileSystem::Impl {
   }
 
  private:
-  /// \pref location.container is not empty.
+  /// \pre location.container is not empty.
   template <typename ContainerClient>
   Status CheckDirExists(const ContainerClient& container_client,
                         const AzureLocation& location) {
@@ -1623,6 +1632,50 @@ class AzureFileSystem::Impl {
     return result;
   }
 
+  /// \brief List the paths at the root of a filesystem or some dir in a 
filesystem.
+  ///
+  /// \pre adlfs_client is the client for the filesystem named like the first
+  /// segment of select.base_dir.
+  Status GetFileInfoWithSelectorFromFileSystem(
+      const DataLake::DataLakeFileSystemClient& adlfs_client,
+      const Core::Context& context, Azure::Nullable<int32_t> page_size_hint,
+      const FileSelector& select, FileInfoVector* acc_results) {
+    ARROW_ASSIGN_OR_RAISE(auto base_location, 
AzureLocation::FromString(select.base_dir));
+
+    auto directory_client = 
adlfs_client.GetDirectoryClient(base_location.path);
+    bool found = false;
+    DataLake::ListPathsOptions options;
+    options.PageSizeHint = page_size_hint;
+
+    try {
+      auto list_response = directory_client.ListPaths(select.recursive, 
options, context);
+      for (; list_response.HasPage(); list_response.MoveToNextPage(context)) {
+        if (list_response.Paths.empty()) {
+          continue;
+        }
+        found = true;
+        for (const auto& path : list_response.Paths) {
+          if (path.Name == base_location.path && !path.IsDirectory) {
+            return NotADir(base_location);
+          }
+          acc_results->push_back(FileInfoFromPath(base_location.container, 
path));
+        }
+      }
+    } catch (const Storage::StorageException& exception) {
+      if (IsContainerNotFound(exception) || exception.ErrorCode == 
"PathNotFound") {
+        found = false;
+      } else {
+        return ExceptionToStatus(exception,
+                                 "Failed to list paths in a directory: ", 
select.base_dir,
+                                 ": ", directory_client.GetUrl());
+      }
+    }
+
+    return found || select.allow_not_found
+               ? Status::OK()
+               : ::arrow::fs::internal::PathNotFound(select.base_dir);
+  }
+
   /// \brief List the blobs at the root of a container or some dir in a 
container.
   ///
   /// \pre container_client is the client for the container named like the 
first
@@ -1772,6 +1825,20 @@ class AzureFileSystem::Impl {
       return VisitContainers(context, std::move(on_container));
     }
 
+    auto adlfs_client = GetFileSystemClient(base_location.container);
+    ARROW_ASSIGN_OR_RAISE(auto hns_support, 
HierarchicalNamespaceSupport(adlfs_client));
+    if (hns_support == HNSSupport::kContainerNotFound) {
+      if (select.allow_not_found) {
+        return Status::OK();
+      } else {
+        return ::arrow::fs::internal::PathNotFound(select.base_dir);
+      }
+    }
+    if (hns_support == HNSSupport::kEnabled) {
+      return GetFileInfoWithSelectorFromFileSystem(adlfs_client, context, 
page_size_hint,
+                                                   select, acc_results);
+    }
+    DCHECK_EQ(hns_support, HNSSupport::kDisabled);
     auto container_client =
         blob_service_client_->GetBlobContainerClient(base_location.container);
     return GetFileInfoWithSelectorFromContainer(container_client, context, 
page_size_hint,
@@ -2157,6 +2224,17 @@ class AzureFileSystem::Impl {
                                Azure::Nullable<std::string> lease_id = {}) {
     DCHECK(!location.container.empty());
     DCHECK(!location.path.empty());
+    ARROW_ASSIGN_OR_RAISE(auto file_info, GetFileInfo(adlfs_client, location, 
lease_id));
+    if (file_info.type() == FileType::NotFound) {
+      if (require_dir_to_exist) {
+        return PathNotFound(location);
+      } else {
+        return Status::OK();
+      }
+    }
+    if (file_info.type() != FileType::Directory) {
+      return NotADir(location);
+    }
     auto directory_client = adlfs_client.GetDirectoryClient(
         std::string(internal::RemoveTrailingSlash(location.path)));
     DataLake::DeleteDirectoryOptions options;
@@ -2200,6 +2278,9 @@ class AzureFileSystem::Impl {
                   kDelimiter, path.Name, ": ", sub_directory_client.GetUrl());
             }
           } else {
+            if (path.Name == location.path) {
+              return NotADir(location);
+            }
             auto sub_file_client = adlfs_client.GetFileClient(path.Name);
             try {
               sub_file_client.Delete();
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc 
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 24031e313f..ed09bfc2fa 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -392,7 +392,7 @@ class TestGeneric : public ::testing::Test, public 
GenericFileSystemTest {
   bool allow_move_dir() const override { return false; }
   bool allow_move_file() const override { return true; }
   bool allow_append_to_file() const override { return true; }
-  bool have_directory_mtimes() const override { return false; }
+  bool have_directory_mtimes() const override { return true; }
   bool have_flaky_directory_tree_deletion() const override { return false; }
   bool have_file_metadata() const override { return true; }
   // calloc() used in libxml2's xmlNewGlobalState() is detected as a
@@ -429,6 +429,8 @@ class TestAzuriteGeneric : public TestGeneric {
  protected:
   // Azurite doesn't support moving files over containers.
   bool allow_move_file() const override { return false; }
+  // Azurite doesn't support directory mtime.
+  bool have_directory_mtimes() const override { return false; }
   // DeleteDir() doesn't work with Azurite on macOS
   bool have_flaky_directory_tree_deletion() const override {
     return env_->HasSubmitBatchBug();
@@ -449,6 +451,8 @@ class TestAzureFlatNSGeneric : public TestGeneric {
  protected:
   // Flat namespace account doesn't support moving files over containers.
   bool allow_move_file() const override { return false; }
+  // Flat namespace account doesn't support directory mtime.
+  bool have_directory_mtimes() const override { return false; }
 };
 
 class TestAzureHierarchicalNSGeneric : public TestGeneric {

Reply via email to