felipecrv commented on code in PR #38888:
URL: https://github.com/apache/arrow/pull/38888#discussion_r1407044310


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -970,6 +970,78 @@ class AzureFileSystem::Impl {
     return stream;
   }
 
+ private:
+  Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation& 
location,
+                                                       bool missing_dir_ok) {
+    auto container_client =
+        blob_service_client_->GetBlobContainerClient(location.container);
+    Azure::Storage::Blobs::ListBlobsOptions options;
+    options.Prefix = internal::EnsureTrailingSlash(location.path);
+    // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+    //
+    // Only supports up to 256 subrequests in a single batch. The
+    // size of the body for a batch request can't exceed 4 MB.
+    const int32_t kNumMaxRequestsInBatch = 256;
+    options.PageSizeHint = kNumMaxRequestsInBatch;
+    try {
+      auto list_response = container_client.ListBlobs(options);
+      if (!missing_dir_ok && list_response.Blobs.empty()) {
+        return Status::IOError("Specified directory doesn't exist: ", 
location.path, ": ",
+                               container_client.GetUrl());

Review Comment:
   Shouldn't this be a `PathNotFound` error?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -970,6 +970,78 @@ class AzureFileSystem::Impl {
     return stream;
   }
 
+ private:
+  Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation& 
location,
+                                                       bool missing_dir_ok) {
+    auto container_client =
+        blob_service_client_->GetBlobContainerClient(location.container);
+    Azure::Storage::Blobs::ListBlobsOptions options;
+    options.Prefix = internal::EnsureTrailingSlash(location.path);
+    // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+    //
+    // Only supports up to 256 subrequests in a single batch. The
+    // size of the body for a batch request can't exceed 4 MB.
+    const int32_t kNumMaxRequestsInBatch = 256;
+    options.PageSizeHint = kNumMaxRequestsInBatch;
+    try {
+      auto list_response = container_client.ListBlobs(options);
+      if (!missing_dir_ok && list_response.Blobs.empty()) {
+        return Status::IOError("Specified directory doesn't exist: ", 
location.path, ": ",
+                               container_client.GetUrl());
+      }
+      while (list_response.HasPage() && !list_response.Blobs.empty()) {

Review Comment:
   This could be me being overly cautious, but maybe we shouldn't stop the loop
when `Blobs.empty()`. Instead, `continue;` so that `MoveToNextPage()` is
evaluated again, in case an empty page is followed by a non-empty one.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1017,69 +1089,67 @@ class AzureFileSystem::Impl {
             exception);
       }
     } else {
-      auto container_client =
-          blob_service_client_->GetBlobContainerClient(location.container);
-      Azure::Storage::Blobs::ListBlobsOptions options;
-      options.Prefix = internal::EnsureTrailingSlash(location.path);
-      // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
-      //
-      // Only supports up to 256 subrequests in a single batch. The
-      // size of the body for a batch request can't exceed 4 MB.
-      const int32_t kNumMaxRequestsInBatch = 256;
-      options.PageSizeHint = kNumMaxRequestsInBatch;
+      return DeleteDirContentsWihtoutHierarchicalNamespace(location, true);

Review Comment:
   Please annotate the boolean argument with its parameter name: `/*missing_dir_ok=*/true`



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1017,69 +1089,67 @@ class AzureFileSystem::Impl {
             exception);
       }
     } else {
-      auto container_client =
-          blob_service_client_->GetBlobContainerClient(location.container);
-      Azure::Storage::Blobs::ListBlobsOptions options;
-      options.Prefix = internal::EnsureTrailingSlash(location.path);
-      // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
-      //
-      // Only supports up to 256 subrequests in a single batch. The
-      // size of the body for a batch request can't exceed 4 MB.
-      const int32_t kNumMaxRequestsInBatch = 256;
-      options.PageSizeHint = kNumMaxRequestsInBatch;
+      return DeleteDirContentsWihtoutHierarchicalNamespace(location, true);
+    }
+  }
+
+  Status DeleteDirContents(const AzureLocation& location, bool missing_dir_ok) 
{
+    if (location.container.empty()) {
+      return internal::InvalidDeleteDirContents(location.all);
+    }
+    if (location.path.empty()) {
+      return internal::InvalidDeleteDirContents(location.all);

Review Comment:
   Shouldn't an empty `location.path` be interpreted as a request to delete the contents of the container?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -970,6 +970,78 @@ class AzureFileSystem::Impl {
     return stream;
   }
 
+ private:
+  Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation& 
location,
+                                                       bool missing_dir_ok) {
+    auto container_client =
+        blob_service_client_->GetBlobContainerClient(location.container);
+    Azure::Storage::Blobs::ListBlobsOptions options;
+    options.Prefix = internal::EnsureTrailingSlash(location.path);
+    // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+    //
+    // Only supports up to 256 subrequests in a single batch. The
+    // size of the body for a batch request can't exceed 4 MB.
+    const int32_t kNumMaxRequestsInBatch = 256;
+    options.PageSizeHint = kNumMaxRequestsInBatch;
+    try {
+      auto list_response = container_client.ListBlobs(options);
+      if (!missing_dir_ok && list_response.Blobs.empty()) {
+        return Status::IOError("Specified directory doesn't exist: ", 
location.path, ": ",
+                               container_client.GetUrl());
+      }
+      while (list_response.HasPage() && !list_response.Blobs.empty()) {
+        auto batch = container_client.CreateBatch();
+        std::vector<Azure::Storage::DeferredResponse<
+            Azure::Storage::Blobs::Models::DeleteBlobResult>>
+            deferred_responses;
+        for (const auto& blob_item : list_response.Blobs) {
+          deferred_responses.push_back(batch.DeleteBlob(blob_item.Name));

Review Comment:
   Skip the blob when `blob_item.Name` matches `options.Prefix`, because that
could be the empty-directory marker blob (an empty blob whose name ends with a `/`).



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -970,6 +970,78 @@ class AzureFileSystem::Impl {
     return stream;
   }
 
+ private:
+  Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation& 
location,
+                                                       bool missing_dir_ok) {
+    auto container_client =
+        blob_service_client_->GetBlobContainerClient(location.container);
+    Azure::Storage::Blobs::ListBlobsOptions options;
+    options.Prefix = internal::EnsureTrailingSlash(location.path);
+    // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+    //
+    // Only supports up to 256 subrequests in a single batch. The
+    // size of the body for a batch request can't exceed 4 MB.
+    const int32_t kNumMaxRequestsInBatch = 256;
+    options.PageSizeHint = kNumMaxRequestsInBatch;
+    try {
+      auto list_response = container_client.ListBlobs(options);
+      if (!missing_dir_ok && list_response.Blobs.empty()) {
+        return Status::IOError("Specified directory doesn't exist: ", 
location.path, ": ",
+                               container_client.GetUrl());
+      }
+      while (list_response.HasPage() && !list_response.Blobs.empty()) {

Review Comment:
   Another thing: instead of `while`, you can use
`for (; list_response.HasPage(); list_response.MoveToNextPage()) {` here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to