felipecrv commented on code in PR #38793:
URL: https://github.com/apache/arrow/pull/38793#discussion_r1400791660


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -969,6 +969,95 @@ class AzureFileSystem::Impl {
     RETURN_NOT_OK(stream->Init());
     return stream;
   }
+
+  Status DeleteDir(const AzureLocation& location) {
+    if (location.container.empty()) {
+      return Status::Invalid("Cannot delete an empty container");
+    }
+
+    if (location.path.empty()) {
+      auto container_client =
+          blob_service_client_->GetBlobContainerClient(location.container);
+      try {
+        auto response = container_client.Delete();
+        if (response.Value.Deleted) {
+          return Status::OK();
+        } else {
+          return StatusFromErrorResponse(
+              container_client.GetUrl(), response.RawResponse.get(),
+              "Failed to delete a container: " + location.container);
+        }
+      } catch (const Azure::Storage::StorageException& exception) {
+        return internal::ExceptionToStatus(
+            "Failed to delete a container: " + location.container + ": " +
+                container_client.GetUrl(),
+            exception);
+      }
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
+                          hierarchical_namespace_.Enabled(location.container));
+    if (hierarchical_namespace_enabled) {
+      auto directory_client =
+          datalake_service_client_->GetFileSystemClient(location.container)
+              .GetDirectoryClient(location.path);
+      try {
+        auto response = directory_client.DeleteRecursive();
+        if (response.Value.Deleted) {
+          return Status::OK();
+        } else {
+          return StatusFromErrorResponse(
+              directory_client.GetUrl(), response.RawResponse.get(),
+              "Failed to delete a directory: " + location.path);
+        }
+      } catch (const Azure::Storage::StorageException& exception) {
+        return internal::ExceptionToStatus(
+            "Failed to delete a directory: " + location.path + ": " +
+                directory_client.GetUrl(),
+            exception);
+      }
+    } else {
+      auto container_client =
+          blob_service_client_->GetBlobContainerClient(location.container);
+      Azure::Storage::Blobs::ListBlobsOptions options;
+      options.Prefix = internal::EnsureTrailingSlash(location.path);
+      // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+      //
+      // Only supports up to 256 subrequests in a single batch. The
+      // size of the body for a batch request can't exceed 4 MB.
+      const int32_t kNumMaxRequestsInBatch = 256;
+      options.PageSizeHint = kNumMaxRequestsInBatch;
+      auto list_response = container_client.ListBlobs(options);
+      while (list_response.HasPage() && !list_response.Blobs.empty()) {
+        auto batch = container_client.CreateBatch();
+        std::vector<Azure::Storage::DeferredResponse<
+            Azure::Storage::Blobs::Models::DeleteBlobResult>>
+            deferred_responses;
+        for (const auto& blob_item : list_response.Blobs) {
+          deferred_responses.push_back(batch.DeleteBlob(blob_item.Name));
+        }
+        try {
+          container_client.SubmitBatch(batch);
+        } catch (const Azure::Storage::StorageException& exception) {
+          return internal::ExceptionToStatus(
+              "Failed to delete blobs in a directory: " + location.path + ": " 
+
+                  container_client.GetUrl(),
+              exception);
+        }
+        for (size_t i = 0; i < deferred_responses.size(); ++i) {
+          const auto& deferred_response = deferred_responses[i];
+          auto delete_response = deferred_response.GetResponse();
+          if (!delete_response.Value.Deleted) {
+            const auto& blob_item = list_response.Blobs[i];
+            return Status::IOError("Failed to delete a blob: ", blob_item.Name,
+                                   ": " + container_client.GetUrl());

Review Comment:
   I would save this error in a `Status` variable and let all the loops continue, 
then check the saved status at the end and return it if it is not OK.
   
   This way we don't waste the effort already spent listing the blobs, and a 
caller that retries after we return the error at the end is likely to have 
less work to do.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -969,6 +969,95 @@ class AzureFileSystem::Impl {
     RETURN_NOT_OK(stream->Init());
     return stream;
   }
+
+  Status DeleteDir(const AzureLocation& location) {
+    if (location.container.empty()) {
+      return Status::Invalid("Cannot delete an empty container");
+    }
+
+    if (location.path.empty()) {
+      auto container_client =
+          blob_service_client_->GetBlobContainerClient(location.container);
+      try {
+        auto response = container_client.Delete();
+        if (response.Value.Deleted) {
+          return Status::OK();
+        } else {
+          return StatusFromErrorResponse(
+              container_client.GetUrl(), response.RawResponse.get(),
+              "Failed to delete a container: " + location.container);
+        }
+      } catch (const Azure::Storage::StorageException& exception) {
+        return internal::ExceptionToStatus(
+            "Failed to delete a container: " + location.container + ": " +
+                container_client.GetUrl(),
+            exception);
+      }
+    }
+
+    ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
+                          hierarchical_namespace_.Enabled(location.container));
+    if (hierarchical_namespace_enabled) {
+      auto directory_client =
+          datalake_service_client_->GetFileSystemClient(location.container)
+              .GetDirectoryClient(location.path);
+      try {
+        auto response = directory_client.DeleteRecursive();
+        if (response.Value.Deleted) {
+          return Status::OK();
+        } else {
+          return StatusFromErrorResponse(
+              directory_client.GetUrl(), response.RawResponse.get(),
+              "Failed to delete a directory: " + location.path);
+        }
+      } catch (const Azure::Storage::StorageException& exception) {
+        return internal::ExceptionToStatus(
+            "Failed to delete a directory: " + location.path + ": " +
+                directory_client.GetUrl(),
+            exception);
+      }
+    } else {
+      auto container_client =
+          blob_service_client_->GetBlobContainerClient(location.container);
+      Azure::Storage::Blobs::ListBlobsOptions options;
+      options.Prefix = internal::EnsureTrailingSlash(location.path);
+      // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
+      //
+      // Only supports up to 256 subrequests in a single batch. The
+      // size of the body for a batch request can't exceed 4 MB.
+      const int32_t kNumMaxRequestsInBatch = 256;
+      options.PageSizeHint = kNumMaxRequestsInBatch;
+      auto list_response = container_client.ListBlobs(options);
+      while (list_response.HasPage() && !list_response.Blobs.empty()) {

Review Comment:
   How are the `ListBlobs` errors handled here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to