mapleFU commented on code in PR #38793: URL: https://github.com/apache/arrow/pull/38793#discussion_r1401518276
########## cpp/src/arrow/filesystem/azurefs.cc: ########## @@ -969,6 +969,113 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(stream->Init()); return stream; } + + Status DeleteDir(const AzureLocation& location) { + if (location.container.empty()) { + return Status::Invalid("Cannot delete an empty container"); + } + + if (location.path.empty()) { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + try { + auto response = container_client.Delete(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + container_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a container: " + location.container); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a container: " + location.container + ": " + + container_client.GetUrl(), + exception); + } + } + + ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, + hierarchical_namespace_.Enabled(location.container)); + if (hierarchical_namespace_enabled) { + auto directory_client = + datalake_service_client_->GetFileSystemClient(location.container) + .GetDirectoryClient(location.path); + try { + auto response = directory_client.DeleteRecursive(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + directory_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a directory: " + location.path); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); + } + } else { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = internal::EnsureTrailingSlash(location.path); + // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks + // + // Only supports up to 256 subrequests in a single batch. The + // size of the body for a batch request can't exceed 4 MB. + const int32_t kNumMaxRequestsInBatch = 256; + options.PageSizeHint = kNumMaxRequestsInBatch; + try { + auto list_response = container_client.ListBlobs(options); + while (list_response.HasPage() && !list_response.Blobs.empty()) { + auto batch = container_client.CreateBatch(); + std::vector<Azure::Storage::DeferredResponse< + Azure::Storage::Blobs::Models::DeleteBlobResult>> + deferred_responses; + for (const auto& blob_item : list_response.Blobs) { + deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + } + try { + container_client.SubmitBatch(batch); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete blobs in a directory: " + location.path + ": " + + container_client.GetUrl(), + exception); + } Review Comment: 😮 wow...Seems you're right ########## cpp/src/arrow/filesystem/azurefs.cc: ########## @@ -969,6 +969,113 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(stream->Init()); return stream; } + + Status DeleteDir(const AzureLocation& location) { + if (location.container.empty()) { + return Status::Invalid("Cannot delete an empty container"); + } + + if (location.path.empty()) { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + try { + auto response = container_client.Delete(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + container_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a container: " + location.container); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a container: " + location.container + ": " + + container_client.GetUrl(), + exception); + } + } + + 
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, + hierarchical_namespace_.Enabled(location.container)); + if (hierarchical_namespace_enabled) { + auto directory_client = + datalake_service_client_->GetFileSystemClient(location.container) + .GetDirectoryClient(location.path); + try { + auto response = directory_client.DeleteRecursive(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + directory_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a directory: " + location.path); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); + } + } else { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = internal::EnsureTrailingSlash(location.path); + // https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks + // + // Only supports up to 256 subrequests in a single batch. The + // size of the body for a batch request can't exceed 4 MB. 
+ const int32_t kNumMaxRequestsInBatch = 256; + options.PageSizeHint = kNumMaxRequestsInBatch; + try { + auto list_response = container_client.ListBlobs(options); + while (list_response.HasPage() && !list_response.Blobs.empty()) { + auto batch = container_client.CreateBatch(); + std::vector<Azure::Storage::DeferredResponse< + Azure::Storage::Blobs::Models::DeleteBlobResult>> + deferred_responses; + for (const auto& blob_item : list_response.Blobs) { + deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + } + try { + container_client.SubmitBatch(batch); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete blobs in a directory: " + location.path + ": " + + container_client.GetUrl(), + exception); + } Review Comment: 😮 wow...Seems you're right -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org