Tom-Newton commented on code in PR #38888:
URL: https://github.com/apache/arrow/pull/38888#discussion_r1411101893
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -970,6 +970,79 @@ class AzureFileSystem::Impl {
return stream;
}
+ private:
+ Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation&
location,
+ bool missing_dir_ok) {
+ auto container_client =
+ blob_service_client_->GetBlobContainerClient(location.container);
+ Azure::Storage::Blobs::ListBlobsOptions options;
+ options.Prefix = internal::EnsureTrailingSlash(location.path);
Review Comment:
Maybe just leave a comment saying that `EnsureTrailingSlash` doesn't alter
empty string.
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -666,6 +666,119 @@ TEST_F(AzuriteFileSystemTest, DeleteDirUri) {
ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" +
PreexistingContainerPath()));
}
+TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) {
+#ifdef __APPLE__
+ GTEST_SKIP() << "This test fails by an Azurite problem: "
+ "https://github.com/Azure/Azurite/pull/2302";
+#endif
+ const auto container_path = RandomContainerName();
+ const auto directory_path =
+ internal::ConcatAbstractPath(container_path, RandomDirectoryName());
+ const auto sub_directory_path = internal::ConcatAbstractPath(directory_path,
"new-sub");
+ const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path,
"sub.txt");
+ const auto top_blob_path = internal::ConcatAbstractPath(directory_path,
"top.txt");
+ ASSERT_OK(fs_->CreateDir(sub_directory_path, true));
+ ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path));
+ ASSERT_OK(output->Write(std::string_view("sub")));
+ ASSERT_OK(output->Close());
+ ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path));
+ ASSERT_OK(output->Write(std::string_view("top")));
+ ASSERT_OK(output->Close());
Review Comment:
Could all of this be extracted to a member function of
`AzureFileSystemTest`? It looks like this has been copy pasted in 3 of the
tests.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1017,69 +1089,67 @@ class AzureFileSystem::Impl {
exception);
}
} else {
- auto container_client =
- blob_service_client_->GetBlobContainerClient(location.container);
- Azure::Storage::Blobs::ListBlobsOptions options;
- options.Prefix = internal::EnsureTrailingSlash(location.path);
- //
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks
- //
- // Only supports up to 256 subrequests in a single batch. The
- // size of the body for a batch request can't exceed 4 MB.
- const int32_t kNumMaxRequestsInBatch = 256;
- options.PageSizeHint = kNumMaxRequestsInBatch;
+ return DeleteDirContentsWihtoutHierarchicalNamespace(location, true);
+ }
+ }
+
+ Status DeleteDirContents(const AzureLocation& location, bool missing_dir_ok)
{
+ if (location.container.empty()) {
+ return internal::InvalidDeleteDirContents(location.all);
+ }
+ if (location.path.empty()) {
+ return internal::InvalidDeleteDirContents(location.all);
Review Comment:
Overall I'm happy with how this is currently implemented.
For the sake of argument though... I think `allow_container_deletion` is
relevant to `DeleteDirContents` in the case of `location.container.empty()`. I
would expect the behaviour in this case to be to delete all containers in the
storage account. However, I'm happy to stick with just returning an error
because it seems quite dangerous.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]