This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0dbbd43ca9 GH-40052: [C++][FS][Azure] Fix CreateDir and DeleteDir
trailing slash issues on hierarchical namespace accounts (#40054)
0dbbd43ca9 is described below
commit 0dbbd43ca9133912d1809394727784560cc5e797
Author: Thomas Newton <[email protected]>
AuthorDate: Tue Feb 13 18:15:10 2024 +0000
GH-40052: [C++][FS][Azure] Fix CreateDir and DeleteDir trailing slash
issues on hierarchical namespace accounts (#40054)
### Rationale for this change
There were the following failure cases
```
fs->CreateDir("directory/")
```
```
fs->DeleteDir("directory/")
```
They fail with
```
Failed to delete a directory: directory/:
https://tomtesthns.blob.core.windows.net/ea119933-c9d3-11ee-989a-71cec6336ac8/directory/
Azure Error: [InvalidUri] 400 The request URI is invalid.
The request URI is invalid.
RequestId:c9ad826a-101f-0005-5be0-5d0db4000000
Time:2024-02-12T18:24:12.9974541Z
Request ID: c9ad826a-101f-0005-5be0-5d0db4000000
```
### What changes are included in this PR?
Add tests to cover these cases.
Remove trailing slashes to avoid these issues.
### Are these changes tested?
Yes. I added new test cases to cover these cases. I was rather torn about
whether to add precise tests like I've done or to duplicate every test case we
had and run it a second time with trailing slashes.
### Are there any user-facing changes?
Fixed bug on `CreateDir` and `DeleteDir`.
* Closes: #40052
Authored-by: Thomas Newton <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 6 ++-
cpp/src/arrow/filesystem/azurefs_test.cc | 64 ++++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index d4bb445701..1175059193 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -1635,7 +1635,8 @@ class AzureFileSystem::Impl {
return CreateDirTemplate(
adlfs_client,
[](const auto& adlfs_client, const auto& location) {
- auto directory_client =
adlfs_client.GetDirectoryClient(location.path);
+ auto directory_client = adlfs_client.GetDirectoryClient(
+ std::string(internal::RemoveTrailingSlash(location.path)));
directory_client.CreateIfNotExists();
},
location, recursive);
@@ -1860,7 +1861,8 @@ class AzureFileSystem::Impl {
Azure::Nullable<std::string> lease_id = {}) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
- auto directory_client = adlfs_client.GetDirectoryClient(location.path);
+ auto directory_client = adlfs_client.GetDirectoryClient(
+ std::string(internal::RemoveTrailingSlash(location.path)));
DataLake::DeleteDirectoryOptions options;
options.AccessConditions.LeaseId = std::move(lease_id);
try {
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index c39a5b7d22..42f38f1ed6 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -698,6 +698,14 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), dir1, FileType::Directory);
}
+ void TestCreateDirOnRootWithTrailingSlash() {
+ auto dir1 = PreexistingData::RandomContainerName(rng_) + "/";
+
+ AssertFileInfo(fs(), dir1, FileType::NotFound);
+ ASSERT_OK(fs()->CreateDir(dir1, false));
+ AssertFileInfo(fs(), dir1, FileType::Directory);
+ }
+
void TestCreateDirOnExistingContainer() {
auto data = SetUpPreexistingData();
auto dir1 = data.RandomDirectoryPath(rng_);
@@ -758,6 +766,15 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), subdir5, FileType::Directory);
}
+ void TestCreateDirOnExistingContainerWithTrailingSlash() {
+ auto data = SetUpPreexistingData();
+ auto dir1 = data.RandomDirectoryPath(rng_) + "/";
+
+ AssertFileInfo(fs(), dir1, FileType::NotFound);
+ ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/false));
+ AssertFileInfo(fs(), dir1, FileType::Directory);
+ }
+
void TestCreateDirOnMissingContainer() {
auto container1 = PreexistingData::RandomContainerName(rng_);
auto container2 = PreexistingData::RandomContainerName(rng_);
@@ -844,6 +861,21 @@ class TestAzureFileSystem : public ::testing::Test {
AssertFileInfo(fs(), blob_path, FileType::NotFound);
}
+ void TestNonEmptyDirWithTrailingSlash() {
+ if (HasSubmitBatchBug()) {
+ GTEST_SKIP() << kSubmitBatchBugMessage;
+ }
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt");
+ ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path));
+ ASSERT_OK(output->Write("hello"));
+ ASSERT_OK(output->Close());
+ AssertFileInfo(fs(), blob_path, FileType::File);
+ ASSERT_OK(fs()->DeleteDir(directory_path + "/"));
+ AssertFileInfo(fs(), blob_path, FileType::NotFound);
+ }
+
void TestDeleteDirSuccessHaveDirectory() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
@@ -873,6 +905,20 @@ class TestAzureFileSystem : public ::testing::Test {
}
}
+ void TestDeleteDirContentsSuccessExistWithTrailingSlash() {
+ if (HasSubmitBatchBug()) {
+ GTEST_SKIP() << kSubmitBatchBugMessage;
+ }
+ auto preexisting_data = SetUpPreexistingData();
+ HierarchicalPaths paths;
+ CreateHierarchicalData(&paths);
+ ASSERT_OK(fs()->DeleteDirContents(paths.directory + "/"));
+ AssertFileInfo(fs(), paths.directory, FileType::Directory);
+ for (const auto& sub_path : paths.sub_paths) {
+ AssertFileInfo(fs(), sub_path, FileType::NotFound);
+ }
+ }
+
void TestDeleteDirContentsSuccessNonexistent() {
if (HasSubmitBatchBug()) {
GTEST_SKIP() << kSubmitBatchBugMessage;
@@ -1466,6 +1512,10 @@ TYPED_TEST(TestAzureFileSystemOnAllEnvs,
CreateDirWithEmptyPath) {
TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRoot) {
this->TestCreateDirOnRoot(); }
+TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRootWithTrailingSlash) {
+ this->TestCreateDirOnRootWithTrailingSlash();
+}
+
// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/
HNS)
// combined with the two scenarios for AzureFileSystem::cached_hns_support_ --
unknown and
// known according to the environment.
@@ -1496,6 +1546,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios,
CreateDirOnExistingContainer) {
this->TestCreateDirOnExistingContainer();
}
+TYPED_TEST(TestAzureFileSystemOnAllScenarios,
+ CreateDirOnExistingContainerWithTrailingSlash) {
+ this->TestCreateDirOnExistingContainerWithTrailingSlash();
+}
+
TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnMissingContainer) {
this->TestCreateDirOnMissingContainer();
}
@@ -1512,6 +1567,10 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios,
DeleteDirSuccessHaveBlob) {
this->TestDeleteDirSuccessHaveBlob();
}
+TYPED_TEST(TestAzureFileSystemOnAllScenarios, NonEmptyDirWithTrailingSlash) {
+ this->TestNonEmptyDirWithTrailingSlash();
+}
+
TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveDirectory) {
this->TestDeleteDirSuccessHaveDirectory();
}
@@ -1520,6 +1579,11 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios,
DeleteDirContentsSuccessExist) {
this->TestDeleteDirContentsSuccessExist();
}
+TYPED_TEST(TestAzureFileSystemOnAllScenarios,
+ DeleteDirContentsSuccessExistWithTrailingSlash) {
+ this->TestDeleteDirContentsSuccessExistWithTrailingSlash();
+}
+
TYPED_TEST(TestAzureFileSystemOnAllScenarios,
DeleteDirContentsSuccessNonexistent) {
this->TestDeleteDirContentsSuccessNonexistent();
}