felipecrv commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1425914623
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -377,199 +487,337 @@ class AzureFileSystemTest : public ::testing::Test {
strlen(kSubData));
AssertFileInfo(infos[10], "container/somefile", FileType::File,
strlen(kSomeData));
AssertFileInfo(infos[11], "empty-container", FileType::Directory);
- AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
- AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
}
-};
-class AzuriteFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- EXPECT_THAT(GetAzuriteEnv(), NotNull());
- ARROW_EXPECT_OK(GetAzuriteEnv()->status());
- ARROW_ASSIGN_OR_RAISE(debug_log_start_,
GetAzuriteEnv()->GetDebugLogSize());
- AzureOptions options;
- options.backend = AzureBackend::Azurite;
- ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
- GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
- return options;
+ bool WithHierarchicalNamespace() const {
+ EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+ return env->WithHierarchicalNamespace();
}
- void TearDown() override {
- AzureFileSystemTest::TearDown();
- if (HasFailure()) {
- // XXX: This may not include all logs in the target test because
- // Azurite doesn't flush debug logs immediately... You may want
- // to check the log manually...
- ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+ // Tests that are called from more than one implementation of
AzureFileSystemTest
+
+ void DetectHierarchicalNamespaceTest();
+ void GetFileInfoObjectTest();
+ void GetFileInfoObjectWithNestedStructureTest();
+
+ void DeleteDirSuccessEmptyTest() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path =
+ internal::ConcatAbstractPath(data.container_name,
data.RandomDirectoryName(rng_));
+
+ if (WithHierarchicalNamespace()) {
+ ASSERT_OK(fs_->CreateDir(directory_path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path,
FileType::Directory);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() and DeleteDir() do nothing.
+ ASSERT_OK(fs_->CreateDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
}
}
- int64_t debug_log_start_ = 0;
-};
+ void CreateDirSuccessContainerAndDirectoryTest() {
+ auto data = SetUpPreexistingData();
+ const auto path = data.RandomDirectoryPath(rng_);
+ ASSERT_OK(fs_->CreateDir(path, false));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ }
+ }
-class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- AzureOptions options;
- const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
- const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME");
- if (account_key && account_name) {
- RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
- return options;
+ void CreateDirRecursiveSuccessContainerOnlyTest() {
+ auto container_name = PreexistingData::RandomContainerName(rng_);
+ ASSERT_OK(fs_->CreateDir(container_name, true));
+ arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+ }
+
+ void CreateDirRecursiveSuccessDirectoryOnlyTest() {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
}
- return Status::Cancelled(
- "Connection details not provided for a real flat namespace "
- "account.");
}
-};
-// How to enable this test:
-//
-// You need an Azure account. You should be able to create a free
-// account at https://azure.microsoft.com/en-gb/free/ . You should be
-// able to create a storage account through the portal Web UI.
-//
-// See also the official document how to create a storage account:
-//
https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
-//
-// A few suggestions on configuration:
-//
-// * Use Standard general-purpose v2 not premium
-// * Use LRS redundancy
-// * Obviously you need to enable hierarchical namespace.
-// * Set the default access tier to hot
-// * SFTP, NFS and file shares are not required.
-class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- AzureOptions options;
- const auto account_key =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
- const auto account_name =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME");
- if (account_key && account_name) {
- RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
- return options;
+ void CreateDirRecursiveSuccessContainerAndDirectoryTest() {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), data.container_name,
FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), data.container_name,
FileType::Directory);
}
- return Status::Cancelled(
- "Connection details not provided for a real hierarchical namespace "
- "account.");
}
-};
-TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+ void DeleteDirContentsSuccessNonexistentTest() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ }
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(true,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+ void DeleteDirContentsFailureNonexistentTest() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+ }
+};
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+void AzureFileSystemTest::DetectHierarchicalNamespaceTest() {
+ // Check the environments are implemented and injected here correctly.
+ auto expected = WithHierarchicalNamespace();
-TEST_F(AzuriteFileSystemTest,
DetectHierarchicalNamespaceFailsWithMissingContainer) {
+ auto data = SetUpPreexistingData();
auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
+ ASSERT_OK_AND_EQ(expected,
hierarchical_namespace.Enabled(data.container_name));
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
- AssertFileInfo(fs_.get(), "", FileType::Directory);
-
- // URI
- ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
-}
-
-TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
- AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory);
+void AzureFileSystemTest::GetFileInfoObjectTest() {
+ auto data = SetUpPreexistingData();
+ auto object_properties =
+ blob_service_client_->GetBlobContainerClient(data.container_name)
+ .GetBlobClient(data.kObjectName)
+ .GetProperties()
+ .Value;
- AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+ AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File,
+
std::chrono::system_clock::time_point{object_properties.LastModified},
+ static_cast<int64_t>(object_properties.BlobSize));
// URI
- ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
PreexistingContainerName()));
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
std::string{data.kObjectName}));
}
-void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
+void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() {
+ auto data = SetUpPreexistingData();
// Adds detailed tests to handle cases of different edge cases
// with directory naming conventions (e.g. with and without slashes).
constexpr auto kObjectName =
"test-object-dir/some_other_dir/another_dir/foo";
ASSERT_OK_AND_ASSIGN(
auto output,
- fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName,
/*metadata=*/{}));
- const std::string_view data(kLoremIpsum);
- ASSERT_OK(output->Write(data));
+ fs_->OpenOutputStream(data.ContainerPath() + kObjectName,
/*metadata=*/{}));
+ const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum);
+ ASSERT_OK(output->Write(lorem_ipsum));
ASSERT_OK(output->Close());
// 0 is immediately after "/" lexicographically, ensure that this doesn't
// cause unexpected issues.
- ASSERT_OK_AND_ASSIGN(output,
- fs_->OpenOutputStream(
- PreexistingContainerPath() +
"test-object-dir/some_other_dir0",
- /*metadata=*/{}));
- ASSERT_OK(output->Write(data));
- ASSERT_OK(output->Close());
ASSERT_OK_AND_ASSIGN(
- output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName +
"0",
- /*metadata=*/{}));
- ASSERT_OK(output->Write(data));
+ output,
+ fs_->OpenOutputStream(data.ContainerPath() +
"test-object-dir/some_other_dir0",
+ /*metadata=*/{}));
+ ASSERT_OK(output->Write(lorem_ipsum));
+ ASSERT_OK(output->Close());
+ ASSERT_OK_AND_ASSIGN(output,
+ fs_->OpenOutputStream(data.ContainerPath() +
kObjectName + "0",
+ /*metadata=*/{}));
+ ASSERT_OK(output->Write(lorem_ipsum));
ASSERT_OK(output->Close());
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName,
FileType::File);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
- FileType::NotFound);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+ AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName,
FileType::File);
+ AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/",
FileType::NotFound);
+ AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir",
FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+ AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/",
FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_dir",
+ AssertFileInfo(fs_.get(), data.ContainerPath() +
"test-object-dir/some_other_dir",
FileType::Directory);
- AssertFileInfo(fs_.get(),
- PreexistingContainerPath() +
"test-object-dir/some_other_dir/",
+ AssertFileInfo(fs_.get(), data.ContainerPath() +
"test-object-dir/some_other_dir/",
FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
- FileType::NotFound);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_di",
+ AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di",
FileType::NotFound);
+ AssertFileInfo(fs_.get(), data.ContainerPath() +
"test-object-dir/some_other_di",
FileType::NotFound);
+
+ if (WithHierarchicalNamespace()) {
+ datalake_service_client_->GetFileSystemClient(data.container_name)
+ .GetDirectoryClient("test-empty-object-dir")
+ .Create();
+
+ AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir",
+ FileType::Directory);
+ }
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
- RunGetFileInfoObjectWithNestedStructureTest();
+template <class AzureEnvClass>
+class AzureFileSystemTestImpl : public AzureFileSystemTest {
+ public:
+ using AzureFileSystemTest::AzureFileSystemTest;
+
+ Result<BaseAzureEnv*> GetAzureEnv() const final { return
AzureEnvClass::GetInstance(); }
+};
+
+// How to enable the non-Azurite tests:
+//
+// You need an Azure account. You should be able to create a free account [1].
+// Through the portal Web UI, you should create a storage account [2].
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2 not premium
+// * Use LRS redundancy
+// * Set the default access tier to hot
+// * SFTP, NFS and file shares are not required.
+//
+// You must not enable Hierarchical Namespace on the storage account used for
+// AzureFlatNSFileSystemTest, but you must enable it on the storage account
+// used for AzureHierarchicalNSFileSystemTest.
+//
+// The credentials should be placed in the correct environment variables:
+//
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY
+//
+// [1]: https://azure.microsoft.com/en-gb/free/
+// [2]:
+//
https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
+using AzureFlatNSFileSystemTest = AzureFileSystemTestImpl<AzureFlatNSEnv>;
+using AzureHierarchicalNSFileSystemTest =
AzureFileSystemTestImpl<AzureHierarchicalNSEnv>;
+using AzuriteFileSystemTest = AzureFileSystemTestImpl<AzuriteEnv>;
+
+// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/
HNS)
+
+template <class AzureEnvClass>
+using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl<AzureEnvClass>;
+
+using AllEnvironments =
+ ::testing::Types<AzuriteEnv, AzureFlatNSEnv, AzureHierarchicalNSEnv>;
+
+TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments);
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) {
+ this->DetectHierarchicalNamespaceTest();
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
GetFileInfoObjectWithNestedStructure) {
- RunGetFileInfoObjectWithNestedStructureTest();
- datalake_service_client_->GetFileSystemClient(PreexistingContainerName())
- .GetDirectoryClient("test-empty-object-dir")
- .Create();
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) {
+ this->GetFileInfoObjectTest();
+}
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-empty-object-dir",
- FileType::Directory);
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) {
+ this->DeleteDirSuccessEmptyTest();
}
-void AzureFileSystemTest::RunGetFileInfoObjectTest() {
- auto object_properties =
- blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlobClient(PreexistingObjectName())
- .GetProperties()
- .Value;
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure)
{
+ this->GetFileInfoObjectWithNestedStructureTest();
+}
- AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File,
-
std::chrono::system_clock::time_point(object_properties.LastModified),
- static_cast<int64_t>(object_properties.BlobSize));
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirSuccessContainerAndDirectory) {
+ this->CreateDirSuccessContainerAndDirectoryTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessContainerOnly) {
+ this->CreateDirRecursiveSuccessContainerOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessDirectoryOnly) {
+ this->CreateDirRecursiveSuccessDirectoryOnlyTest();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessContainerAndDirectory) {
+ this->CreateDirRecursiveSuccessContainerAndDirectoryTest();
+}
+
+// Tests using a real storage account *with Hierarchical Namespace enabled*
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.RandomDirectoryPath(rng_);
+ ASSERT_RAISES(IOError, fs_->DeleteDir(path));
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ const auto blob_path = internal::ConcatAbstractPath(directory_path,
"hello.txt");
+ ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
+ ASSERT_OK(output->Write(std::string_view("hello")));
+ ASSERT_OK(output->Close());
+ arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = internal::ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ ASSERT_OK(fs_->DeleteDir(parent));
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) {
+ auto preexisting_data = SetUpPreexistingData();
+ HierarchicalPaths paths;
+ CreateHierarchicalData(&paths);
+ ASSERT_OK(fs_->DeleteDirContents(paths.directory));
+ arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory);
+ for (const auto& sub_path : paths.sub_paths) {
+ arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound);
+ }
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessNonexistent)
{
+ this->DeleteDirContentsSuccessNonexistentTest();
+}
+
+TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent)
{
+ this->DeleteDirContentsFailureNonexistentTest();
+}
+
+// Tests using Azurite (the local Azure emulator)
+
+TEST_F(AzuriteFileSystemTest,
DetectHierarchicalNamespaceFailsWithMissingContainer) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
+ ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
Review Comment:
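The error expected here is `IOError`. Changing the assertion to check for it specifically instead of using `ASSERT_NOT_OK`.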
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]