Tom-Newton commented on code in PR #38505:
URL: https://github.com/apache/arrow/pull/38505#discussion_r1387288044
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -216,23 +227,223 @@ class TestAzureFileSystem : public ::testing::Test {
void UploadLines(const std::vector<std::string>& lines, const char*
path_to_file,
int total_size) {
// TODO(GH-38333): Switch to using Azure filesystem to write once its
implemented.
- auto blob_client =
service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlockBlobClient(path_to_file);
+ auto blob_client =
+
blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient(path_to_file);
std::string all_lines = std::accumulate(lines.begin(), lines.end(),
std::string(""));
blob_client.UploadFrom(reinterpret_cast<const uint8_t*>(all_lines.data()),
total_size);
}
};
-TEST_F(TestAzureFileSystem, OpenInputStreamString) {
+class AzuriteFileSystemTest : public AzureFileSystemTest {
+ Result<AzureOptions> MakeOptions() {
+ EXPECT_THAT(GetAzuriteEnv(), NotNull());
+ ARROW_EXPECT_OK(GetAzuriteEnv()->status());
+ AzureOptions options;
+ options.backend = AzureBackend::Azurite;
+ ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
+ GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
+ return options;
+ }
+};
+
+class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
+ Result<AzureOptions> MakeOptions() override {
+ AzureOptions options;
+ if (char* account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"))
{
+ char* account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
+ EXPECT_THAT(account_key, NotNull());
+ ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
+ return options;
+ }
+ return Status::Cancelled(
+ "Connection details not provided for a real flat namespace "
+ "account.");
+ }
+};
+
+class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
+ Result<AzureOptions> MakeOptions() override {
+ AzureOptions options;
+ if (char* account_name =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME")) {
+ char* account_key =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
+ EXPECT_THAT(account_key, NotNull());
+ ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
+ return options;
+ }
+ return Status::Cancelled(
+ "Connection details not provided for a real hierachical namespace "
+ "account.");
+ }
+};
+
+TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_));
+ ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_));
+ ASSERT_OK_AND_EQ(true,
hierarchical_namespace.Enabled(PreexistingContainerName()));
+}
+
+TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_));
+ ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
+}
+
+TEST_F(AzuriteFileSystemTest,
DetectHierarchicalNamespaceFailsWithMissingContainer) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_));
+ ASSERT_NOT_OK(hierarchical_namespace.Enabled("non-existent-container"));
+}
+
+TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
+ arrow::fs::AssertFileInfo(fs_.get(), "", FileType::Directory);
+
+ // URI
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
+}
+
+TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
+ arrow::fs::AssertFileInfo(fs_.get(), PreexistingContainerName(),
FileType::Directory);
+
+ arrow::fs::AssertFileInfo(fs_.get(), "non-existent-container",
FileType::NotFound);
+
+ // URI
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
PreexistingContainerName()));
+}
+
+TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
+ // Adds detailed tests to handle cases of different edge cases
+ // with directory naming conventions (e.g. with and without slashes).
+ constexpr auto kObjectName =
"test-object-dir/some_other_dir/another_dir/foo";
+ // TODO(GH-38333): Switch to using Azure filesystem to write once its
implemented.
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient(kObjectName)
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ // 0 is immediately after "/" lexicographically, ensure that this doesn't
+ // cause unexpected issues.
+ // TODO(GH-38333): Switch to using Azure filesystem to write once its
implemented.
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient("test-object-dir/some_other_dir0")
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient(std::string(kObjectName) + "0")
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName,
FileType::File);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
+ FileType::NotFound);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_dir",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(),
+ PreexistingContainerPath() +
"test-object-dir/some_other_dir/",
+ FileType::Directory);
+
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
+ FileType::NotFound);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_di",
+ FileType::NotFound);
+}
+
+TEST_F(AzureHierarchicalNamespaceFileSystemTest,
GetFileInfoObjectWithNestedStructure) {
+ // Adds detailed tests to handle cases of different edge cases
+ // with directory naming conventions (e.g. with and without slashes).
+ constexpr auto kObjectName =
"test-object-dir/some_other_dir/another_dir/foo";
+ // TODO(GH-38333): Switch to using Azure filesystem to write once its
implemented.
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient(kObjectName)
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ // 0 is immediately after "/" lexicographically, ensure that this doesn't
+ // cause unexpected issues.
+ // TODO(GH-38333): Switch to using Azure filesystem to write once its
implemented.
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient("test-object-dir/some_other_dir0")
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ .GetBlockBlobClient(std::string(kObjectName) + "0")
+ .UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
strlen(kLoremIpsum));
+
+ datalake_service_client_->GetFileSystemClient(PreexistingContainerName())
+ .GetDirectoryClient("test-empty-object-dir")
+ .Create();
+
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName,
FileType::File);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
+ FileType::NotFound);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_dir",
+ FileType::Directory);
+ AssertFileInfo(fs_.get(),
+ PreexistingContainerPath() +
"test-object-dir/some_other_dir/",
+ FileType::Directory);
+
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
+ FileType::NotFound);
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_di",
+ FileType::NotFound);
+
+ AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-empty-object-dir",
+ FileType::Directory);
Review Comment:
I think I'm happy to leave out such an assertion, at least initially. If it
were Python I would have done it, but it seems like mocking in C++ would be
more complicated, even if I did understand the language :sweat_smile:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]