kou commented on code in PR #39207:
URL: https://github.com/apache/arrow/pull/39207#discussion_r1424774080
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
#include "arrow/util/logging.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
// -----------------------------------------------------------------------
// AzureOptions Implementation
AzureOptions::AzureOptions() = default;
+AzureOptions::~AzureOptions() = default;
+
bool AzureOptions::Equals(const AzureOptions& other) const {
- return (account_dfs_url == other.account_dfs_url &&
- account_blob_url == other.account_blob_url &&
- credentials_kind == other.credentials_kind &&
- default_metadata == other.default_metadata);
+ // TODO(GH-38598): update here when more auth methods are added.
+ const bool ret = backend == other.backend &&
Review Comment:
How about using more meaningful name such as `equal`?
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
#include "arrow/util/logging.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
// -----------------------------------------------------------------------
// AzureOptions Implementation
AzureOptions::AzureOptions() = default;
+AzureOptions::~AzureOptions() = default;
+
bool AzureOptions::Equals(const AzureOptions& other) const {
- return (account_dfs_url == other.account_dfs_url &&
- account_blob_url == other.account_blob_url &&
- credentials_kind == other.credentials_kind &&
- default_metadata == other.default_metadata);
+ // TODO(GH-38598): update here when more auth methods are added.
+ const bool ret = backend == other.backend &&
+ default_metadata == other.default_metadata &&
+ account_blob_url_ == other.account_blob_url_ &&
+ account_dfs_url_ == other.account_dfs_url_ &&
+ credential_kind_ == other.credential_kind_;
+ if (!ret) {
+ return false;
+ }
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ return true;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return storage_shared_key_credential_->AccountName ==
+ other.storage_shared_key_credential_->AccountName;
+ }
+ DCHECK(false);
+ return false;
Review Comment:
How about using `default`?
```suggestion
default:
DCHECK(false);
return false;
}
```
We may want to add a comment something like "never happen".
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -193,51 +265,123 @@ TEST(AzureFileSystem, OptionsCompare) {
EXPECT_TRUE(options.Equals(options));
}
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+ using RNG = std::mt19937_64;
+
public:
+ const std::string container_name;
+ static constexpr char const* kObjectName = "test-object-name";
+
+ static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
+ public:
+ explicit PreexistingData(RNG& rng) :
container_name{RandomContainerName(rng)} {}
+
+ // Accessors
+ std::string ContainerPath() const { return container_name + '/'; }
+ std::string ObjectPath() const { return ContainerPath() + kObjectName; }
+ std::string NotFoundObjectPath() const { return ContainerPath() +
"not-found"; }
+
+ std::string RandomDirectoryPath(RNG& rng) {
+ return internal::ConcatAbstractPath(container_name,
RandomDirectoryName(rng));
+ }
+
+ // Utilities
+
+ static std::string RandomContainerName(RNG& rng) { return RandomChars(32,
rng); }
+ static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32,
rng); }
+
+ static std::string RandomLine(int lineno, std::size_t width, RNG& rng) {
+ auto line = std::to_string(lineno) + ": ";
+ line += RandomChars(width - line.size() - 1, rng);
+ line += '\n';
+ return line;
+ }
+
+ static std::size_t RandomIndex(std::size_t end, RNG& rng) {
+ return std::uniform_int_distribution<std::size_t>(0, end - 1)(rng);
+ }
+
+ static std::string RandomChars(std::size_t count, RNG& rng) {
+ auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+ std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
+ std::string s;
+ std::generate_n(std::back_inserter(s), count, [&] { return
fillers[d(rng)]; });
+ return s;
+ }
+};
+
+class AzureFileSystemTest : public ::testing::Test {
+ protected:
+ // Set in constructor
+ std::mt19937_64 rng_;
+
+ // Set in SetUp()
+ int64_t debug_log_start_ = 0;
+ bool set_up_succeeded_ = false;
+ AzureOptions options_;
+
std::shared_ptr<FileSystem> fs_;
std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
- std::unique_ptr<Files::DataLake::DataLakeServiceClient>
datalake_service_client_;
- AzureOptions options_;
- std::mt19937_64 generator_;
- std::string container_name_;
- bool suite_skipped_ = false;
+ std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
- AzureFileSystemTest() : generator_(std::random_device()()) {}
+ public:
+ AzureFileSystemTest() : rng_(std::random_device()()) {}
+
+ virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
- virtual Result<AzureOptions> MakeOptions() = 0;
+ static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+ AzureOptions options;
+ options.backend = env->backend();
+ ARROW_EXPECT_OK(
+ options.ConfigureAccountKeyCredential(env->account_name(),
env->account_key()));
+ return options;
+ }
void SetUp() override {
- auto options = MakeOptions();
- if (options.ok()) {
- options_ = *options;
+ auto make_options = [this]() -> Result<AzureOptions> {
+ ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+ EXPECT_THAT(env, NotNull());
+ ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+ return MakeOptions(env);
+ };
+ auto options_res = make_options();
+ if (!options_res.ok() && options_res.status().IsCancelled()) {
+ GTEST_SKIP() << options_res.status().message();
} else {
- suite_skipped_ = true;
- GTEST_SKIP() << options.status().message();
+ EXPECT_OK_AND_ASSIGN(options_, std::move(options_res));
Review Comment:
Do we need this explicit `std::move()`?
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -71,56 +72,113 @@ using ::testing::Not;
using ::testing::NotNull;
namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+ std::string account_name_;
+ std::string account_key_;
+
+ BaseAzureEnv(std::string account_name, std::string account_key)
+ : account_name_(std::move(account_name)),
account_key_(std::move(account_key)) {}
-class AzuriteEnv : public ::testing::Environment {
public:
- AzuriteEnv() {
- account_name_ = "devstoreaccount1";
- account_key_ =
-
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
- "KBHBeksoGMGw==";
- auto exe_path = bp::search_path("azurite");
- if (exe_path.empty()) {
- auto error = std::string("Could not find Azurite emulator.");
- status_ = Status::Invalid(error);
- return;
+ ~BaseAzureEnv() override = default;
+
+ const std::string& account_name() const { return account_name_; }
+ const std::string& account_key() const { return account_key_; }
+
+ virtual AzureBackend backend() const = 0;
+
+ virtual bool WithHierarchicalNamespace() const { return false; }
+
+ virtual Result<int64_t> GetDebugLogSize() { return 0; }
+ virtual Status DumpDebugLog(int64_t position) {
+ return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+ }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ protected:
+ static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+ ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make());
+ auto* heap_ptr = instance.release();
+ ::testing::AddGlobalTestEnvironment(heap_ptr);
+ return heap_ptr;
+ }
+
+ using BaseAzureEnv::BaseAzureEnv;
+
+ static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+ const std::string& account_name_var, const std::string& account_key_var)
{
+ const auto account_name = std::getenv(account_name_var.c_str());
+ const auto account_key = std::getenv(account_key_var.c_str());
+ if (!account_name) {
+ return Status::Cancelled(account_name_var + " not set.");
}
- auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
- auto debug_log_path_result = temp_dir_->path().Join("debug.log");
- if (!debug_log_path_result.ok()) {
- status_ = debug_log_path_result.status();
- return;
+ if (!account_key) {
+ return Status::Cancelled(account_key_var + " not set.");
}
- debug_log_path_ = *debug_log_path_result;
- server_process_ =
- bp::child(boost::this_process::environment(), exe_path, "--silent",
"--location",
- temp_dir_->path().ToString(), "--debug",
debug_log_path_.ToString());
- if (!(server_process_.valid() && server_process_.running())) {
- auto error = "Could not start Azurite emulator.";
- server_process_.terminate();
- server_process_.wait();
- status_ = Status::Invalid(error);
- return;
+ return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name,
account_key)};
+ }
+
+ public:
+ ~AzureEnvImpl() override = default;
+
+ static Result<BaseAzureEnv*> GetInstance() {
+ static auto env = MakeAndAddToGlobalTestEnvironment();
+ if (env.ok()) {
+ return env;
}
- status_ = Status::OK();
+ return env.status();
Review Comment:
Can we just always return `env`?
(`return env`)
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
#include "arrow/util/logging.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
// -----------------------------------------------------------------------
// AzureOptions Implementation
AzureOptions::AzureOptions() = default;
+AzureOptions::~AzureOptions() = default;
+
bool AzureOptions::Equals(const AzureOptions& other) const {
- return (account_dfs_url == other.account_dfs_url &&
- account_blob_url == other.account_blob_url &&
- credentials_kind == other.credentials_kind &&
- default_metadata == other.default_metadata);
+ // TODO(GH-38598): update here when more auth methods are added.
+ const bool ret = backend == other.backend &&
+ default_metadata == other.default_metadata &&
+ account_blob_url_ == other.account_blob_url_ &&
+ account_dfs_url_ == other.account_dfs_url_ &&
+ credential_kind_ == other.credential_kind_;
+ if (!ret) {
+ return false;
+ }
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ return true;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return storage_shared_key_credential_->AccountName ==
+ other.storage_shared_key_credential_->AccountName;
+ }
+ DCHECK(false);
+ return false;
}
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
- const std::string&
account_key) {
- if (this->backend == AzureBackend::Azurite) {
- account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
- account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::kAzurite) {
+ account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
- account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
- account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
}
- storage_credentials_provider =
-
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-
account_key);
- credentials_kind = AzureCredentialsKind::StorageCredentials;
+ credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+ storage_shared_key_credential_ =
+ std::make_shared<Storage::StorageSharedKeyCredential>(account_name,
account_key);
return Status::OK();
}
+std::unique_ptr<Blobs::BlobServiceClient>
AzureOptions::MakeBlobServiceClient() const {
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ break;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+
storage_shared_key_credential_);
+ }
+ DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+ return nullptr;
+}
+
+std::unique_ptr<DataLake::DataLakeServiceClient>
AzureOptions::MakeDataLakeServiceClient()
+ const {
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ break;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return std::make_unique<DataLake::DataLakeServiceClient>(
+ account_dfs_url_, storage_shared_key_credential_);
+ }
+ DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+ return nullptr;
Review Comment:
ditto.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -33,42 +33,85 @@
#include "arrow/util/logging.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Storage = Azure::Storage;
// -----------------------------------------------------------------------
// AzureOptions Implementation
AzureOptions::AzureOptions() = default;
+AzureOptions::~AzureOptions() = default;
+
bool AzureOptions::Equals(const AzureOptions& other) const {
- return (account_dfs_url == other.account_dfs_url &&
- account_blob_url == other.account_blob_url &&
- credentials_kind == other.credentials_kind &&
- default_metadata == other.default_metadata);
+ // TODO(GH-38598): update here when more auth methods are added.
+ const bool ret = backend == other.backend &&
+ default_metadata == other.default_metadata &&
+ account_blob_url_ == other.account_blob_url_ &&
+ account_dfs_url_ == other.account_dfs_url_ &&
+ credential_kind_ == other.credential_kind_;
+ if (!ret) {
+ return false;
+ }
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ return true;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return storage_shared_key_credential_->AccountName ==
+ other.storage_shared_key_credential_->AccountName;
+ }
+ DCHECK(false);
+ return false;
}
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
- const std::string&
account_key) {
- if (this->backend == AzureBackend::Azurite) {
- account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
- account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::kAzurite) {
+ account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
- account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
- account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
}
- storage_credentials_provider =
-
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-
account_key);
- credentials_kind = AzureCredentialsKind::StorageCredentials;
+ credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+ storage_shared_key_credential_ =
+ std::make_shared<Storage::StorageSharedKeyCredential>(account_name,
account_key);
return Status::OK();
}
+std::unique_ptr<Blobs::BlobServiceClient>
AzureOptions::MakeBlobServiceClient() const {
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ break;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+
storage_shared_key_credential_);
+ }
+ DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration";
+ return nullptr;
Review Comment:
How about using `Result<std::unique_ptr<Blobs::BlobServiceClient>>` instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]