This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 56991d3efd GH-39292 [C++][FS]: Remove the AzureBackend enum and add
more flexible connection options (#39293)
56991d3efd is described below
commit 56991d3efd57e610f5ab604086e19753bd8c834b
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Tue Dec 19 10:59:50 2023 -0300
GH-39292 [C++][FS]: Remove the AzureBackend enum and add more flexible
connection options (#39293)
### Rationale for this change
It's good to avoid mentioning the specific test environment in the
implementation code.
### What changes are included in this PR?
- Removal of the enum
- Removal of the `AzureOptions::backend` class member
- Addition of more options to `AzureOptions`
- Removal of some private string members of `AzureOptions` -- the URLs are
built on-the-fly when the clients are instantiated now
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Changes to the public interface (`azurefs.h`) that won't affect users
because the `AzureFS` implementation is not used yet.
* Closes: #39292
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 61 +++++++++++++++++++++-----------
cpp/src/arrow/filesystem/azurefs.h | 51 ++++++++++++++++----------
cpp/src/arrow/filesystem/azurefs_test.cc | 21 +++++++++--
3 files changed, 91 insertions(+), 42 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index dd267aac36..1aa3e86a6f 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default;
bool AzureOptions::Equals(const AzureOptions& other) const {
// TODO(GH-38598): update here when more auth methods are added.
- const bool equals = backend == other.backend &&
+ const bool equals = blob_storage_authority == other.blob_storage_authority &&
+ dfs_storage_authority == other.dfs_storage_authority &&
+ blob_storage_scheme == other.blob_storage_scheme &&
+ dfs_storage_scheme == other.dfs_storage_scheme &&
default_metadata == other.default_metadata &&
- account_blob_url_ == other.account_blob_url_ &&
- account_dfs_url_ == other.account_dfs_url_ &&
+ account_name_ == other.account_name_ &&
credential_kind_ == other.credential_kind_;
if (!equals) {
return false;
@@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
return false;
}
-void AzureOptions::SetUrlsForAccountName(const std::string& account_name) {
- if (this->backend == AzureBackend::kAzurite) {
- account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
- account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
- } else {
- account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
- account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
+namespace {
+std::string BuildBaseUrl(const std::string& scheme, const std::string&
authority,
+ const std::string& account_name) {
+ std::string url;
+ url += scheme + "://";
+ if (!authority.empty()) {
+ if (authority[0] == '.') {
+ url += account_name;
+ url += authority;
+ } else {
+ url += authority;
+ url += "/";
+ url += account_name;
+ }
}
+ url += "/";
+ return url;
}
+} // namespace
-Status AzureOptions::ConfigureDefaultCredential(const std::string&
account_name) {
- AzureOptions::SetUrlsForAccountName(account_name);
- credential_kind_ = CredentialKind::kTokenCredential;
- token_credential_ =
std::make_shared<Azure::Identity::DefaultAzureCredential>();
- return Status::OK();
+std::string AzureOptions::AccountBlobUrl(const std::string& account_name)
const {
+ return BuildBaseUrl(blob_storage_scheme, blob_storage_authority,
account_name);
+}
+
+std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const
{
+ return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}
Status AzureOptions::ConfigureAccountKeyCredential(const std::string&
account_name,
const std::string&
account_key) {
- AzureOptions::SetUrlsForAccountName(account_name);
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+ account_name_ = account_name;
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name,
account_key);
return Status::OK();
}
+Status AzureOptions::ConfigureDefaultCredential(const std::string&
account_name) {
+ credential_kind_ = CredentialKind::kTokenCredential;
+ token_credential_ =
std::make_shared<Azure::Identity::DefaultAzureCredential>();
+ return Status::OK();
+}
+
Result<std::unique_ptr<Blobs::BlobServiceClient>>
AzureOptions::MakeBlobServiceClient()
const {
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
- return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+ return
std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
- return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+ return
std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth
configuration");
@@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
- return
std::make_unique<DataLake::DataLakeServiceClient>(account_dfs_url_,
-
token_credential_);
+ return std::make_unique<DataLake::DataLakeServiceClient>(
+ AccountDfsUrl(account_name_), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
- account_dfs_url_, storage_shared_key_credential_);
+ AccountDfsUrl(account_name_), storage_shared_key_credential_);
}
return Status::Invalid("AzureOptions doesn't contain a valid auth
configuration");
}
diff --git a/cpp/src/arrow/filesystem/azurefs.h
b/cpp/src/arrow/filesystem/azurefs.h
index b2c7010ff3..35c140b109 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -43,17 +43,37 @@ class DataLakeServiceClient;
namespace arrow::fs {
-enum class AzureBackend {
- /// \brief Official Azure Remote Backend
- kAzure,
- /// \brief Local Simulated Storage
- kAzurite
-};
-
/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
- /// \brief The backend to connect to: Azure or Azurite (for testing).
- AzureBackend backend = AzureBackend::kAzure;
+ /// \brief hostname[:port] of the Azure Blob Storage Service.
+ ///
+ /// If the hostname is a relative domain name (one that starts with a '.'),
then storage
+ /// account URLs will be constructed by prepending the account name to the
hostname.
+ /// If the hostname is a fully qualified domain name, then the hostname will
be used
+ /// as-is and the account name will follow the hostname in the URL path.
+ ///
+ /// Default: ".blob.core.windows.net"
+ std::string blob_storage_authority = ".blob.core.windows.net";
+
+ /// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service.
+ ///
+ /// If the hostname is a relative domain name (one that starts with a '.'),
then storage
+ /// account URLs will be constructed by prepending the account name to the
hostname.
+ /// If the hostname is a fully qualified domain name, then the hostname will
be used
+ /// as-is and the account name will follow the hostname in the URL path.
+ ///
+ /// Default: ".dfs.core.windows.net"
+ std::string dfs_storage_authority = ".dfs.core.windows.net";
+
+ /// \brief Azure Blob Storage connection transport.
+ ///
+ /// Default: "https"
+ std::string blob_storage_scheme = "https";
+
+ /// \brief Azure Data Lake Storage Gen 2 connection transport.
+ ///
+ /// Default: "https"
+ std::string dfs_storage_scheme = "https";
// TODO(GH-38598): Add support for more auth methods.
// std::string connection_string;
@@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions {
std::shared_ptr<const KeyValueMetadata> default_metadata;
private:
- std::string account_blob_url_;
- std::string account_dfs_url_;
-
enum class CredentialKind {
kAnonymous,
kTokenCredential,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;
+ std::string account_name_;
+ std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;
- std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
-
- void SetUrlsForAccountName(const std::string& account_name);
-
public:
AzureOptions();
~AzureOptions();
@@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions {
bool Equals(const AzureOptions& other) const;
- const std::string& AccountBlobUrl() const { return account_blob_url_; }
- const std::string& AccountDfsUrl() const { return account_dfs_url_; }
+ std::string AccountBlobUrl(const std::string& account_name) const;
+ std::string AccountDfsUrl(const std::string& account_name) const;
Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
MakeBlobServiceClient() const;
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 799f3992a2..8a39c4c554 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs;
namespace Core = Azure::Core;
namespace DataLake = Azure::Storage::Files::DataLake;
+enum class AzureBackend {
+ /// \brief Official Azure Remote Backend
+ kAzure,
+ /// \brief Local Simulated Storage
+ kAzurite
+};
+
class BaseAzureEnv : public ::testing::Environment {
protected:
std::string account_name_;
@@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public
AzureEnvImpl<AzureHierarchicalNSEnv> {
TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
AzureOptions options;
- options.backend = AzureBackend::kAzurite; // Irrelevant for this test
because it
- // doesn't connect to the server.
ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
EXPECT_OK_AND_ASSIGN(auto default_credential_fs,
AzureFileSystem::Make(options));
}
@@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test {
static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
AzureOptions options;
- options.backend = env->backend();
+ switch (env->backend()) {
+ case AzureBackend::kAzurite:
+ options.blob_storage_authority = "127.0.0.1:10000";
+ options.dfs_storage_authority = "127.0.0.1:10000";
+ options.blob_storage_scheme = "http";
+ options.dfs_storage_scheme = "http";
+ break;
+ case AzureBackend::kAzure:
+ // Use the default values
+ break;
+ }
ARROW_EXPECT_OK(
options.ConfigureAccountKeyCredential(env->account_name(),
env->account_key()));
return options;