This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 56991d3efd GH-39292 [C++][FS]: Remove the AzureBackend enum and add 
more flexible connection options (#39293)
56991d3efd is described below

commit 56991d3efd57e610f5ab604086e19753bd8c834b
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Tue Dec 19 10:59:50 2023 -0300

    GH-39292 [C++][FS]: Remove the AzureBackend enum and add more flexible 
connection options (#39293)
    
    ### Rationale for this change
    
    It's good to avoid mentioning the specific test environment in the 
implementation code.
    
    ### What changes are included in this PR?
    
     - Removal of the enum
     - Removal of the `AzureOptions::backend` class member
     - Addition of more options to `AzureOptions`
     - Removal of some private string members of `AzureOptions` -- the URLs are 
built on-the-fly when the clients are instantiated now
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Changes to the public interface (`azurefs.h`) that won't affect users 
because the `AzureFS` implementation is not used yet.
    * Closes: #39292
    
    Authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/filesystem/azurefs.cc      | 61 +++++++++++++++++++++-----------
 cpp/src/arrow/filesystem/azurefs.h       | 51 ++++++++++++++++----------
 cpp/src/arrow/filesystem/azurefs_test.cc | 21 +++++++++--
 3 files changed, 91 insertions(+), 42 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index dd267aac36..1aa3e86a6f 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default;
 
 bool AzureOptions::Equals(const AzureOptions& other) const {
   // TODO(GH-38598): update here when more auth methods are added.
-  const bool equals = backend == other.backend &&
+  const bool equals = blob_storage_authority == other.blob_storage_authority &&
+                      dfs_storage_authority == other.dfs_storage_authority &&
+                      blob_storage_scheme == other.blob_storage_scheme &&
+                      dfs_storage_scheme == other.dfs_storage_scheme &&
                       default_metadata == other.default_metadata &&
-                      account_blob_url_ == other.account_blob_url_ &&
-                      account_dfs_url_ == other.account_dfs_url_ &&
+                      account_name_ == other.account_name_ &&
                       credential_kind_ == other.credential_kind_;
   if (!equals) {
     return false;
@@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
   return false;
 }
 
-void AzureOptions::SetUrlsForAccountName(const std::string& account_name) {
-  if (this->backend == AzureBackend::kAzurite) {
-    account_blob_url_ = "http://127.0.0.1:10000/"; + account_name + "/";
-    account_dfs_url_ = "http://127.0.0.1:10000/"; + account_name + "/";
-  } else {
-    account_dfs_url_ = "https://"; + account_name + ".dfs.core.windows.net/";
-    account_blob_url_ = "https://"; + account_name + ".blob.core.windows.net/";
+namespace {
+std::string BuildBaseUrl(const std::string& scheme, const std::string& 
authority,
+                         const std::string& account_name) {
+  std::string url;
+  url += scheme + "://";
+  if (!authority.empty()) {
+    if (authority[0] == '.') {
+      url += account_name;
+      url += authority;
+    } else {
+      url += authority;
+      url += "/";
+      url += account_name;
+    }
   }
+  url += "/";
+  return url;
 }
+}  // namespace
 
-Status AzureOptions::ConfigureDefaultCredential(const std::string& 
account_name) {
-  AzureOptions::SetUrlsForAccountName(account_name);
-  credential_kind_ = CredentialKind::kTokenCredential;
-  token_credential_ = 
std::make_shared<Azure::Identity::DefaultAzureCredential>();
-  return Status::OK();
+std::string AzureOptions::AccountBlobUrl(const std::string& account_name) 
const {
+  return BuildBaseUrl(blob_storage_scheme, blob_storage_authority, 
account_name);
+}
+
+std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const 
{
+  return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
 }
 
 Status AzureOptions::ConfigureAccountKeyCredential(const std::string& 
account_name,
                                                    const std::string& 
account_key) {
-  AzureOptions::SetUrlsForAccountName(account_name);
   credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+  account_name_ = account_name;
   storage_shared_key_credential_ =
       std::make_shared<Storage::StorageSharedKeyCredential>(account_name, 
account_key);
   return Status::OK();
 }
 
+Status AzureOptions::ConfigureDefaultCredential(const std::string& 
account_name) {
+  credential_kind_ = CredentialKind::kTokenCredential;
+  token_credential_ = 
std::make_shared<Azure::Identity::DefaultAzureCredential>();
+  return Status::OK();
+}
+
 Result<std::unique_ptr<Blobs::BlobServiceClient>> 
AzureOptions::MakeBlobServiceClient()
     const {
   switch (credential_kind_) {
     case CredentialKind::kAnonymous:
       break;
     case CredentialKind::kTokenCredential:
-      return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+      return 
std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
                                                         token_credential_);
     case CredentialKind::kStorageSharedKeyCredential:
-      return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+      return 
std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name_),
                                                         
storage_shared_key_credential_);
   }
   return Status::Invalid("AzureOptions doesn't contain a valid auth 
configuration");
@@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const {
     case CredentialKind::kAnonymous:
       break;
     case CredentialKind::kTokenCredential:
-      return 
std::make_unique<DataLake::DataLakeServiceClient>(account_dfs_url_,
-                                                               
token_credential_);
+      return std::make_unique<DataLake::DataLakeServiceClient>(
+          AccountDfsUrl(account_name_), token_credential_);
     case CredentialKind::kStorageSharedKeyCredential:
       return std::make_unique<DataLake::DataLakeServiceClient>(
-          account_dfs_url_, storage_shared_key_credential_);
+          AccountDfsUrl(account_name_), storage_shared_key_credential_);
   }
   return Status::Invalid("AzureOptions doesn't contain a valid auth 
configuration");
 }
diff --git a/cpp/src/arrow/filesystem/azurefs.h 
b/cpp/src/arrow/filesystem/azurefs.h
index b2c7010ff3..35c140b109 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -43,17 +43,37 @@ class DataLakeServiceClient;
 
 namespace arrow::fs {
 
-enum class AzureBackend {
-  /// \brief Official Azure Remote Backend
-  kAzure,
-  /// \brief Local Simulated Storage
-  kAzurite
-};
-
 /// Options for the AzureFileSystem implementation.
 struct ARROW_EXPORT AzureOptions {
-  /// \brief The backend to connect to: Azure or Azurite (for testing).
-  AzureBackend backend = AzureBackend::kAzure;
+  /// \brief hostname[:port] of the Azure Blob Storage Service.
+  ///
+  /// If the hostname is a relative domain name (one that starts with a '.'), 
then storage
+  /// account URLs will be constructed by prepending the account name to the 
hostname.
+  /// If the hostname is a fully qualified domain name, then the hostname will 
be used
+  /// as-is and the account name will follow the hostname in the URL path.
+  ///
+  /// Default: ".blob.core.windows.net"
+  std::string blob_storage_authority = ".blob.core.windows.net";
+
+  /// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service.
+  ///
+  /// If the hostname is a relative domain name (one that starts with a '.'), 
then storage
+  /// account URLs will be constructed by prepending the account name to the 
hostname.
+  /// If the hostname is a fully qualified domain name, then the hostname will 
be used
+  /// as-is and the account name will follow the hostname in the URL path.
+  ///
+  /// Default: ".dfs.core.windows.net"
+  std::string dfs_storage_authority = ".dfs.core.windows.net";
+
+  /// \brief Azure Blob Storage connection transport.
+  ///
+  /// Default: "https"
+  std::string blob_storage_scheme = "https";
+
+  /// \brief Azure Data Lake Storage Gen 2 connection transport.
+  ///
+  /// Default: "https"
+  std::string dfs_storage_scheme = "https";
 
   // TODO(GH-38598): Add support for more auth methods.
   // std::string connection_string;
@@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions {
   std::shared_ptr<const KeyValueMetadata> default_metadata;
 
  private:
-  std::string account_blob_url_;
-  std::string account_dfs_url_;
-
   enum class CredentialKind {
     kAnonymous,
     kTokenCredential,
     kStorageSharedKeyCredential,
   } credential_kind_ = CredentialKind::kAnonymous;
 
+  std::string account_name_;
+  std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
   std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
       storage_shared_key_credential_;
 
-  std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
-
-  void SetUrlsForAccountName(const std::string& account_name);
-
  public:
   AzureOptions();
   ~AzureOptions();
@@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions {
 
   bool Equals(const AzureOptions& other) const;
 
-  const std::string& AccountBlobUrl() const { return account_blob_url_; }
-  const std::string& AccountDfsUrl() const { return account_dfs_url_; }
+  std::string AccountBlobUrl(const std::string& account_name) const;
+  std::string AccountDfsUrl(const std::string& account_name) const;
 
   Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
   MakeBlobServiceClient() const;
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc 
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 799f3992a2..8a39c4c554 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs;
 namespace Core = Azure::Core;
 namespace DataLake = Azure::Storage::Files::DataLake;
 
+enum class AzureBackend {
+  /// \brief Official Azure Remote Backend
+  kAzure,
+  /// \brief Local Simulated Storage
+  kAzurite
+};
+
 class BaseAzureEnv : public ::testing::Environment {
  protected:
   std::string account_name_;
@@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public 
AzureEnvImpl<AzureHierarchicalNSEnv> {
 
 TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
   AzureOptions options;
-  options.backend = AzureBackend::kAzurite;  // Irrelevant for this test 
because it
-                                             // doesn't connect to the server.
   ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name"));
   EXPECT_OK_AND_ASSIGN(auto default_credential_fs, 
AzureFileSystem::Make(options));
 }
@@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test {
 
   static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
     AzureOptions options;
-    options.backend = env->backend();
+    switch (env->backend()) {
+      case AzureBackend::kAzurite:
+        options.blob_storage_authority = "127.0.0.1:10000";
+        options.dfs_storage_authority = "127.0.0.1:10000";
+        options.blob_storage_scheme = "http";
+        options.dfs_storage_scheme = "http";
+        break;
+      case AzureBackend::kAzure:
+        // Use the default values
+        break;
+    }
     ARROW_EXPECT_OK(
         options.ConfigureAccountKeyCredential(env->account_name(), 
env->account_key()));
     return options;

Reply via email to