kou commented on code in PR #12914:
URL: https://github.com/apache/arrow/pull/12914#discussion_r930542169


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/"; + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/"; + account_name + "/";
+  } else {
+    account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");

Review Comment:
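   Do we need to escape the `.` in `".blob"`? In a regular expression an unescaped `.` matches any non-newline character, not only a literal dot.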



##########
cpp/src/arrow/filesystem/azurefs.h:
##########
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/uri.h"
+
+namespace Azure {
+namespace Core {
+namespace Credentials {
+
+class TokenCredential;
+
+}  // namespace Credentials
+}  // namespace Core
+namespace Storage {
+
+class StorageSharedKeyCredential;
+
+}  // namespace Storage
+}  // namespace Azure
+
+namespace arrow {
+namespace fs {
+
+enum class AzureCredentialsKind : int8_t {
+  /// Anonymous access (no credentials used), public
+  Anonymous,
+  /// Use explicitly-provided access key pair
+  StorageCredentials,
+  /// Use ServicePrincipleCredentials
+  ServicePrincipleCredentials,
+  /// Use Sas Token to authenticate
+  Sas,
+  /// Use Connection String
+  ConnectionString
+};
+
+/// Options for the AzureFileSystem implementation.
+struct ARROW_EXPORT AzureOptions {
+  std::string scheme;
+  std::string account_dfs_url;
+  std::string account_blob_url;
+  bool is_azurite = false;
+  AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+
+  std::string sas_token;
+  std::string connection_string;
+  std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
+      storage_credentials_provider;
+  std::shared_ptr<Azure::Core::Credentials::TokenCredential>
+      service_principle_credentials_provider;
+
+  AzureOptions();
+
+  Result<std::string> GetAccountNameFromConnectionString(
+      const std::string& connectionString);

Review Comment:
   ```suggestion
         const std::string& connection_string);
   ```



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/"; + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/"; + account_name + "/";
+  } else {
+    account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {
+      RETURN_NOT_OK(FromLocalHostString(&src));
+    }
+    if (internal::IsLikelyUri(src)) {
+      RETURN_NOT_OK(ExtractBlobPath(&src));
+    }
+    src = internal::RemoveLeadingSlash(src);
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::IOError("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static Status FromLocalHostString(util::string_view* src) {
+    // src = http://127.0.0.1:10000/accountName/pathToBlob
+    auto port = src->find("127.0.0.1");
+    // src = 127.0.0.1:10000/accountName/pathToBlob
+    *src = src->substr(port);
+    auto first_sep = src->find_first_of(kSep);
+    if (first_sep == std::string::npos) {
+      return Status::IOError("Missing account name in Azure Blob Storage URI");
+    }
+    // src = accountName/pathToBlob
+    *src = src->substr(first_sep + 1);
+    auto sec_sep = src->find_first_of(kSep);
+    if (sec_sep == std::string::npos) {
+      return Status::IOError("Missing container name in Azure Blob Storage 
URI");
+    }
+    // src = pathToBlob
+    *src = src->substr(sec_sep + 1);
+    return Status::OK();
+  }
+
+  // Removes scheme, host and port from the uri
+  static Status ExtractBlobPath(util::string_view* src) {
+    std::string text = ".core.windows.net";
+    auto pos = src->find(text);
+    if (pos == std::string::npos) {
+      return Status::IOError("Invalid Azure blob storage URI provided: ", src);
+    }
+    pos = src->find("/", pos);
+    if (pos == std::string::npos) {
+      *src = "";
+    } else {
+      *src = src->substr(pos + 1);
+    }
+    return Status::OK();
+  }
+
+  static Status Validate(const AzurePath* path) {
+    auto result = 
internal::ValidateAbstractPathParts(path->path_to_file_parts);

Review Comment:
   How about using `status` instead of `result`, since it's an `arrow::Status` and not an `arrow::Result`?
   
   ```suggestion
       auto status = 
internal::ValidateAbstractPathParts(path->path_to_file_parts);
   ```



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/"; + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/"; + account_name + "/";
+  } else {
+    account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {
+      RETURN_NOT_OK(FromLocalHostString(&src));
+    }
+    if (internal::IsLikelyUri(src)) {
+      RETURN_NOT_OK(ExtractBlobPath(&src));
+    }
+    src = internal::RemoveLeadingSlash(src);
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::IOError("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static Status FromLocalHostString(util::string_view* src) {
+    // src = http://127.0.0.1:10000/accountName/pathToBlob
+    auto port = src->find("127.0.0.1");
+    // src = 127.0.0.1:10000/accountName/pathToBlob
+    *src = src->substr(port);
+    auto first_sep = src->find_first_of(kSep);
+    if (first_sep == std::string::npos) {
+      return Status::IOError("Missing account name in Azure Blob Storage URI");
+    }
+    // src = accountName/pathToBlob
+    *src = src->substr(first_sep + 1);
+    auto sec_sep = src->find_first_of(kSep);
+    if (sec_sep == std::string::npos) {
+      return Status::IOError("Missing container name in Azure Blob Storage 
URI");
+    }
+    // src = pathToBlob
+    *src = src->substr(sec_sep + 1);
+    return Status::OK();
+  }
+
+  // Removes scheme, host and port from the uri
+  static Status ExtractBlobPath(util::string_view* src) {
+    std::string text = ".core.windows.net";
+    auto pos = src->find(text);
+    if (pos == std::string::npos) {
+      return Status::IOError("Invalid Azure blob storage URI provided: ", src);
+    }
+    pos = src->find("/", pos);
+    if (pos == std::string::npos) {
+      *src = "";
+    } else {
+      *src = src->substr(pos + 1);
+    }
+    return Status::OK();
+  }
+
+  static Status Validate(const AzurePath* path) {
+    auto result = 
internal::ValidateAbstractPathParts(path->path_to_file_parts);
+    if (!result.ok()) {
+      return Status::Invalid(result.message(), " in path ", path->full_path);
+    } else {
+      return result;
+    }
+  }
+
+  AzurePath parent() const {
+    DCHECK(!path_to_file_parts.empty());

Review Comment:
   ```suggestion
       DCHECK(has_parent());
   ```



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);

Review Comment:
   Does this work with `AccountName=account`?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/"; + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/"; + account_name + "/";
+  } else {
+    account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {

Review Comment:
   Does this work with 
`https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/127.0.0.1.txt`?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {

Review Comment:
   Does this work with `DefaultEndpointsProtocol=https;AccountName=account`?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(

Review Comment:
   `Status::Invalid` may be better because this is not an I/O-related operation.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/"; + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/"; + account_name + "/";
+  } else {
+    account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://"; + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://"; + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt

Review Comment:
   It seems that `http://127.0.0.1/...` is an expected input here too.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');

Review Comment:
   `pos_semicolon`? The character being searched for is `;`, which is a semicolon, not a colon.



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+  } else {
+    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {
+      RETURN_NOT_OK(FromLocalHostString(&src));
+    }
+    if (internal::IsLikelyUri(src)) {
+      RETURN_NOT_OK(ExtractBlobPath(&src));
+    }
+    src = internal::RemoveLeadingSlash(src);
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::IOError("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static Status FromLocalHostString(util::string_view* src) {
+    // src = http://127.0.0.1:10000/accountName/pathToBlob
+    auto port = src->find("127.0.0.1");

Review Comment:
   Should this be named `host`? It holds the position of the host (`127.0.0.1`), not the port.
   
   Anyway, can we use `arrow::internal::Uri::Parse()` here?



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+  } else {
+    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {
+      RETURN_NOT_OK(FromLocalHostString(&src));
+    }
+    if (internal::IsLikelyUri(src)) {
+      RETURN_NOT_OK(ExtractBlobPath(&src));
+    }
+    src = internal::RemoveLeadingSlash(src);
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::IOError("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static Status FromLocalHostString(util::string_view* src) {
+    // src = http://127.0.0.1:10000/accountName/pathToBlob
+    auto port = src->find("127.0.0.1");
+    // src = 127.0.0.1:10000/accountName/pathToBlob
+    *src = src->substr(port);
+    auto first_sep = src->find_first_of(kSep);
+    if (first_sep == std::string::npos) {
+      return Status::IOError("Missing account name in Azure Blob Storage URI");
+    }
+    // src = accountName/pathToBlob
+    *src = src->substr(first_sep + 1);
+    auto sec_sep = src->find_first_of(kSep);
+    if (sec_sep == std::string::npos) {
+      return Status::IOError("Missing container name in Azure Blob Storage 
URI");
+    }
+    // src = pathToBlob
+    *src = src->substr(sec_sep + 1);
+    return Status::OK();
+  }
+
+  // Removes scheme, host and port from the uri
+  static Status ExtractBlobPath(util::string_view* src) {
+    std::string text = ".core.windows.net";
+    auto pos = src->find(text);

Review Comment:
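   Does this work with
   `http://127.0.0.1/accountName/example.core.windows.net/...` or
   `http://a.core.windows.net.example.com/accountName/...`?
   The substring search for `.core.windows.net` also matches inside the path
   (first URL) or in the middle of an unrelated host name (second URL).
   
   Can we use `arrow::internal::Uri::Parse()` here?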



##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -0,0 +1,2025 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <regex>
+#include <sstream>
+#include <thread>
+#include <unordered_map>
+#include <utility>
+
+#include <azure/core/credentials/credentials.hpp>
+#include <azure/identity/client_secret_credential.hpp>
+#include <azure/identity/managed_identity_credential.hpp>
+#include <azure/storage/blobs.hpp>
+#include <azure/storage/files/datalake.hpp>
+
+#include "arrow/util/windows_fixup.h"
+
+#include "arrow/buffer.h"
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/atomic_shared_ptr.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/future.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+
+static const char kSep = '/';
+
+// -----------------------------------------------------------------------
+// AzureOptions implementation
+
+AzureOptions::AzureOptions() {}
+
+Result<std::string> AzureOptions::GetAccountNameFromConnectionString(
+    const std::string& connection_string) {
+  std::string text = "AccountName=";
+  auto pos_text = connection_string.find(text);
+  if (pos_text == std::string::npos) {
+    return Status::IOError(
+        "Cannot find account name in Azure Blob Storage connection string: '",
+        connection_string, "'");
+  }
+  auto pos_colon = connection_string.find(';');
+  pos_colon = connection_string.find(';', pos_colon + 1);
+  if (pos_colon == std::string::npos) {
+    return Status::IOError("Invalid Azure Blob Storage connection string: '",
+                           connection_string, "' passed");
+  }
+  std::string account_name = connection_string.substr(pos_text + text.size(), 
pos_colon);
+  return account_name;
+}
+
+Status AzureOptions::ConfigureAnonymousCredentials(const std::string& 
account_name) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  credentials_kind = AzureCredentialsKind::Anonymous;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& 
account_name,
+                                                    const std::string& 
account_key) {
+  if (this->is_azurite) {
+    account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+    account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+  } else {
+    account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+    account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  }
+  storage_credentials_provider =
+      
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+                                                                   
account_key);
+  credentials_kind = AzureCredentialsKind::StorageCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureConnectionStringCredentials(
+    const std::string& connection_string_uri) {
+  ARROW_ASSIGN_OR_RAISE(auto account_name,
+                        
GetAccountNameFromConnectionString(connection_string_uri));
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  connection_string = connection_string_uri;
+  credentials_kind = AzureCredentialsKind::ConnectionString;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureServicePrincipleCredentials(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+  account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+  service_principle_credentials_provider =
+      std::make_shared<Azure::Identity::ClientSecretCredential>(tenant_id, 
client_id,
+                                                                client_secret);
+  credentials_kind = AzureCredentialsKind::ServicePrincipleCredentials;
+  return Status::OK();
+}
+
+Status AzureOptions::ConfigureSasCredentials(const std::string& uri) {
+  Uri url;
+  RETURN_NOT_OK(url.Parse(uri));
+  sas_token = "?" + url.query_string();
+  account_blob_url = url.scheme() + "://" + url.host() + kSep;
+  account_dfs_url = std::regex_replace(account_blob_url, std::regex(".blob"), 
".dfs");
+  credentials_kind = AzureCredentialsKind::Sas;
+  return Status::OK();
+}
+
+bool AzureOptions::Equals(const AzureOptions& other) const {
+  return (scheme == other.scheme && account_dfs_url == other.account_dfs_url &&
+          account_blob_url == other.account_blob_url &&
+          credentials_kind == other.credentials_kind);
+}
+
+Result<AzureOptions> AzureOptions::FromAnonymous(const std::string& 
account_name) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromAccountKey(const std::string& 
account_name,
+                                                  const std::string& 
account_key) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, 
account_key));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromConnectionString(
+    const std::string& connection_string) {
+  AzureOptions options;
+  
RETURN_NOT_OK(options.ConfigureConnectionStringCredentials(connection_string));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromServicePrincipleCredential(
+    const std::string& account_name, const std::string& tenant_id,
+    const std::string& client_id, const std::string& client_secret) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureServicePrincipleCredentials(account_name, 
tenant_id,
+                                                             client_id, 
client_secret));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromSas(const std::string& uri) {
+  AzureOptions options;
+  RETURN_NOT_OK(options.ConfigureSasCredentials(uri));
+  return options;
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
+                                           std::string* out_path) {
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  return FromUri(uri, out_path);
+}
+
+Result<AzureOptions> AzureOptions::FromUri(const Uri& uri, std::string* 
out_path) {
+  // uri =
+  // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+  AzureOptions options;
+  // host = accountName.dfs.core.windows.net
+  const auto host = uri.host();
+  // path_to_blob = /pathToBlob/
+  const auto path_to_blob = uri.path();
+  std::string account_name;
+  if (host.empty()) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  auto pos = host.find('.');
+  if (pos == std::string::npos) {
+    return Status::IOError("Missing container in Azure Blob Storage URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string full_path = path_to_blob;
+  // account_name = accountName
+  account_name = host.substr(0, pos);
+  if (full_path.empty()) {
+    full_path = account_name;
+  } else {
+    if (full_path[0] != '/') {
+      return Status::IOError("Azure Blob Storage URI should be absolute, not 
relative");
+    }
+    // full_path = accountName/pathToBlob/
+    full_path = account_name + path_to_blob;
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(full_path));
+  }
+  // scheme = https
+  options.scheme = uri.scheme();
+  // query_string = sas_token_key=sas_token_value
+  const auto query_string = uri.query_string();
+  if (!query_string.empty()) {
+    // Accepted Uri =
+    // 
https://accountName.dfs.core.windows.net/pathToBlob/?sas_token_key=sas_token_value
+    RETURN_NOT_OK(options.ConfigureSasCredentials(uri.scheme() + "://" + host +
+                                                  path_to_blob + "?" + 
query_string));
+  } else {
+    RETURN_NOT_OK(options.ConfigureAnonymousCredentials(account_name));
+  }
+  return options;
+}
+
+namespace {
+
+struct AzurePath {
+  std::string full_path;
+  std::string container;
+  std::string path_to_file;
+  std::vector<std::string> path_to_file_parts;
+
+  static Result<AzurePath> FromString(const std::string& s) {
+    // 
https://synapsemladlsgen2.dfs.core.windows.net/synapsemlfs/testdir/testfile.txt
+    // container = synapsemlfs
+    // account_name = synapsemladlsgen2
+    // path_to_file = testdir/testfile.txt
+    // path_to_file_parts = [testdir, testfile.txt]
+
+    // Expected input here => s = synapsemlfs/testdir/testfile.txt
+    auto src = internal::RemoveTrailingSlash(s);
+    if ((src.find("127.0.0.1") != std::string::npos)) {
+      RETURN_NOT_OK(FromLocalHostString(&src));
+    }
+    if (internal::IsLikelyUri(src)) {
+      RETURN_NOT_OK(ExtractBlobPath(&src));
+    }
+    src = internal::RemoveLeadingSlash(src);
+    auto first_sep = src.find_first_of(kSep);
+    if (first_sep == 0) {
+      return Status::IOError("Path cannot start with a separator ('", s, "')");
+    }
+    if (first_sep == std::string::npos) {
+      return AzurePath{std::string(src), std::string(src), "", {}};
+    }
+    AzurePath path;
+    path.full_path = std::string(src);
+    path.container = std::string(src.substr(0, first_sep));
+    path.path_to_file = std::string(src.substr(first_sep + 1));
+    path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+    RETURN_NOT_OK(Validate(&path));
+    return path;
+  }
+
+  static Status FromLocalHostString(util::string_view* src) {
+    // src = http://127.0.0.1:10000/accountName/pathToBlob
+    auto port = src->find("127.0.0.1");
+    // src = 127.0.0.1:10000/accountName/pathToBlob
+    *src = src->substr(port);
+    auto first_sep = src->find_first_of(kSep);
+    if (first_sep == std::string::npos) {
+      return Status::IOError("Missing account name in Azure Blob Storage URI");
+    }
+    // src = accountName/pathToBlob
+    *src = src->substr(first_sep + 1);
+    auto sec_sep = src->find_first_of(kSep);
+    if (sec_sep == std::string::npos) {
+      return Status::IOError("Missing container name in Azure Blob Storage 
URI");
+    }
+    // src = pathToBlob
+    *src = src->substr(sec_sep + 1);
+    return Status::OK();
+  }
+
+  // Removes scheme, host and port from the uri
+  static Status ExtractBlobPath(util::string_view* src) {
+    std::string text = ".core.windows.net";
+    auto pos = src->find(text);
+    if (pos == std::string::npos) {
+      return Status::IOError("Invalid Azure blob storage URI provided: ", src);
+    }
+    pos = src->find("/", pos);
+    if (pos == std::string::npos) {
+      *src = "";
+    } else {
+      *src = src->substr(pos + 1);
+    }
+    return Status::OK();
+  }
+
+  static Status Validate(const AzurePath* path) {
+    auto result = 
internal::ValidateAbstractPathParts(path->path_to_file_parts);
+    if (!result.ok()) {
+      return Status::Invalid(result.message(), " in path ", path->full_path);
+    } else {
+      return result;
+    }
+  }
+
+  AzurePath parent() const {
+    DCHECK(!path_to_file_parts.empty());
+    auto parent = AzurePath{"", container, "", path_to_file_parts};
+    parent.path_to_file_parts.pop_back();
+    parent.path_to_file = 
internal::JoinAbstractPath(parent.path_to_file_parts);
+    if (parent.path_to_file.empty()) {
+      parent.full_path = parent.container;
+    } else {
+      parent.full_path = parent.container + kSep + parent.path_to_file;
+    }
+    return parent;
+  }
+
+  bool has_parent() const { return !path_to_file.empty(); }
+
+  bool empty() const { return container.empty() && path_to_file.empty(); }
+
+  bool operator==(const AzurePath& other) const {
+    return container == other.container && path_to_file == other.path_to_file;
+  }
+};
+
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& 
result) {
+  auto md = std::make_shared<KeyValueMetadata>();
+  auto push = [&](std::string k, const std::string v) {

Review Comment:
   It seems that we don't need to define this lambda because it is used in only 
one place.



##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -0,0 +1,531 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/azurefs.h"
+
+#include <gmock/gmock-matchers.h>
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+#include <azure/storage/files/datalake.hpp>
+#include <boost/process.hpp>
+#include <chrono>
+#include <thread>
+
+#include "arrow/filesystem/test_util.h"
+#include "arrow/testing/future_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/util.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/uri.h"
+
+namespace arrow {
+
+using internal::Uri;
+
+namespace fs {
+namespace internal {
+
+namespace bp = boost::process;
+
+using ::arrow::internal::TemporaryDir;
+using ::testing::IsEmpty;
+using ::testing::NotNull;
+
+class AzuriteEnv : public ::testing::Environment {
+ public:
+  AzuriteEnv() {
+    account_name_ = "devstoreaccount1";
+    account_key_ =
+        
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
+        "KBHBeksoGMGw==";
+    auto exe_path = bp::search_path("azurite");
+    if (exe_path.empty()) {
+      auto error = std::string("Could not find Azurite emulator.");
+      status_ = Status::Invalid(error);
+      return;
+    }
+    auto temp_dir_ = TemporaryDir::Make("azurefs-test-").ValueOrDie();
+    server_process_ = bp::child(boost::this_process::environment(), exe_path, 
"--silent",
+                                "--location", temp_dir_->path().ToString(), 
"--debug",
+                                temp_dir_->path().ToString() + "/debug.log");
+    if (!(server_process_.valid() && server_process_.running())) {
+      auto error = "Could not start Azurite emulator.";
+      server_process_.terminate();
+      server_process_.wait();
+      status_ = Status::Invalid(error);
+      return;
+    }
+    status_ = Status::OK();
+  }
+
+  ~AzuriteEnv() override {
+    server_process_.terminate();
+    server_process_.wait();
+  }
+
+  const std::string& account_name() const { return account_name_; }
+  const std::string& account_key() const { return account_key_; }
+  const Status status() const { return status_; }
+
+ private:
+  std::string account_name_;
+  std::string account_key_;
+  bp::child server_process_;
+  Status status_;
+  std::unique_ptr<TemporaryDir> temp_dir_;
+};
+
+auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv);
+
+AzuriteEnv* GetAzuriteEnv() {
+  return ::arrow::internal::checked_cast<AzuriteEnv*>(azurite_env);
+}
+
+class TestAzureFileSystem : public ::testing::Test {
+ public:
+  std::shared_ptr<FileSystem> fs_;
+  std::shared_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient> 
gen2_client_;
+  AzureOptions options_;
+
+  void MakeFileSystem() {
+    const std::string& account_name = GetAzuriteEnv()->account_name();
+    const std::string& account_key = GetAzuriteEnv()->account_key();
+    options_.is_azurite = true;
+    options_.ConfigureAccountKeyCredentials(account_name, account_key);
+    gen2_client_ =
+        
std::make_shared<Azure::Storage::Files::DataLake::DataLakeServiceClient>(
+            options_.account_dfs_url, options_.storage_credentials_provider);
+    ASSERT_OK_AND_ASSIGN(fs_, AzureBlobFileSystem::Make(options_));
+  }
+
+  void SetUp() override {
+    ASSERT_THAT(GetAzuriteEnv(), NotNull());
+    ASSERT_THAT(GetAzuriteEnv()->status(), Status::OK());

Review Comment:
   Can we use our `ASSERT_OK()` here?
   
   
https://github.com/apache/arrow/blob/master/cpp/src/arrow/testing/gtest_util.h#L87



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to