This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 88e8140ad7 GH-43097: [C++] Implement `PathFromUri` support for Azure 
file system (#43098)
88e8140ad7 is described below

commit 88e8140ad7902435b5d1ac29205dda7517f2cc79
Author: Oliver Layer <[email protected]>
AuthorDate: Wed Aug 14 02:16:54 2024 +0200

    GH-43097: [C++] Implement `PathFromUri` support for Azure file system 
(#43098)
    
    ### Rationale for this change
    
    See #43097.
    
    ### What changes are included in this PR?
    Implements `AzureFS::PathFromUri` using existing URI parsing and path 
extraction inside the `AzureOptions`.
    
    ### Are these changes tested?
    Yes, added a unit test.
    
    ### Are there any user-facing changes?
    No, but calling `PathFromUri` will now work instead of throwing because no 
implementation was provided.
    * GitHub Issue: #43097
    
    Authored-by: Oliver Layer <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/filesystem/azurefs.cc      | 27 +++++++++++++++++++++++++++
 cpp/src/arrow/filesystem/azurefs.h       |  2 ++
 cpp/src/arrow/filesystem/azurefs_test.cc |  9 +++++++++
 3 files changed, 38 insertions(+)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index a3aa2c8e83..9b3c0c0c1d 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -3199,4 +3199,31 @@ Result<std::shared_ptr<io::OutputStream>> 
AzureFileSystem::OpenAppendStream(
   return impl_->OpenAppendStream(location, metadata, false, this);
 }
 
+Result<std::string> AzureFileSystem::PathFromUri(const std::string& 
uri_string) const {
+  /// We cannot use `internal::PathFromUriHelper` here because for Azure we 
have to
+  /// support different URI schemes where the authority is handled differently.
+  /// Example (both should yield the same path `container/some/path`):
+  ///   - (1) abfss://storageacc.blob.core.windows.net/container/some/path
+  ///   - (2) abfss://acc:pw@container/some/path
+  /// The authority handling differs between these two URIs. (1) requires no 
prepending
+  /// of the authority to the path, while (2) requires prepending the 
authority to the
+  /// path.
+  std::string path;
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  RETURN_NOT_OK(AzureOptions::FromUri(uri, &path));
+
+  std::vector<std::string> supported_schemes = {"abfs", "abfss"};
+  const auto scheme = uri.scheme();
+  if (std::find(supported_schemes.begin(), supported_schemes.end(), scheme) ==
+      supported_schemes.end()) {
+    std::string expected_schemes =
+        ::arrow::internal::JoinStrings(supported_schemes, ", ");
+    return Status::Invalid("The filesystem expected a URI with one of the 
schemes (",
+                           expected_schemes, ") but received ", uri_string);
+  }
+
+  return path;
+}
+
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs.h 
b/cpp/src/arrow/filesystem/azurefs.h
index 93d6ec2f94..072b061eeb 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -367,6 +367,8 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
       const std::string& path,
       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
+
+  Result<std::string> PathFromUri(const std::string& uri_string) const 
override;
 };
 
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc 
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 9a11a6f249..36646f417c 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2958,5 +2958,14 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
   ASSERT_RAISES(Invalid, stream->ReadAt(1, 1));
   ASSERT_RAISES(Invalid, stream->Seek(2));
 }
+
+TEST_F(TestAzuriteFileSystem, PathFromUri) {
+  ASSERT_EQ(
+      "container/some/path",
+      
fs()->PathFromUri("abfss://storageacc.blob.core.windows.net/container/some/path"));
+  ASSERT_EQ("container/some/path",
+            fs()->PathFromUri("abfss://acc:pw@container/some/path"));
+  ASSERT_RAISES(Invalid, 
fs()->PathFromUri("http://acc:pw@container/some/path"));
+}
 }  // namespace fs
 }  // namespace arrow

Reply via email to