This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 88e8140ad7 GH-43097: [C++] Implement `PathFromUri` support for Azure
file system (#43098)
88e8140ad7 is described below
commit 88e8140ad7902435b5d1ac29205dda7517f2cc79
Author: Oliver Layer <[email protected]>
AuthorDate: Wed Aug 14 02:16:54 2024 +0200
GH-43097: [C++] Implement `PathFromUri` support for Azure file system
(#43098)
### Rationale for this change
See #43097.
### What changes are included in this PR?
Implements `AzureFS::PathFromUri` using existing URI parsing and path
extraction inside the `AzureOptions`.
### Are these changes tested?
Yes, added a unit test.
### Are there any user-facing changes?
No, but calling `PathFromUri` will now work instead of throwing because no
implementation was provided.
* GitHub Issue: #43097
Authored-by: Oliver Layer <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 27 +++++++++++++++++++++++++++
cpp/src/arrow/filesystem/azurefs.h | 2 ++
cpp/src/arrow/filesystem/azurefs_test.cc | 9 +++++++++
3 files changed, 38 insertions(+)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index a3aa2c8e83..9b3c0c0c1d 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -3199,4 +3199,31 @@ Result<std::shared_ptr<io::OutputStream>>
AzureFileSystem::OpenAppendStream(
return impl_->OpenAppendStream(location, metadata, false, this);
}
+Result<std::string> AzureFileSystem::PathFromUri(const std::string&
uri_string) const {
+ /// We cannot use `internal::PathFromUriHelper` here because for Azure we
have to
+ /// support different URI schemes where the authority is handled differently.
+ /// Example (both should yield the same path `container/some/path`):
+ /// - (1) abfss://storageacc.blob.core.windows.net/container/some/path
+ /// - (2) abfss://acc:pw@container/some/path
+ /// The authority handling is different with these two URIs. (1) requires no
prepending
+ /// of the authority to the path, while (2) requires prepending the
authority to the
+ /// path. The returned path is the one written into `path` by AzureOptions::FromUri.
+ std::string path;
+ Uri uri;
+ RETURN_NOT_OK(uri.Parse(uri_string));
+ RETURN_NOT_OK(AzureOptions::FromUri(uri, &path));  // only `path` is kept; the call's own result value is discarded
+
+ std::vector<std::string> supported_schemes = {"abfs", "abfss"};  // the only schemes accepted by this method
+ const auto scheme = uri.scheme();
+ if (std::find(supported_schemes.begin(), supported_schemes.end(), scheme) ==
+ supported_schemes.end()) {  // scheme is not in the accepted list
+ std::string expected_schemes =
+ ::arrow::internal::JoinStrings(supported_schemes, ", ");
+ return Status::Invalid("The filesystem expected a URI with one of the
schemes (",
+ expected_schemes, ") but received ", uri_string);
+ }
+
+ return path;
+}
+
} // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs.h
b/cpp/src/arrow/filesystem/azurefs.h
index 93d6ec2f94..072b061eeb 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -367,6 +367,8 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
+
+ Result<std::string> PathFromUri(const std::string& uri_string) const
override;
};
} // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 9a11a6f249..36646f417c 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2958,5 +2958,14 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
ASSERT_RAISES(Invalid, stream->ReadAt(1, 1));
ASSERT_RAISES(Invalid, stream->Seek(2));
}
+
+TEST_F(TestAzuriteFileSystem, PathFromUri) {  // two accepted URI layouts plus one rejected scheme
+ ASSERT_EQ(  // layout (1): authority is the storage account host and is not part of the path
+ "container/some/path",
+

fs()->PathFromUri("abfss://storageacc.blob.core.windows.net/container/some/path"));
+ ASSERT_EQ("container/some/path",  // layout (2): authority host is the container and leads the path
+ fs()->PathFromUri("abfss://acc:pw@container/some/path"));
+ ASSERT_RAISES(Invalid,  // an "http" URI must be rejected with an Invalid status
fs()->PathFromUri("http://acc:pw@container/some/path"));
+}
} // namespace fs
} // namespace arrow