This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git
The following commit(s) were added to refs/heads/main by this push:
new f422dce Fix azure path parsing (#399)
f422dce is described below
commit f422dce1528ee2a089d8061af639c3f2a9cd43af
Author: Kyle Barron <[email protected]>
AuthorDate: Mon Jun 16 16:21:31 2025 -0400
Fix azure path parsing (#399)
---
src/parse.rs | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/src/parse.rs b/src/parse.rs
index 4188609..bee1f87 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -92,7 +92,7 @@ impl ObjectStoreScheme {
/// assert_eq!(scheme, ObjectStoreScheme::Local);
/// assert_eq!(path.as_ref(), "path/to/my/file");
///
- /// let url: Url = "https://blob.core.windows.net/path/to/my/file".parse().unwrap();
+ /// let url: Url = "https://blob.core.windows.net/container/path/to/my/file".parse().unwrap();
/// let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
/// assert_eq!(scheme, ObjectStoreScheme::MicrosoftAzure);
/// assert_eq!(path.as_ref(), "path/to/my/file");
@@ -110,9 +110,8 @@ impl ObjectStoreScheme {
("memory", None) => (Self::Memory, url.path()),
("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
- ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
- (Self::MicrosoftAzure, url.path())
- }
+ ("az", Some(_)) => (Self::MicrosoftAzure, strip_bucket().unwrap_or_default()),
+ ("adl" | "azure" | "abfs" | "abfss", Some(_)) => (Self::MicrosoftAzure, url.path()),
("http", Some(_)) => (Self::Http, url.path()),
("https", Some(host)) => {
if host.ends_with("dfs.core.windows.net")
@@ -120,7 +119,7 @@ impl ObjectStoreScheme {
|| host.ends_with("dfs.fabric.microsoft.com")
|| host.ends_with("blob.fabric.microsoft.com")
{
- (Self::MicrosoftAzure, url.path())
+ (Self::MicrosoftAzure, strip_bucket().unwrap_or_default())
} else if host.ends_with("amazonaws.com") {
match host.starts_with("s3") {
true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
@@ -286,10 +285,26 @@ mod tests {
"https://account.dfs.core.windows.net",
(ObjectStoreScheme::MicrosoftAzure, ""),
),
+ (
+ "https://account.dfs.core.windows.net/container/path",
+ (ObjectStoreScheme::MicrosoftAzure, "path"),
+ ),
(
"https://account.blob.core.windows.net",
(ObjectStoreScheme::MicrosoftAzure, ""),
),
+ (
+ "https://account.blob.core.windows.net/container/path",
+ (ObjectStoreScheme::MicrosoftAzure, "path"),
+ ),
+ (
+ "az://account/container",
+ (ObjectStoreScheme::MicrosoftAzure, ""),
+ ),
+ (
+ "az://account/container/path",
+ (ObjectStoreScheme::MicrosoftAzure, "path"),
+ ),
(
"gs://bucket/path",
(ObjectStoreScheme::GoogleCloudStorage, "path"),
@@ -335,7 +350,11 @@ mod tests {
),
(
"https://account.dfs.fabric.microsoft.com/container",
- (ObjectStoreScheme::MicrosoftAzure, "container"),
+ (ObjectStoreScheme::MicrosoftAzure, ""),
+ ),
+ (
+ "https://account.dfs.fabric.microsoft.com/container/path",
+ (ObjectStoreScheme::MicrosoftAzure, "path"),
),
(
"https://account.blob.fabric.microsoft.com/",
@@ -343,7 +362,11 @@ mod tests {
),
(
"https://account.blob.fabric.microsoft.com/container",
- (ObjectStoreScheme::MicrosoftAzure, "container"),
+ (ObjectStoreScheme::MicrosoftAzure, ""),
+ ),
+ (
+ "https://account.blob.fabric.microsoft.com/container/path",
+ (ObjectStoreScheme::MicrosoftAzure, "path"),
),
];