This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git


The following commit(s) were added to refs/heads/main by this push:
     new 18ed86c  Whitelisting Onelake API & Workspace PL FQDNs (#1) (#711)
18ed86c is described below

commit 18ed86caf5d06aa318f38158501eeb736f28a73b
Author: Smriti Agrawal <[email protected]>
AuthorDate: Fri Jun 12 01:44:14 2026 +0530

    Whitelisting Onelake API & Workspace PL FQDNs (#1) (#711)
    
    * Whitelisting Onelake API & Workspace PL FQDNs
    
    * Addressing comments to whitelist api-onelake fqdns and add UTs
    
    * Reverting the mistakenly modified UT
    
    * Adding validation for xy in WS-PL URL & case insensitive regex
    
    * Making regex an optional dependency
    
    * Eliminating the use of regex for ws-pl fqdn matching
    
    * Adding a separate method 'azure_test_workspace_private_link' for ws-pl tests
    
    * Organising private link UTs
    
    * Adding integration test for WSPL & rebasing with upstream
    
    * Whitelisting Onelake API & Workspace PL FQDNs
    
    * Addressing comments to whitelist api-onelake fqdns and add UTs
    
    * Reverting the mistakenly modified UT
    
    * Adding validation for xy in WS-PL URL & case insensitive regex
    
    * Making regex an optional dependency
    
    * Eliminating the use of regex for ws-pl fqdn matching
    
    * Adding a separate method 'azure_test_workspace_private_link' for ws-pl tests
    
    * Organising private link UTs
    
    * Fixing cargo fmt
    
    * Fixing Clippy
    
    * Fixing Clippy
    
    * Fixing Clippy
    
    * Modified Integration test to pass workspace & artifact id within URL only
    
    * Fixing Clippy
    
    * Adding WSPL support for ABFSS scheme as well
    
    * Adding comments for readability
    
    * Adding comments for readability
    
    ---------
    
    Co-authored-by: Smriti Agrawal <[email protected]>
---
 src/azure/builder.rs | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/azure/mod.rs     |  41 +++++++++++++++++
 2 files changed, 163 insertions(+)

diff --git a/src/azure/builder.rs b/src/azure/builder.rs
index 1f57fac..4ef95cb 100644
--- a/src/azure/builder.rs
+++ b/src/azure/builder.rs
@@ -666,10 +666,26 @@ impl MicrosoftAzureBuilder {
                     self.container_name = Some(validate(host)?);
                 } else {
                     match host.split_once('.') {
+                        // Workspace-level Private Link detection
+                        // "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
+                        Some((workspaceid, rest))
+                            if rest.starts_with('z') && rest.ends_with("fabric.microsoft.com") =>
+                        {
+                            // Account name for WS-PL is two labels: "{workspaceid}.z{xy}"
+                            let (zone, _) = rest.split_once('.').unwrap_or((rest, ""));
+
+                            self.account_name = Some(format!("{workspaceid}.{zone}"));
+                            self.endpoint = Some(format!("https://{}", host));
+
+                            self.container_name = Some(validate(parsed.username())?);
+                            self.use_fabric_endpoint = true.into();
+                        }
+
                         Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
                             self.account_name = Some(validate(a)?);
                             self.container_name = Some(validate(parsed.username())?);
                         }
                         }
+
                         Some((a, "dfs.fabric.microsoft.com"))
                         | Some((a, "blob.fabric.microsoft.com")) => {
                             self.account_name = Some(validate(a)?);
@@ -681,6 +697,30 @@ impl MicrosoftAzureBuilder {
                 }
             }
             "https" => match host.split_once('.') {
+                // Workspace-level Private Link detection
+                // "{workspaceid}.z??.(onelake|dfs|blob).fabric.microsoft.com"
+                Some((workspaceid, rest))
+                    if rest.starts_with('z') && rest.ends_with("fabric.microsoft.com") =>
+                {
+                    // rest looks like: "z28.dfs.fabric.microsoft.com" / "z28.blob.fabric.microsoft.com" / etc.
+                    // Account name for WS-PL is two labels: "{workspaceid}.z{xy}"
+                    let (zone, _) = rest.split_once('.').unwrap_or((rest, ""));
+
+                    self.account_name = Some(format!("{workspaceid}.{zone}"));
+                    self.endpoint = Some(format!("https://{}", host));
+
+                    // Attempt to infer the container name from the URL
+                    let container = parsed.path_segments().unwrap().next().expect(
+                        "iterator always contains at least one string (which may be empty)",
+                    );
+
+                    if !container.is_empty() {
+                        self.container_name = Some(validate(container)?);
+                    }
+
+                    self.use_fabric_endpoint = true.into();
+                }
+
                 Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
                     self.account_name = Some(validate(a)?);
                     let container = parsed.path_segments().unwrap().next().expect(
@@ -1204,6 +1244,17 @@ mod tests {
         assert_eq!(builder.container_name.as_deref(), Some("container"));
         assert!(builder.use_fabric_endpoint.get().unwrap());
 
+        let mut builder = MicrosoftAzureBuilder::new();
+        builder
+            .parse_url("https://onelake.dfs.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456.lakehouse/Files/tables/sales/data.parquet")
+            .unwrap();
+        assert_eq!(builder.account_name, Some("onelake".to_string()));
+        assert_eq!(
+            builder.container_name.as_deref(),
+            Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3")
+        );
+        assert!(builder.use_fabric_endpoint.get().unwrap());
+
         let mut builder = MicrosoftAzureBuilder::new();
         builder
             .parse_url("https://account.blob.fabric.microsoft.com/")
@@ -1235,6 +1286,77 @@ mod tests {
         }
     }
 
+    #[test]
+    fn azure_test_workspace_private_link() {
+        let test_cases: Vec<(&str, &str, Option<&str>)> = vec![
+            (
+                "https://Ab000000000000000000000000000000.zAb.dfs.fabric.microsoft.com/",
+                "ab000000000000000000000000000000.zab",
+                None,
+            ),
+            (
+                "https://ab000000000000000000000000000000.zab.dfs.fabric.microsoft.com/",
+                "ab000000000000000000000000000000.zab",
+                None,
+            ),
+            (
+                "https://c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+            ),
+            (
+                "https://c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+            ),
+            (
+                "https://c047b3e34e89407a98d7cf9949ae92a3.zc0.onelake.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+            ),
+            (
+                "https://c047b3e34e89407a98d7cf9949ae92a3.zc0.w.api.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+            ),
+            (
+                "https://c047b3e34e89407a98d7cf9949ae92a3.zc0.c.api.fabric.microsoft.com/c047b3e3-4e89-407a-98d7-cf9949ae92a3/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e3-4e89-407a-98d7-cf9949ae92a3"),
+            ),
+            (
+                "abfss://c047b3e34e89407a98d7cf9949ae92a3@c047b3e34e89407a98d7cf9949ae92a3.zc0.dfs.fabric.microsoft.com/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e34e89407a98d7cf9949ae92a3"),
+            ),
+            (
+                "abfss://c047b3e34e89407a98d7cf9949ae92a3@c047b3e34e89407a98d7cf9949ae92a3.zc0.blob.fabric.microsoft.com/9f1a2b3c-4d5e-6f70-8a9b-c0d1e2f3a456/file",
+                "c047b3e34e89407a98d7cf9949ae92a3.zc0",
+                Some("c047b3e34e89407a98d7cf9949ae92a3"),
+            ),
+        ];
+
+        for (url, expected_account, expected_container) in &test_cases {
+            let mut builder = MicrosoftAzureBuilder::new();
+            builder.parse_url(url).unwrap();
+
+            assert_eq!(
+                builder.account_name.as_deref(),
+                Some(*expected_account),
+                "account mismatch for URL: {url}"
+            );
+            assert_eq!(
+                builder.container_name.as_deref(),
+                *expected_container,
+                "container mismatch for URL: {url}"
+            );
+            assert!(
+                builder.use_fabric_endpoint.get().unwrap(),
+                "use_fabric_endpoint not set for URL: {url}"
+            );
+        }
+    }
+
     #[test]
     fn azure_test_config_from_map() {
         let azure_client_id = "object_store:fake_access_key_id";
diff --git a/src/azure/mod.rs b/src/azure/mod.rs
index 1429bec..e2ed05f 100644
--- a/src/azure/mod.rs
+++ b/src/azure/mod.rs
@@ -378,6 +378,47 @@ mod tests {
         }
     }
 
+    #[ignore = "Used for manual testing against a real Workspace Private Link Endpoint."]
+    #[tokio::test]
+    async fn azure_onelake_wspl_test() {
+        maybe_skip_integration!();
+
+        let url =
+            std::env::var("AZURE_ONELAKE_URL").expect("Set AZURE_ONELAKE_URL to a WS-PL FQDN");
+        let parsed = url::Url::parse(&url).unwrap();
+
+        let path = match parsed.scheme() {
+            "abfss" | "abfs" => {
+                // abfss://<container>@<host>/<path...>
+                // container is in username, entire path is the object path
+                let segments: Vec<&str> = parsed.path_segments().unwrap().collect();
+                Path::from(segments.join("/"))
+            }
+            _ => {
+                // https://<host>/<container>/<path...>
+                // first segment is container, rest is the object path
+                let segments: Vec<&str> = parsed.path_segments().unwrap().collect();
+                Path::from(segments[1..].join("/"))
+            }
+        };
+
+        let store = MicrosoftAzureBuilder::new()
+            .with_url(&url)
+            .with_bearer_token_authorization(
+                std::env::var("AZURE_STORAGE_TOKEN").expect("Set AZURE_STORAGE_TOKEN"),
+            )
+            .build()
+            .unwrap();
+
+        let data = Bytes::from("Hello OneLake WSPL");
+
+        store.put(&path, data.clone().into()).await.unwrap();
+        let result = store.get(&path).await.unwrap();
+        let loaded = result.bytes().await.unwrap();
+        assert_eq!(data, loaded);
+        store.delete(&path).await.unwrap();
+    }
+
     #[ignore = "Used for manual testing against a real storage account."]
     #[tokio::test]
     async fn test_user_delegation_key() {

Reply via email to