This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 230612eedc Add Support for Microsoft Fabric / OneLake (#4573)
230612eedc is described below
commit 230612eedc77a8b707206767cb1b36ed7ad82f76
Author: vmuddassir-msft <[email protected]>
AuthorDate: Fri Aug 11 20:50:16 2023 +0530
Add Support for Microsoft Fabric / OneLake (#4573)
* Changes required for onelake-fix
* Fix Unit tests
* Add Unit Tests
* Add onelake read/write test
* Add with_use_fabric , for fabric url check
* Final tweaks
* Further tweaks
* Automatically set use_fabric_endpoint
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
object_store/src/azure/mod.rs | 101 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 100 insertions(+), 1 deletion(-)
diff --git a/object_store/src/azure/mod.rs b/object_store/src/azure/mod.rs
index 019cde5813..6bb4cdad1b 100644
--- a/object_store/src/azure/mod.rs
+++ b/object_store/src/azure/mod.rs
@@ -341,6 +341,10 @@ pub struct MicrosoftAzureBuilder {
client_options: ClientOptions,
/// Credentials
credentials: Option<AzureCredentialProvider>,
+ /// When set to true, fabric url scheme will be used
+ ///
+ /// i.e. https://{account_name}.dfs.fabric.microsoft.com
+ use_fabric_endpoint: ConfigValue<bool>,
}
/// Configuration keys for [`MicrosoftAzureBuilder`]
@@ -430,6 +434,13 @@ pub enum AzureConfigKey {
/// - `use_emulator`
UseEmulator,
+ /// Use object store with url scheme account.dfs.fabric.microsoft.com
+ ///
+ /// Supported keys:
+ /// - `azure_use_fabric_endpoint`
+ /// - `use_fabric_endpoint`
+ UseFabricEndpoint,
+
/// Endpoint to request a imds managed identity token
///
/// Supported keys:
@@ -482,6 +493,7 @@ impl AsRef<str> for AzureConfigKey {
Self::SasKey => "azure_storage_sas_key",
Self::Token => "azure_storage_token",
Self::UseEmulator => "azure_storage_use_emulator",
+ Self::UseFabricEndpoint => "azure_use_fabric_endpoint",
Self::MsiEndpoint => "azure_msi_endpoint",
Self::ObjectId => "azure_object_id",
Self::MsiResourceId => "azure_msi_resource_id",
@@ -531,6 +543,9 @@ impl FromStr for AzureConfigKey {
"azure_federated_token_file" | "federated_token_file" => {
Ok(Self::FederatedTokenFile)
}
+ "azure_use_fabric_endpoint" | "use_fabric_endpoint" => {
+ Ok(Self::UseFabricEndpoint)
+ }
"azure_use_azure_cli" | "use_azure_cli" => Ok(Self::UseAzureCli),
// Backwards compatibility
"azure_allow_http" => Ok(Self::Client(ClientConfigKey::AllowHttp)),
@@ -600,11 +615,16 @@ impl MicrosoftAzureBuilder {
///
/// - `abfs[s]://<container>/<path>` (according to
[fsspec](https://github.com/fsspec/adlfs))
/// - `abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>`
+ /// -
`abfs[s]://<file_system>@<account_name>.dfs.fabric.microsoft.com/<path>`
/// - `az://<container>/<path>` (according to
[fsspec](https://github.com/fsspec/adlfs))
/// - `adl://<container>/<path>` (according to
[fsspec](https://github.com/fsspec/adlfs))
/// - `azure://<container>/<path>` (custom)
/// - `https://<account>.dfs.core.windows.net`
/// - `https://<account>.blob.core.windows.net`
+ /// - `https://<account>.dfs.fabric.microsoft.com`
+ /// - `https://<account>.dfs.fabric.microsoft.com/<container>`
+ /// - `https://<account>.blob.fabric.microsoft.com`
+ /// - `https://<account>.blob.fabric.microsoft.com/<container>`
///
/// Note: Settings derived from the URL will override any others set on
this builder
///
@@ -639,6 +659,7 @@ impl MicrosoftAzureBuilder {
}
AzureConfigKey::UseAzureCli => self.use_azure_cli.parse(value),
AzureConfigKey::UseEmulator => self.use_emulator.parse(value),
+ AzureConfigKey::UseFabricEndpoint =>
self.use_fabric_endpoint.parse(value),
AzureConfigKey::Client(key) => {
self.client_options = self.client_options.with_config(key,
value)
}
@@ -692,6 +713,9 @@ impl MicrosoftAzureBuilder {
AzureConfigKey::SasKey => self.sas_key.clone(),
AzureConfigKey::Token => self.bearer_token.clone(),
AzureConfigKey::UseEmulator => Some(self.use_emulator.to_string()),
+ AzureConfigKey::UseFabricEndpoint => {
+ Some(self.use_fabric_endpoint.to_string())
+ }
AzureConfigKey::MsiEndpoint => self.msi_endpoint.clone(),
AzureConfigKey::ObjectId => self.object_id.clone(),
AzureConfigKey::MsiResourceId => self.msi_resource_id.clone(),
@@ -724,6 +748,10 @@ impl MicrosoftAzureBuilder {
} else if let Some(a) =
host.strip_suffix(".dfs.core.windows.net") {
self.container_name = Some(validate(parsed.username())?);
self.account_name = Some(validate(a)?);
+ } else if let Some(a) =
host.strip_suffix(".dfs.fabric.microsoft.com") {
+ self.container_name = Some(validate(parsed.username())?);
+ self.account_name = Some(validate(a)?);
+ self.use_fabric_endpoint = true.into();
} else {
return Err(UrlNotRecognisedSnafu { url }.build().into());
}
@@ -733,6 +761,21 @@ impl MicrosoftAzureBuilder {
| Some((a, "blob.core.windows.net")) => {
self.account_name = Some(validate(a)?);
}
+ Some((a, "dfs.fabric.microsoft.com"))
+ | Some((a, "blob.fabric.microsoft.com")) => {
+ self.account_name = Some(validate(a)?);
+ // Attempt to infer the container name from the URL
+ // -
https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
+ // -
https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
+ //
+ // See
<https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
+ if let Some(workspace) =
parsed.path_segments().unwrap().next() {
+ if !workspace.is_empty() {
+ self.container_name = Some(workspace.to_string())
+ }
+ }
+ self.use_fabric_endpoint = true.into();
+ }
_ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
},
scheme => return Err(UnknownUrlSchemeSnafu { scheme
}.build().into()),
@@ -819,6 +862,14 @@ impl MicrosoftAzureBuilder {
self
}
+ /// Set if Microsoft Fabric url scheme should be used (defaults to false)
+ /// When disabled the url scheme used is
`https://{account}.blob.core.windows.net`
+ /// When enabled the url scheme used is
`https://{account}.dfs.fabric.microsoft.com`
+ pub fn with_use_fabric_endpoint(mut self, use_fabric_endpoint: bool) ->
Self {
+ self.use_fabric_endpoint = use_fabric_endpoint.into();
+ self
+ }
+
/// Sets what protocol is allowed. If `allow_http` is :
/// * false (default): Only HTTPS are allowed
/// * true: HTTP and HTTPS are allowed
@@ -885,6 +936,7 @@ impl MicrosoftAzureBuilder {
}
let container = self.container_name.ok_or(Error::MissingContainerName
{})?;
+
let static_creds = |credential: AzureCredential| ->
AzureCredentialProvider {
Arc::new(StaticCredentialProvider::new(credential))
};
@@ -906,7 +958,11 @@ impl MicrosoftAzureBuilder {
(true, url, credential, account_name)
} else {
let account_name = self.account_name.ok_or(Error::MissingAccount
{})?;
- let account_url = format!("https://{}.blob.core.windows.net",
&account_name);
+ let account_url = match self.use_fabric_endpoint.get()? {
+ true => format!("https://{}.blob.fabric.microsoft.com",
&account_name),
+ false => format!("https://{}.blob.core.windows.net",
&account_name),
+ };
+
let url = Url::parse(&account_url)
.context(UnableToParseUrlSnafu { url: account_url })?;
@@ -1049,6 +1105,15 @@ mod tests {
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, Some("file_system".to_string()));
+ assert!(!builder.use_fabric_endpoint.get().unwrap());
+
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("abfss://[email protected]/")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+ assert_eq!(builder.container_name, Some("file_system".to_string()));
+ assert!(builder.use_fabric_endpoint.get().unwrap());
let mut builder = MicrosoftAzureBuilder::new();
builder.parse_url("abfs://container/path").unwrap();
@@ -1067,12 +1132,46 @@ mod tests {
.parse_url("https://account.dfs.core.windows.net/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
+ assert!(!builder.use_fabric_endpoint.get().unwrap());
let mut builder = MicrosoftAzureBuilder::new();
builder
.parse_url("https://account.blob.core.windows.net/")
.unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
+ assert!(!builder.use_fabric_endpoint.get().unwrap());
+
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.dfs.fabric.microsoft.com/")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+ assert_eq!(builder.container_name, None);
+ assert!(builder.use_fabric_endpoint.get().unwrap());
+
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.dfs.fabric.microsoft.com/container")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+ assert_eq!(builder.container_name.as_deref(), Some("container"));
+ assert!(builder.use_fabric_endpoint.get().unwrap());
+
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.blob.fabric.microsoft.com/")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+ assert_eq!(builder.container_name, None);
+ assert!(builder.use_fabric_endpoint.get().unwrap());
+
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.blob.fabric.microsoft.com/container")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+ assert_eq!(builder.container_name.as_deref(), Some("container"));
+ assert!(builder.use_fabric_endpoint.get().unwrap());
let err_cases = [
"mailto://account.blob.core.windows.net/",