roeap commented on code in PR #4200:
URL: https://github.com/apache/arrow-rs/pull/4200#discussion_r1191736849


##########
object_store/src/parse.rs:
##########
@@ -0,0 +1,240 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::local::LocalFileSystem;
+use crate::memory::InMemory;
+use crate::path::Path;
+use crate::ObjectStore;
+use snafu::Snafu;
+use url::Url;
+
+#[derive(Debug, Snafu)]
+enum Error {
+    #[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
+    InvalidUrl { url: Url },
+
+    #[snafu(display("Unable to recognise URL \"{}\"", url))]
+    Unrecognised { url: Url },
+
+    #[snafu(display("Feature {scheme:?} not enabled"))]
+    NotEnabled { scheme: ObjectStoreScheme },
+}
+
+impl From<Error> for super::Error {
+    fn from(e: Error) -> Self {
+        Self::Generic {
+            store: "URL",
+            source: Box::new(e),
+        }
+    }
+}
+
+/// Recognises various URL formats, identifying the relevant [`ObjectStore`](crate::ObjectStore)
+#[derive(Debug, Eq, PartialEq)]
+pub enum ObjectStoreScheme {
+    /// Url corresponding to [`LocalFileSystem`](crate::local::LocalFileSystem)
+    Local,
+    /// Url corresponding to [`InMemory`](crate::memory::InMemory)
+    Memory,
+    /// Url corresponding to [`AmazonS3`](crate::aws::AmazonS3)
+    AmazonS3,
+    /// Url corresponding to [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
+    GoogleCloudStorage,
+    /// Url corresponding to [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
+    MicrosoftAzure,
+    /// Url corresponding to [`HttpStore`](crate::http::HttpStore)
+    Http,
+}
+
+impl ObjectStoreScheme {
+    /// Create an [`ObjectStoreScheme`] from the provided [`Url`]
+    fn parse(url: &Url) -> Result<Self, Error> {
+        match (url.scheme(), url.host_str()) {
+            ("file", None) => Ok(Self::Local),
+            ("memory", None) => Ok(Self::Memory),
+            ("s3" | "s3a", Some(_)) => Ok(Self::AmazonS3),
+            ("gs", Some(_)) => Ok(Self::GoogleCloudStorage),
+            ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
+                Ok(Self::MicrosoftAzure)
+            }
+            ("http", Some(_)) => Ok(Self::Http),
+            ("https", Some(host)) => {
+                if host.ends_with("dfs.core.windows.net")
+                    || host.ends_with("blob.core.windows.net")
+                {
+                    Ok(Self::MicrosoftAzure)
+                } else if host.ends_with("amazonaws.com")
+                    || host.ends_with("r2.cloudflarestorage.com")
+                {
+                    Ok(Self::AmazonS3)
+                } else {
+                    Ok(Self::Http)
+                }
+            }
+            _ => Err(Error::Unrecognised { url: url.clone() }),
+        }
+    }
+}
+
+#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
+macro_rules! builder_opts {
+    ($builder:ty, $url:expr, $options:expr) => {{
+        let builder = $options.into_iter().fold(
+            <$builder>::from_env().with_url($url.as_str()),

Review Comment:
   This is actually something we have been struggling a bit with, and I am 
unsure what the best solution is without introducing too much complexity.
   
   Essentially we end up in a situation where any valid credentials configured in the environment could take precedence over credentials passed via the storage options. Which one wins would be based on the order in which we check credentials during build.
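   
   A purely illustrative sketch of that ordering concern, using a made-up `resolve_access_key` helper and S3-style option keys rather than the actual builder logic:
   
   ```rust
   use std::collections::HashMap;
   
   // Hypothetical resolution order: the environment is consulted first, so a
   // credential found there shadows one the caller explicitly passed through
   // the storage options.
   fn resolve_access_key(options: &HashMap<String, String>) -> Option<String> {
       std::env::var("AWS_ACCESS_KEY_ID")
           .ok()
           .or_else(|| options.get("aws_access_key_id").cloned())
   }
   
   fn main() {
       let mut options = HashMap::new();
       options.insert("aws_access_key_id".to_string(), "explicit-key".to_string());
       // If AWS_ACCESS_KEY_ID is set in the environment, it wins over the
       // explicitly supplied value above.
       println!("{:?}", resolve_access_key(&options));
   }
   ```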
   
   To work around this I was contemplating several options, none of which really resonated so far, but they all boil down to having some notion of which combinations of config options constitute a credential, then checking whether the options already contain a full credential and skipping parsing from the environment if they do.
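   
   A minimal sketch of that full-credential check, with hypothetical key sets (the real permutations, especially for Azure, would be more involved):
   
   ```rust
   use std::collections::HashMap;
   
   // Hypothetical: key combinations that together form a usable S3 credential.
   const S3_CREDENTIAL_SETS: &[&[&str]] = &[
       &["aws_access_key_id", "aws_secret_access_key"],
       &["aws_web_identity_token_file", "aws_role_arn"],
   ];
   
   /// Returns true if the user-supplied options already contain a full
   /// credential, in which case parsing credentials from the environment
   /// could be skipped entirely.
   fn is_complete_credential(options: &HashMap<String, String>) -> bool {
       S3_CREDENTIAL_SETS
           .iter()
           .any(|set| set.iter().all(|key| options.contains_key(*key)))
   }
   
   fn main() {
       let mut options = HashMap::new();
       options.insert("aws_access_key_id".into(), "AKIA...".into());
       options.insert("aws_secret_access_key".into(), "secret".into());
   
       if is_complete_credential(&options) {
           // Build the store from the options alone; do not consult the environment.
           println!("complete credential supplied, skipping from_env()");
       } else {
           // Fall back to environment-based resolution to fill in the gaps.
           println!("partial credential, merging with environment configuration");
       }
   }
   ```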
   
   Things get a bit messier if the options contain a partial credential that can be completed from the environment, since especially in the Azure case there are several permutations. Within delta-rs we are unfortunately in a situation right now where this is affecting users.
   
   This becomes even more relevant as we begin to introduce catalog 
integrations where different locations are associated with different 
credentials.


