yahoNanJing commented on code in PR #5543:
URL: https://github.com/apache/arrow-datafusion/pull/5543#discussion_r1144167474


##########
datafusion/execution/src/object_store.rs:
##########
@@ -156,78 +158,64 @@ impl std::fmt::Debug for ObjectStoreRegistry {
     }
 }
 
-impl Default for ObjectStoreRegistry {
+impl Default for DefaultObjectStoreRegistry {
     fn default() -> Self {
         Self::new()
     }
 }
 
-impl ObjectStoreRegistry {
-    /// Create an [`ObjectStoreRegistry`] with no [`ObjectStoreProvider`].
-    ///
-    /// This will register [`LocalFileSystem`] to handle `file://` paths, 
further stores
-    /// will need to be explicitly registered with calls to 
[`ObjectStoreRegistry::register_store`]
+impl DefaultObjectStoreRegistry {
+    /// This will register [`LocalFileSystem`] to handle `file://` paths
     pub fn new() -> Self {
-        ObjectStoreRegistry::new_with_provider(None)
-    }
-
-    /// Create an [`ObjectStoreRegistry`] with the provided 
[`ObjectStoreProvider`]
-    ///
-    /// This will register [`LocalFileSystem`] to handle `file://` paths, 
further stores
-    /// may be explicity registered with calls to 
[`ObjectStoreRegistry::register_store`] or
-    /// created lazily, on-demand by the provided [`ObjectStoreProvider`]
-    pub fn new_with_provider(provider: Option<Arc<dyn ObjectStoreProvider>>) 
-> Self {
         let object_stores: DashMap<String, Arc<dyn ObjectStore>> = 
DashMap::new();
-        object_stores.insert("file://".to_string(), 
Arc::new(LocalFileSystem::new()));
-        Self {
-            object_stores,
-            provider,
-        }
+        let url = Url::parse("file://").unwrap();
+        let key = get_url_key(&url);
+        object_stores.insert(key, Arc::new(LocalFileSystem::new()));
+        Self { object_stores }
     }
+}
 
-    /// Adds a new store to this registry.
-    ///
-    /// If a store with the same schema and host existed before, it is 
replaced and returned
-    pub fn register_store(
+impl ObjectStoreRegistry for DefaultObjectStoreRegistry {
+    fn register_store(
         &self,
-        scheme: impl AsRef<str>,
-        host: impl AsRef<str>,
+        url: &Url,
         store: Arc<dyn ObjectStore>,
     ) -> Option<Arc<dyn ObjectStore>> {
-        let s = format!("{}://{}", scheme.as_ref(), host.as_ref());
+        let s = get_url_key(url);
         self.object_stores.insert(s, store)
     }
 
-    /// Get a suitable store for the provided URL. For example:
-    ///
-    /// - URL with scheme `file:///` or no schema will return the default 
LocalFS store
-    /// - URL with scheme `s3://bucket/` will return the S3 store if it's 
registered
-    /// - URL with scheme `hdfs://hostname:port/` will return the hdfs store 
if it's registered
-    ///
-    pub fn get_by_url(&self, url: impl AsRef<Url>) -> Result<Arc<dyn 
ObjectStore>> {
-        let url = url.as_ref();
-        // First check whether can get object store from registry
-        let s = &url[url::Position::BeforeScheme..url::Position::BeforePath];
-        let store = self.object_stores.get(s).map(|o| o.value().clone());
-
-        match store {
-            Some(store) => Ok(store),
-            None => match &self.provider {
-                Some(provider) => {
-                    let store = provider.get_by_url(url)?;
-                    let key =
-                        
&url[url::Position::BeforeScheme..url::Position::BeforePath];
-                    self.object_stores.insert(key.to_owned(), store.clone());
-                    Ok(store)
-                }
-                None => Err(DataFusionError::Internal(format!(
+    /// The [`DefaultObjectStoreRegistry`] will only depend on the inner 
object store cache
+    /// to decide whether it's able to find an [`ObjectStore`] for a url. No 
ad-hoc discovery
+    /// and lazy registration will be executed.
+    fn get_or_lazy_register_store(&self, url: &Url) -> Result<Arc<dyn 
ObjectStore>> {
+        let s = get_url_key(url);
+        self.object_stores
+            .get(&s)
+            .map(|o| o.value().clone())
+            .ok_or_else(|| {
+                DataFusionError::Internal(format!(
                     "No suitable object store found for {url}"
-                ))),
-            },
-        }
+                ))
+            })
     }
 }
 
+/// Get the key of a url for object store registration.
+/// The credential info will be removed
+fn get_url_key(url: &Url) -> String {
+    let port_info = url
+        .port()
+        .map(|port| format!(":{port}"))
+        .unwrap_or(String::new());
+    format!(
+        "{}://{}{}/",
+        &url[url::Position::BeforeScheme..url::Position::AfterScheme],
+        &url[url::Position::BeforeHost..url::Position::AfterHost],
+        port_info
+    )

Review Comment:
   My bad. The trailing `/` can be removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to