tustvold commented on code in PR #5543:
URL: https://github.com/apache/arrow-datafusion/pull/5543#discussion_r1143118823
##########
datafusion/execution/src/object_store.rs:
##########
@@ -156,78 +158,74 @@ impl std::fmt::Debug for ObjectStoreRegistry {
}
}
-impl Default for ObjectStoreRegistry {
+impl Default for DefaultObjectStoreRegistry {
fn default() -> Self {
Self::new()
}
}
-impl ObjectStoreRegistry {
- /// Create an [`ObjectStoreRegistry`] with no [`ObjectStoreProvider`].
- ///
- /// This will register [`LocalFileSystem`] to handle `file://` paths,
further stores
- /// will need to be explicitly registered with calls to
[`ObjectStoreRegistry::register_store`]
+impl DefaultObjectStoreRegistry {
+ /// This will register [`LocalFileSystem`] to handle `file://` paths
pub fn new() -> Self {
- ObjectStoreRegistry::new_with_provider(None)
- }
-
- /// Create an [`ObjectStoreRegistry`] with the provided
[`ObjectStoreProvider`]
- ///
- /// This will register [`LocalFileSystem`] to handle `file://` paths,
further stores
- /// may be explicity registered with calls to
[`ObjectStoreRegistry::register_store`] or
- /// created lazily, on-demand by the provided [`ObjectStoreProvider`]
- pub fn new_with_provider(provider: Option<Arc<dyn ObjectStoreProvider>>)
-> Self {
let object_stores: DashMap<String, Arc<dyn ObjectStore>> =
DashMap::new();
object_stores.insert("file://".to_string(),
Arc::new(LocalFileSystem::new()));
- Self {
- object_stores,
- provider,
- }
+ Self { object_stores }
}
+}
- /// Adds a new store to this registry.
- ///
- /// If a store with the same schema and host existed before, it is
replaced and returned
- pub fn register_store(
+impl ObjectStoreRegistry for DefaultObjectStoreRegistry {
+ fn register_store(
&self,
- scheme: impl AsRef<str>,
- host: impl AsRef<str>,
+ key: &str,
store: Arc<dyn ObjectStore>,
) -> Option<Arc<dyn ObjectStore>> {
- let s = format!("{}://{}", scheme.as_ref(), host.as_ref());
- self.object_stores.insert(s, store)
+ self.object_stores.insert(String::from(key), store)
}
- /// Get a suitable store for the provided URL. For example:
- ///
- /// - URL with scheme `file:///` or no schema will return the default
LocalFS store
- /// - URL with scheme `s3://bucket/` will return the S3 store if it's
registered
- /// - URL with scheme `hdfs://hostname:port/` will return the hdfs store
if it's registered
- ///
- pub fn get_by_url(&self, url: impl AsRef<Url>) -> Result<Arc<dyn
ObjectStore>> {
- let url = url.as_ref();
- // First check whether can get object store from registry
- let s = &url[url::Position::BeforeScheme..url::Position::BeforePath];
- let store = self.object_stores.get(s).map(|o| o.value().clone());
-
- match store {
- Some(store) => Ok(store),
- None => match &self.provider {
- Some(provider) => {
- let store = provider.get_by_url(url)?;
- let key =
-
&url[url::Position::BeforeScheme..url::Position::BeforePath];
- self.object_stores.insert(key.to_owned(), store.clone());
Review Comment:
As pointed out in
https://github.com/apache/arrow-datafusion/pull/5543/files#r1143108925 this has
a bug, in that it registers using the entire URL authority, including things
like credentials, etc... This should instead be calling `register_store` as
"normal".
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]