tustvold commented on code in PR #5543:
URL: https://github.com/apache/arrow-datafusion/pull/5543#discussion_r1137565167
##########
datafusion-cli/src/object_storage.rs:
##########
@@ -43,28 +46,61 @@ impl FromStr for ObjectStoreScheme {
}
}
-#[derive(Debug)]
-pub struct DatafusionCliObjectStoreProvider {}
+#[derive(Debug, Default)]
+pub struct DatafusionCliObjectStoreRegistry {
+ inner: DefaultObjectStoreRegistry,
+}
+
+impl DatafusionCliObjectStoreRegistry {
+ pub fn new() -> Self {
+ Default::default()
+ }
+}
+
+impl ObjectStoreRegistry for DatafusionCliObjectStoreRegistry {
+ fn register_store(
+ &self,
+ scheme: &str,
+ host: &str,
+ store: Arc<dyn ObjectStore>,
+ ) -> Option<Arc<dyn ObjectStore>> {
+ self.inner.register_store(scheme, host, store)
+ }
+
+ fn put_with_url(
+ &self,
+ url: &Url,
+ store: Arc<dyn ObjectStore>,
+ ) -> Option<Arc<dyn ObjectStore>> {
+ self.inner.put_with_url(url, store)
+ }
+
+ fn get_by_url(&self, url: &Url) -> Result<Arc<dyn ObjectStore>> {
+ self.inner.get_by_url(url).or_else(|_| {
+ let store =
+ ObjectStoreScheme::from_str(url.scheme()).map(
+ |scheme| match scheme {
+ ObjectStoreScheme::S3 => build_s3_object_store(url),
+ ObjectStoreScheme::GCS => build_gcs_object_store(url),
+ },
+ )??;
Review Comment:
```suggestion
let store = match ObjectStoreScheme::from_str(url.scheme())?
ObjectStoreScheme::S3 => build_s3_object_store(url)?,
ObjectStoreScheme::GCS => build_gcs_object_store(url)?,
};
```
Or something similar
##########
datafusion/execution/src/object_store.rs:
##########
@@ -79,71 +79,42 @@ impl std::fmt::Display for ObjectStoreUrl {
}
}
-/// Provides a mechanism for lazy, on-demand creation of an [`ObjectStore`]
-///
-/// For example, to support reading arbitrary buckets from AWS S3
-/// without instantiating an [`ObjectStore`] for each possible bucket
-/// up front, an [`ObjectStoreProvider`] can be used to create the
-/// appropriate [`ObjectStore`] instance on demand.
-///
-/// See [`ObjectStoreRegistry::new_with_provider`]
-pub trait ObjectStoreProvider: Send + Sync + 'static {
- /// Return an ObjectStore for the provided url, called by
[`ObjectStoreRegistry::get_by_url`]
- /// when no matching store has already been registered. The result will be
cached based
- /// on its schema and authority. Any error will be returned to the caller
+/// Provides a mechanism to get and put object stores.
+pub trait ObjectStoreRegistry: Send + Sync + std::fmt::Debug + 'static {
+ /// If a store with the same schema and host existed before, it is
replaced and returned
+ fn register_store(
+ &self,
+ scheme: &str,
+ host: &str,
+ store: Arc<dyn ObjectStore>,
+ ) -> Option<Arc<dyn ObjectStore>>;
+
+ /// Insert a [`ObjectStore`] with the key of a given url got by
[`get_url_key()`]
+ ///
+ /// If a store with the same url key, it is replaced and returned
+ fn put_with_url(
Review Comment:
Do we need both a `put_with_url` and `register_store`?
The motivation for not passing the entire URL, was to avoid confusion over
what bits of the URL "counted", e.g. should credential influence this? I don't
feel strongly, but feel we probably shouldn't have two methods that do
basically the same thing
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]