This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ce1572f91 Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` (#5577)
ce1572f91 is described below

commit ce1572f91005d0fd6d2c5510bef2ec1d0cae5537
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Mar 14 12:30:25 2023 +0100

    Minor: improve docstrings for `ObjectStoreRegistry` and `ObjectStoreProvider` (#5577)
---
 datafusion/execution/src/object_store.rs | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/datafusion/execution/src/object_store.rs b/datafusion/execution/src/object_store.rs
index 9c5cca847..9b958e302 100644
--- a/datafusion/execution/src/object_store.rs
+++ b/datafusion/execution/src/object_store.rs
@@ -79,7 +79,12 @@ impl std::fmt::Display for ObjectStoreUrl {
     }
 }
 
-/// Provides a mechanism for lazy, on-demand creation of [`ObjectStore`]
+/// Provides a mechanism for lazy, on-demand creation of an [`ObjectStore`]
+///
+/// For example, to support reading arbitrary buckets from AWS S3
+/// without instantiating an [`ObjectStore`] for each possible bucket
+/// up front, an [`ObjectStoreProvider`] can be used to create the
+/// appropriate [`ObjectStore`] instance on demand.
 ///
 /// See [`ObjectStoreRegistry::new_with_provider`]
 pub trait ObjectStoreProvider: Send + Sync + 'static {
@@ -89,21 +94,29 @@ pub trait ObjectStoreProvider: Send + Sync + 'static {
     fn get_by_url(&self, url: &Url) -> Result<Arc<dyn ObjectStore>>;
 }
 
-/// [`ObjectStoreRegistry`] stores [`ObjectStore`] keyed by url scheme and authority, that is
-/// the part of a URL preceding the path
+/// [`ObjectStoreRegistry`] maps a URL to an [`ObjectStore`] instance,
+/// and allows DataFusion to read from different [`ObjectStore`]
+/// instances. For example DataFusion might be configured so that
+///
+/// 1. `s3://my_bucket/lineitem/` mapped to the `/lineitem` path on an
+/// AWS S3 object store bound to `my_bucket`
 ///
-/// This is used by DataFusion to find an appropriate [`ObjectStore`] for a [`ListingTableUrl`]
-/// provided in a query such as
+/// 2. `s3://my_other_bucket/lineitem/` mapped to the (same)
+/// `/lineitem` path on a *different* AWS S3 object store bound to
+/// `my_other_bucket`
+///
+/// When given a [`ListingTableUrl`], DataFusion tries to find an
+/// appropriate [`ObjectStore`]. For example
 ///
 /// ```sql
 /// create external table unicorns stored as parquet location 's3://my_bucket/lineitem/';
 /// ```
 ///
-/// In this particular case the url `s3://my_bucket/lineitem/` will be provided to
+/// In this particular case, the url `s3://my_bucket/lineitem/` will be provided to
 /// [`ObjectStoreRegistry::get_by_url`] and one of three things will happen:
 ///
 /// - If an [`ObjectStore`] has been registered with [`ObjectStoreRegistry::register_store`] with
-/// scheme `s3` and host `my_bucket`, this [`ObjectStore`] will be returned
+/// scheme `s3` and host `my_bucket`, that [`ObjectStore`] will be returned
 ///
 /// - If an [`ObjectStoreProvider`] has been associated with this [`ObjectStoreRegistry`] using
 /// [`ObjectStoreRegistry::new_with_provider`], [`ObjectStoreProvider::get_by_url`] will be invoked,
@@ -115,9 +128,10 @@ pub trait ObjectStoreProvider: Send + Sync + 'static {
 ///
 /// This allows for two different use-cases:
 ///
-/// * DBMS systems where object store buckets are explicitly created using DDL, can register these
+/// 1. Systems where object store buckets are explicitly created using DDL, can register these
 /// buckets using [`ObjectStoreRegistry::register_store`]
-/// * DMBS systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
+///
+/// 2. Systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
 /// lazily, on-demand using [`ObjectStoreProvider`]
 ///
 /// [`ListingTableUrl`]: crate::datasource::listing::ListingTableUrl

Reply via email to