This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git


The following commit(s) were added to refs/heads/main by this push:
     new 612d74f  Add PaginatedListStore (#371)
612d74f is described below

commit 612d74f183a138cd290486e6a00977173b05e566
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sun May 25 12:15:29 2025 +0100

    Add PaginatedListStore (#371)
    
    * Add PaginatedListStore
    
    * Format
    
    * More docs
    
    * Format
    
    * Fix empty Azure pagination token
---
 src/aws/client.rs   | 36 ++++++++++++---------
 src/aws/mod.rs      | 15 ++++++++-
 src/azure/client.rs | 37 +++++++++++++++-------
 src/azure/mod.rs    | 15 ++++++++-
 src/client/list.rs  | 28 +++++++++--------
 src/gcp/client.rs   | 37 +++++++++++++---------
 src/gcp/mod.rs      | 15 ++++++++-
 src/integration.rs  | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs          |  9 ++++++
 src/list.rs         | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 312 insertions(+), 56 deletions(-)

diff --git a/src/aws/client.rs b/src/aws/client.rs
index fb2a033..a1fb463 100644
--- a/src/aws/client.rs
+++ b/src/aws/client.rs
@@ -33,11 +33,11 @@ use crate::client::s3::{
     InitiateMultipartUploadResult, ListResponse, PartMetadata,
 };
 use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
 use crate::multipart::PartId;
-use crate::path::DELIMITER;
 use crate::{
-    Attribute, Attributes, ClientOptions, GetOptions, ListResult, MultipartId, 
Path,
-    PutMultipartOpts, PutPayload, PutResult, Result, RetryConfig, TagSet,
+    Attribute, Attributes, ClientOptions, GetOptions, MultipartId, Path, 
PutMultipartOpts,
+    PutPayload, PutResult, Result, RetryConfig, TagSet,
 };
 use async_trait::async_trait;
 use base64::prelude::BASE64_STANDARD;
@@ -877,21 +877,19 @@ impl ListClient for Arc<S3Client> {
     async fn list_request(
         &self,
         prefix: Option<&str>,
-        delimiter: bool,
-        token: Option<&str>,
-        offset: Option<&str>,
-    ) -> Result<(ListResult, Option<String>)> {
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
         let credential = self.config.get_session_credential().await?;
         let url = self.config.bucket_endpoint.clone();
 
         let mut query = Vec::with_capacity(4);
 
-        if let Some(token) = token {
-            query.push(("continuation-token", token))
+        if let Some(token) = &opts.page_token {
+            query.push(("continuation-token", token.as_ref()))
         }
 
-        if delimiter {
-            query.push(("delimiter", DELIMITER))
+        if let Some(d) = &opts.delimiter {
+            query.push(("delimiter", d.as_ref()))
         }
 
         query.push(("list-type", "2"));
@@ -900,13 +898,20 @@ impl ListClient for Arc<S3Client> {
             query.push(("prefix", prefix))
         }
 
-        if let Some(offset) = offset {
-            query.push(("start-after", offset))
+        if let Some(offset) = &opts.offset {
+            query.push(("start-after", offset.as_ref()))
+        }
+
+        let max_keys_str;
+        if let Some(max_keys) = &opts.max_keys {
+            max_keys_str = max_keys.to_string();
+            query.push(("max-keys", max_keys_str.as_ref()))
         }
 
         let response = self
             .client
             .request(Method::GET, &url)
+            .extensions(opts.extensions)
             .query(&query)
             .with_aws_sigv4(credential.authorizer(), None)
             .send_retry(&self.config.retry_config)
@@ -922,7 +927,10 @@ impl ListClient for Arc<S3Client> {
 
         let token = response.next_continuation_token.take();
 
-        Ok((response.try_into()?, token))
+        Ok(PaginatedListResult {
+            result: response.try_into()?,
+            page_token: token,
+        })
     }
 }
 
diff --git a/src/aws/mod.rs b/src/aws/mod.rs
index 119ba10..83edf84 100644
--- a/src/aws/mod.rs
+++ b/src/aws/mod.rs
@@ -38,7 +38,7 @@ use url::Url;
 
 use crate::aws::client::{CompleteMultipartMode, PutPartPayload, RequestError, 
S3Client};
 use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
 use crate::client::CredentialProvider;
 use crate::multipart::{MultipartStore, PartId};
 use crate::signer::Signer;
@@ -78,6 +78,7 @@ const STORE: &str = "S3";
 /// [`CredentialProvider`] for [`AmazonS3`]
 pub type AwsCredentialProvider = Arc<dyn CredentialProvider<Credential = 
AwsCredential>>;
 use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult, 
PaginatedListStore};
 pub use credential::{AwsAuthorizer, AwsCredential};
 
 /// Interface for [Amazon S3](https://aws.amazon.com/s3/).
@@ -496,6 +497,17 @@ impl MultipartStore for AmazonS3 {
     }
 }
 
+#[async_trait]
+impl PaginatedListStore for AmazonS3 {
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        self.client.list_request(prefix, opts).await
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -594,6 +606,7 @@ mod tests {
         signing(&integration).await;
         s3_encryption(&integration).await;
         put_get_attributes(&integration).await;
+        list_paginated(&integration, &integration).await;
 
         // Object tagging is not supported by S3 Express One Zone
         if config.session_provider.is_none() {
diff --git a/src/azure/client.rs b/src/azure/client.rs
index 2d5db13..329cdd4 100644
--- a/src/azure/client.rs
+++ b/src/azure/client.rs
@@ -24,8 +24,8 @@ use crate::client::header::{get_put_result, HeaderConfig};
 use crate::client::list::ListClient;
 use crate::client::retry::RetryExt;
 use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpRequest, 
HttpResponse};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
 use crate::multipart::PartId;
-use crate::path::DELIMITER;
 use crate::util::{deserialize_rfc1123, GetRange};
 use crate::{
     Attribute, Attributes, ClientOptions, GetOptions, ListResult, ObjectMeta, 
Path, PutMode,
@@ -961,11 +961,13 @@ impl ListClient for Arc<AzureClient> {
     async fn list_request(
         &self,
         prefix: Option<&str>,
-        delimiter: bool,
-        token: Option<&str>,
-        offset: Option<&str>,
-    ) -> Result<(ListResult, Option<String>)> {
-        assert!(offset.is_none()); // Not yet supported
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        if opts.offset.is_some() {
+            return Err(crate::Error::NotSupported {
+                source: "Azure does not support listing with offsets".into(),
+            });
+        }
 
         let credential = self.get_credential().await?;
         let url = self.config.path_url(&Path::default());
@@ -978,21 +980,29 @@ impl ListClient for Arc<AzureClient> {
             query.push(("prefix", prefix))
         }
 
-        if delimiter {
-            query.push(("delimiter", DELIMITER))
+        if let Some(delimiter) = &opts.delimiter {
+            query.push(("delimiter", delimiter.as_ref()))
         }
 
-        if let Some(token) = token {
-            query.push(("marker", token))
+        if let Some(token) = &opts.page_token {
+            query.push(("marker", token.as_ref()))
+        }
+
+        let max_keys_str;
+        if let Some(max_keys) = &opts.max_keys {
+            max_keys_str = max_keys.to_string();
+            query.push(("maxresults", max_keys_str.as_ref()))
         }
 
         let sensitive = credential
             .as_deref()
             .map(|c| c.sensitive_request())
             .unwrap_or_default();
+
         let response = self
             .client
             .get(url.as_str())
+            .extensions(opts.extensions)
             .query(&query)
             .with_azure_authorization(&credential, &self.config.account)
             .retryable(&self.config.retry_config)
@@ -1008,9 +1018,12 @@ impl ListClient for Arc<AzureClient> {
         let mut response: ListResultInternal = 
quick_xml::de::from_reader(response.reader())
             .map_err(|source| Error::InvalidListResponse { source })?;
 
-        let token = response.next_marker.take();
+        let token = response.next_marker.take().filter(|x| !x.is_empty());
 
-        Ok((to_list_result(response, prefix)?, token))
+        Ok(PaginatedListResult {
+            result: to_list_result(response, prefix)?,
+            page_token: token,
+        })
     }
 }
 
diff --git a/src/azure/mod.rs b/src/azure/mod.rs
index b4243dd..d686bac 100644
--- a/src/azure/mod.rs
+++ b/src/azure/mod.rs
@@ -38,7 +38,7 @@ use std::time::Duration;
 use url::Url;
 
 use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
 use crate::client::CredentialProvider;
 pub use credential::{authority_hosts, AzureAccessKey, AzureAuthorizer};
 
@@ -50,6 +50,7 @@ mod credential;
 pub type AzureCredentialProvider = Arc<dyn CredentialProvider<Credential = 
AzureCredential>>;
 use crate::azure::client::AzureClient;
 use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult, 
PaginatedListStore};
 pub use builder::{AzureConfigKey, MicrosoftAzureBuilder};
 pub use credential::AzureCredential;
 
@@ -292,6 +293,17 @@ impl MultipartStore for MicrosoftAzure {
     }
 }
 
+#[async_trait]
+impl PaginatedListStore for MicrosoftAzure {
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        self.client.list_request(prefix, opts).await
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -316,6 +328,7 @@ mod tests {
         multipart_race_condition(&integration, false).await;
         multipart_out_of_order(&integration).await;
         signing(&integration).await;
+        list_paginated(&integration, &integration).await;
 
         let validate = !integration.client.config().disable_tagging;
         tagging(
diff --git a/src/client/list.rs b/src/client/list.rs
index fe9bfeb..7a2cf62 100644
--- a/src/client/list.rs
+++ b/src/client/list.rs
@@ -16,12 +16,14 @@
 // under the License.
 
 use crate::client::pagination::stream_paginated;
-use crate::path::Path;
+use crate::list::{PaginatedListOptions, PaginatedListResult};
+use crate::path::{Path, DELIMITER};
 use crate::Result;
 use crate::{ListResult, ObjectMeta};
 use async_trait::async_trait;
 use futures::stream::BoxStream;
 use futures::{StreamExt, TryStreamExt};
+use std::borrow::Cow;
 use std::collections::BTreeSet;
 
 /// A client that can perform paginated list requests
@@ -30,10 +32,8 @@ pub(crate) trait ListClient: Send + Sync + 'static {
     async fn list_request(
         &self,
         prefix: Option<&str>,
-        delimiter: bool,
-        token: Option<&str>,
-        offset: Option<&str>,
-    ) -> Result<(ListResult, Option<String>)>;
+        options: PaginatedListOptions,
+    ) -> Result<PaginatedListResult>;
 }
 
 /// Extension trait for [`ListClient`] that adds common listing functionality
@@ -69,21 +69,23 @@ impl<T: ListClient + Clone> ListClientExt for T {
         let offset = offset.map(|x| x.to_string());
         let prefix = prefix
             .filter(|x| !x.as_ref().is_empty())
-            .map(|p| format!("{}{}", p.as_ref(), crate::path::DELIMITER));
-
+            .map(|p| format!("{}{}", p.as_ref(), DELIMITER));
         stream_paginated(
             self.clone(),
             (prefix, offset),
-            move |client, (prefix, offset), token| async move {
-                let (r, next_token) = client
+            move |client, (prefix, offset), page_token| async move {
+                let r = client
                     .list_request(
                         prefix.as_deref(),
-                        delimiter,
-                        token.as_deref(),
-                        offset.as_deref(),
+                        PaginatedListOptions {
+                            offset: offset.clone(),
+                            delimiter: 
delimiter.then_some(Cow::Borrowed(DELIMITER)),
+                            page_token,
+                            ..Default::default()
+                        },
                     )
                     .await?;
-                Ok((r, (prefix, offset), next_token))
+                Ok((r.result, (prefix, offset), r.page_token))
             },
         )
         .boxed()
diff --git a/src/gcp/client.rs b/src/gcp/client.rs
index 911a053..f815085 100644
--- a/src/gcp/client.rs
+++ b/src/gcp/client.rs
@@ -27,12 +27,13 @@ use crate::client::s3::{
 use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse};
 use crate::gcp::credential::CredentialExt;
 use crate::gcp::{GcpCredential, GcpCredentialProvider, 
GcpSigningCredentialProvider, STORE};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
 use crate::multipart::PartId;
-use crate::path::{Path, DELIMITER};
+use crate::path::Path;
 use crate::util::hex_encode;
 use crate::{
-    Attribute, Attributes, ClientOptions, GetOptions, ListResult, MultipartId, 
PutMode,
-    PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, RetryConfig,
+    Attribute, Attributes, ClientOptions, GetOptions, MultipartId, PutMode, 
PutMultipartOpts,
+    PutOptions, PutPayload, PutResult, Result, RetryConfig,
 };
 use async_trait::async_trait;
 use base64::prelude::BASE64_STANDARD;
@@ -652,24 +653,22 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
     async fn list_request(
         &self,
         prefix: Option<&str>,
-        delimiter: bool,
-        page_token: Option<&str>,
-        offset: Option<&str>,
-    ) -> Result<(ListResult, Option<String>)> {
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
         let credential = self.get_credential().await?;
         let url = format!("{}/{}", self.config.base_url, 
self.bucket_name_encoded);
 
         let mut query = Vec::with_capacity(5);
         query.push(("list-type", "2"));
-        if delimiter {
-            query.push(("delimiter", DELIMITER))
+        if let Some(delimiter) = &opts.delimiter {
+            query.push(("delimiter", delimiter.as_ref()))
         }
 
-        if let Some(prefix) = &prefix {
+        if let Some(prefix) = prefix {
             query.push(("prefix", prefix))
         }
 
-        if let Some(page_token) = page_token {
+        if let Some(page_token) = &opts.page_token {
             query.push(("continuation-token", page_token))
         }
 
@@ -677,13 +676,20 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
             query.push(("max-keys", max_results))
         }
 
-        if let Some(offset) = offset {
-            query.push(("start-after", offset))
+        if let Some(offset) = &opts.offset {
+            query.push(("start-after", offset.as_ref()))
+        }
+
+        let max_keys_str;
+        if let Some(max_keys) = &opts.max_keys {
+            max_keys_str = max_keys.to_string();
+            query.push(("max-keys", max_keys_str.as_ref()))
         }
 
         let response = self
             .client
             .request(Method::GET, url)
+            .extensions(opts.extensions)
             .query(&query)
             .with_bearer_auth(credential.as_deref())
             .send_retry(&self.config.retry_config)
@@ -698,6 +704,9 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
             .map_err(|source| Error::InvalidListResponse { source })?;
 
         let token = response.next_continuation_token.take();
-        Ok((response.try_into()?, token))
+        Ok(PaginatedListResult {
+            result: response.try_into()?,
+            page_token: token,
+        })
     }
 }
diff --git a/src/gcp/mod.rs b/src/gcp/mod.rs
index 5f8c67d..dfd638a 100644
--- a/src/gcp/mod.rs
+++ b/src/gcp/mod.rs
@@ -52,8 +52,9 @@ use http::Method;
 use url::Url;
 
 use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
 use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult, 
PaginatedListStore};
 use crate::multipart::MultipartStore;
 pub use builder::{GoogleCloudStorageBuilder, GoogleConfigKey};
 pub use credential::{GcpCredential, GcpSigningCredential, ServiceAccountKey};
@@ -268,6 +269,17 @@ impl Signer for GoogleCloudStorage {
     }
 }
 
+#[async_trait]
+impl PaginatedListStore for GoogleCloudStorage {
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult> {
+        self.client.list_request(prefix, opts).await
+    }
+}
+
 #[cfg(test)]
 mod test {
 
@@ -299,6 +311,7 @@ mod test {
             multipart(&integration, &integration).await;
             multipart_race_condition(&integration, true).await;
             multipart_out_of_order(&integration).await;
+            list_paginated(&integration, &integration).await;
             // Fake GCS server doesn't currently honor preconditions
             get_opts(&integration).await;
             put_opts(&integration, true).await;
diff --git a/src/integration.rs b/src/integration.rs
index 05cd97f..73339a6 100644
--- a/src/integration.rs
+++ b/src/integration.rs
@@ -24,6 +24,7 @@
 //!
 //! They are intended solely for testing purposes.
 
+use crate::list::{PaginatedListOptions, PaginatedListStore};
 use crate::multipart::MultipartStore;
 use crate::path::Path;
 use crate::{
@@ -35,6 +36,7 @@ use futures::stream::FuturesUnordered;
 use futures::{StreamExt, TryStreamExt};
 use rand::distr::Alphanumeric;
 use rand::{rng, Rng};
+use std::collections::HashSet;
 
 pub(crate) async fn flatten_list_stream(
     storage: &DynObjectStore,
@@ -1230,3 +1232,92 @@ pub async fn multipart_out_of_order(storage: &dyn 
ObjectStore) {
     let bytes = result.bytes().await.unwrap();
     assert_eq!(bytes, full);
 }
+
+/// Tests [`PaginatedListStore`]
+pub async fn list_paginated(storage: &dyn ObjectStore, list: &dyn 
PaginatedListStore) {
+    delete_fixtures(storage).await;
+
+    let r = list.list_paginated(None, Default::default()).await.unwrap();
+    assert_eq!(r.page_token, None);
+    assert_eq!(r.result.objects, vec![]);
+    assert_eq!(r.result.common_prefixes, vec![]);
+
+    let p1 = Path::from("foo/bar");
+    let p2 = Path::from("foo/bax");
+    let p3 = Path::from("foo/baz/bar");
+    let p4 = Path::from("foo/baz/banana");
+    let p5 = Path::from("fob/banana");
+    let p6 = Path::from("fongle/banana");
+
+    let paths = HashSet::from_iter([&p1, &p2, &p3, &p4, &p5, &p6]);
+
+    for path in &paths {
+        storage.put(path, vec![1].into()).await.unwrap();
+    }
+
+    // Test basic listing
+
+    let mut listed = HashSet::new();
+    let mut opts = PaginatedListOptions {
+        max_keys: Some(5),
+        ..Default::default()
+    };
+    let ret = list.list_paginated(None, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 5);
+    listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+    opts.page_token = Some(ret.page_token.unwrap());
+    let ret = list.list_paginated(None, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 1);
+    listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+    assert_eq!(listed, paths);
+
+    // List with prefix
+    let prefix = Some("foo/");
+    opts.page_token = None;
+    let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 4);
+    assert!(ret.page_token.is_none());
+
+    let actual = HashSet::from_iter(ret.result.objects.iter().map(|x| 
&x.location));
+    assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2, &p3, &p4]));
+
+    // List with partial prefix
+    let prefix = Some("fo");
+    opts.page_token = None;
+    let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 5);
+    listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+    opts.page_token = Some(ret.page_token.unwrap());
+    let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 1);
+    listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+    assert_eq!(listed, paths);
+
+    // List with prefix and delimiter
+    let prefix = Some("foo/");
+    opts.page_token = None;
+    opts.delimiter = Some("/".into());
+    let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 2);
+    assert_eq!(ret.result.common_prefixes, vec![Path::from("foo/baz")]);
+    assert!(ret.page_token.is_none());
+
+    let actual = HashSet::from_iter(ret.result.objects.iter().map(|x| 
&x.location));
+    assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2]));
+
+    // List with partial prefix and delimiter
+    let prefix = Some("fo");
+    opts.page_token = None;
+    opts.delimiter = Some("/".into());
+    let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+    assert_eq!(ret.result.objects.len(), 0);
+    assert_eq!(
+        HashSet::<Path>::from_iter(ret.result.common_prefixes),
+        HashSet::from_iter([Path::from("foo"), Path::from("fob"), 
Path::from("fongle")])
+    );
+    assert!(ret.page_token.is_none());
+}
diff --git a/src/lib.rs b/src/lib.rs
index ffca24a..4743f96 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -547,6 +547,7 @@ mod tags;
 
 pub use tags::TagSet;
 
+pub mod list;
 pub mod multipart;
 mod parse;
 mod payload;
@@ -609,6 +610,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + 
Debug + 'static {
     ///
     /// Client should prefer [`ObjectStore::put`] for small payloads, as 
streaming uploads
     /// typically require multiple separate requests. See [`MultipartUpload`] 
for more information
+    ///
+    /// For more advanced multipart uploads see 
[`MultipartStore`](multipart::MultipartStore)
     async fn put_multipart(&self, location: &Path) -> Result<Box<dyn 
MultipartUpload>> {
         self.put_multipart_opts(location, PutMultipartOpts::default())
             .await
@@ -618,6 +621,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + 
Debug + 'static {
     ///
     /// Client should prefer [`ObjectStore::put`] for small payloads, as 
streaming uploads
     /// typically require multiple separate requests. See [`MultipartUpload`] 
for more information
+    ///
+    /// For more advanced multipart uploads see 
[`MultipartStore`](multipart::MultipartStore)
     async fn put_multipart_opts(
         &self,
         location: &Path,
@@ -728,6 +733,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + 
Debug + 'static {
     /// `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x` will be 
included.
     ///
     /// Note: the order of returned [`ObjectMeta`] is not guaranteed
+    ///
+    /// For more advanced listing see 
[`PaginatedListStore`](list::PaginatedListStore)
     fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, 
Result<ObjectMeta>>;
 
     /// List all the objects with the given prefix and a location greater than 
`offset`
@@ -736,6 +743,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + 
Debug + 'static {
     /// the number of network requests required
     ///
     /// Note: the order of returned [`ObjectMeta`] is not guaranteed
+    ///
+    /// For more advanced listing see 
[`PaginatedListStore`](list::PaginatedListStore)
     fn list_with_offset(
         &self,
         prefix: Option<&Path>,
diff --git a/src/list.rs b/src/list.rs
new file mode 100644
index 0000000..e73fe52
--- /dev/null
+++ b/src/list.rs
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Paginated Listing
+
+use super::Result;
+use crate::ListResult;
+use async_trait::async_trait;
+use std::borrow::Cow;
+
+/// Options for a paginated list request
+#[derive(Debug, Default, Clone)]
+pub struct PaginatedListOptions {
+    /// Path to start listing from
+    ///
+    /// Note: Not all stores support this
+    pub offset: Option<String>,
+
+    /// A delimiter used to group keys with a common prefix
+    ///
+    /// Note: Some stores only support `/`
+    pub delimiter: Option<Cow<'static, str>>,
+
+    /// The maximum number of paths to return
+    pub max_keys: Option<usize>,
+
+    /// A page token from a previous request
+    ///
+    /// Note: Behaviour is implementation defined if the previous request
+    /// used a different prefix or options
+    pub page_token: Option<String>,
+
+    /// Implementation-specific extensions. Intended for use by implementations
+    /// that need to pass context-specific information (like tracing spans) via trait methods.
+    ///
+    /// These extensions are ignored entirely by backends offered through this crate.
+    pub extensions: http::Extensions,
+}
+
+/// A [`ListResult`] with optional pagination token
+#[derive(Debug)]
+pub struct PaginatedListResult {
+    /// The list result
+    pub result: ListResult,
+    /// If the result set was truncated, the pagination token to fetch the next results
+    pub page_token: Option<String>,
+}
+
+/// A low-level interface for interacting with paginated listing APIs
+///
+/// Most use-cases should prefer [`ObjectStore::list`] as this is supported by more
+/// backends, including [`LocalFileSystem`], however, [`PaginatedListStore`] can be
+/// used where stateless pagination or non-path segment based listing is required
+///
+/// [`ObjectStore::list`]: crate::ObjectStore::list
+/// [`LocalFileSystem`]: crate::local::LocalFileSystem
+#[async_trait]
+pub trait PaginatedListStore: Send + Sync + 'static {
+    /// Perform a paginated list request
+    ///
+    /// Note: the order of returned objects is not guaranteed and
+    /// unlike [`ObjectStore::list`] a trailing delimiter is not
+    /// automatically added to `prefix`
+    ///
+    /// [`ObjectStore::list`]: crate::ObjectStore::list
+    async fn list_paginated(
+        &self,
+        prefix: Option<&str>,
+        opts: PaginatedListOptions,
+    ) -> Result<PaginatedListResult>;
+}

Reply via email to