This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs-object-store.git
The following commit(s) were added to refs/heads/main by this push:
new 612d74f Add PaginatedListStore (#371)
612d74f is described below
commit 612d74f183a138cd290486e6a00977173b05e566
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Sun May 25 12:15:29 2025 +0100
Add PaginatedListStore (#371)
* Add PaginatedListStore
* Format
* More docs
* Format
* Fix empty Azure pagination token
---
src/aws/client.rs | 36 ++++++++++++---------
src/aws/mod.rs | 15 ++++++++-
src/azure/client.rs | 37 +++++++++++++++-------
src/azure/mod.rs | 15 ++++++++-
src/client/list.rs | 28 +++++++++--------
src/gcp/client.rs | 37 +++++++++++++---------
src/gcp/mod.rs | 15 ++++++++-
src/integration.rs | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/lib.rs | 9 ++++++
src/list.rs | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 312 insertions(+), 56 deletions(-)
diff --git a/src/aws/client.rs b/src/aws/client.rs
index fb2a033..a1fb463 100644
--- a/src/aws/client.rs
+++ b/src/aws/client.rs
@@ -33,11 +33,11 @@ use crate::client::s3::{
InitiateMultipartUploadResult, ListResponse, PartMetadata,
};
use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
use crate::multipart::PartId;
-use crate::path::DELIMITER;
use crate::{
- Attribute, Attributes, ClientOptions, GetOptions, ListResult, MultipartId,
Path,
- PutMultipartOpts, PutPayload, PutResult, Result, RetryConfig, TagSet,
+ Attribute, Attributes, ClientOptions, GetOptions, MultipartId, Path,
PutMultipartOpts,
+ PutPayload, PutResult, Result, RetryConfig, TagSet,
};
use async_trait::async_trait;
use base64::prelude::BASE64_STANDARD;
@@ -877,21 +877,19 @@ impl ListClient for Arc<S3Client> {
async fn list_request(
&self,
prefix: Option<&str>,
- delimiter: bool,
- token: Option<&str>,
- offset: Option<&str>,
- ) -> Result<(ListResult, Option<String>)> {
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
let credential = self.config.get_session_credential().await?;
let url = self.config.bucket_endpoint.clone();
let mut query = Vec::with_capacity(4);
- if let Some(token) = token {
- query.push(("continuation-token", token))
+ if let Some(token) = &opts.page_token {
+ query.push(("continuation-token", token.as_ref()))
}
- if delimiter {
- query.push(("delimiter", DELIMITER))
+ if let Some(d) = &opts.delimiter {
+ query.push(("delimiter", d.as_ref()))
}
query.push(("list-type", "2"));
@@ -900,13 +898,20 @@ impl ListClient for Arc<S3Client> {
query.push(("prefix", prefix))
}
- if let Some(offset) = offset {
- query.push(("start-after", offset))
+ if let Some(offset) = &opts.offset {
+ query.push(("start-after", offset.as_ref()))
+ }
+
+ let max_keys_str;
+ if let Some(max_keys) = &opts.max_keys {
+ max_keys_str = max_keys.to_string();
+ query.push(("max-keys", max_keys_str.as_ref()))
}
let response = self
.client
.request(Method::GET, &url)
+ .extensions(opts.extensions)
.query(&query)
.with_aws_sigv4(credential.authorizer(), None)
.send_retry(&self.config.retry_config)
@@ -922,7 +927,10 @@ impl ListClient for Arc<S3Client> {
let token = response.next_continuation_token.take();
- Ok((response.try_into()?, token))
+ Ok(PaginatedListResult {
+ result: response.try_into()?,
+ page_token: token,
+ })
}
}
diff --git a/src/aws/mod.rs b/src/aws/mod.rs
index 119ba10..83edf84 100644
--- a/src/aws/mod.rs
+++ b/src/aws/mod.rs
@@ -38,7 +38,7 @@ use url::Url;
use crate::aws::client::{CompleteMultipartMode, PutPartPayload, RequestError,
S3Client};
use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
use crate::client::CredentialProvider;
use crate::multipart::{MultipartStore, PartId};
use crate::signer::Signer;
@@ -78,6 +78,7 @@ const STORE: &str = "S3";
/// [`CredentialProvider`] for [`AmazonS3`]
pub type AwsCredentialProvider = Arc<dyn CredentialProvider<Credential =
AwsCredential>>;
use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult,
PaginatedListStore};
pub use credential::{AwsAuthorizer, AwsCredential};
/// Interface for [Amazon S3](https://aws.amazon.com/s3/).
@@ -496,6 +497,17 @@ impl MultipartStore for AmazonS3 {
}
}
+#[async_trait]
+impl PaginatedListStore for AmazonS3 {
+ async fn list_paginated(
+ &self,
+ prefix: Option<&str>,
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
+ self.client.list_request(prefix, opts).await
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -594,6 +606,7 @@ mod tests {
signing(&integration).await;
s3_encryption(&integration).await;
put_get_attributes(&integration).await;
+ list_paginated(&integration, &integration).await;
// Object tagging is not supported by S3 Express One Zone
if config.session_provider.is_none() {
diff --git a/src/azure/client.rs b/src/azure/client.rs
index 2d5db13..329cdd4 100644
--- a/src/azure/client.rs
+++ b/src/azure/client.rs
@@ -24,8 +24,8 @@ use crate::client::header::{get_put_result, HeaderConfig};
use crate::client::list::ListClient;
use crate::client::retry::RetryExt;
use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpRequest,
HttpResponse};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
use crate::multipart::PartId;
-use crate::path::DELIMITER;
use crate::util::{deserialize_rfc1123, GetRange};
use crate::{
Attribute, Attributes, ClientOptions, GetOptions, ListResult, ObjectMeta,
Path, PutMode,
@@ -961,11 +961,13 @@ impl ListClient for Arc<AzureClient> {
async fn list_request(
&self,
prefix: Option<&str>,
- delimiter: bool,
- token: Option<&str>,
- offset: Option<&str>,
- ) -> Result<(ListResult, Option<String>)> {
- assert!(offset.is_none()); // Not yet supported
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
+ if opts.offset.is_some() {
+ return Err(crate::Error::NotSupported {
+ source: "Azure does not support listing with offsets".into(),
+ });
+ }
let credential = self.get_credential().await?;
let url = self.config.path_url(&Path::default());
@@ -978,21 +980,29 @@ impl ListClient for Arc<AzureClient> {
query.push(("prefix", prefix))
}
- if delimiter {
- query.push(("delimiter", DELIMITER))
+ if let Some(delimiter) = &opts.delimiter {
+ query.push(("delimiter", delimiter.as_ref()))
}
- if let Some(token) = token {
- query.push(("marker", token))
+ if let Some(token) = &opts.page_token {
+ query.push(("marker", token.as_ref()))
+ }
+
+ let max_keys_str;
+ if let Some(max_keys) = &opts.max_keys {
+ max_keys_str = max_keys.to_string();
+ query.push(("maxresults", max_keys_str.as_ref()))
}
let sensitive = credential
.as_deref()
.map(|c| c.sensitive_request())
.unwrap_or_default();
+
let response = self
.client
.get(url.as_str())
+ .extensions(opts.extensions)
.query(&query)
.with_azure_authorization(&credential, &self.config.account)
.retryable(&self.config.retry_config)
@@ -1008,9 +1018,12 @@ impl ListClient for Arc<AzureClient> {
let mut response: ListResultInternal =
quick_xml::de::from_reader(response.reader())
.map_err(|source| Error::InvalidListResponse { source })?;
- let token = response.next_marker.take();
+ let token = response.next_marker.take().filter(|x| !x.is_empty());
- Ok((to_list_result(response, prefix)?, token))
+ Ok(PaginatedListResult {
+ result: to_list_result(response, prefix)?,
+ page_token: token,
+ })
}
}
diff --git a/src/azure/mod.rs b/src/azure/mod.rs
index b4243dd..d686bac 100644
--- a/src/azure/mod.rs
+++ b/src/azure/mod.rs
@@ -38,7 +38,7 @@ use std::time::Duration;
use url::Url;
use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
use crate::client::CredentialProvider;
pub use credential::{authority_hosts, AzureAccessKey, AzureAuthorizer};
@@ -50,6 +50,7 @@ mod credential;
pub type AzureCredentialProvider = Arc<dyn CredentialProvider<Credential =
AzureCredential>>;
use crate::azure::client::AzureClient;
use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult,
PaginatedListStore};
pub use builder::{AzureConfigKey, MicrosoftAzureBuilder};
pub use credential::AzureCredential;
@@ -292,6 +293,17 @@ impl MultipartStore for MicrosoftAzure {
}
}
+#[async_trait]
+impl PaginatedListStore for MicrosoftAzure {
+ async fn list_paginated(
+ &self,
+ prefix: Option<&str>,
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
+ self.client.list_request(prefix, opts).await
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -316,6 +328,7 @@ mod tests {
multipart_race_condition(&integration, false).await;
multipart_out_of_order(&integration).await;
signing(&integration).await;
+ list_paginated(&integration, &integration).await;
let validate = !integration.client.config().disable_tagging;
tagging(
diff --git a/src/client/list.rs b/src/client/list.rs
index fe9bfeb..7a2cf62 100644
--- a/src/client/list.rs
+++ b/src/client/list.rs
@@ -16,12 +16,14 @@
// under the License.
use crate::client::pagination::stream_paginated;
-use crate::path::Path;
+use crate::list::{PaginatedListOptions, PaginatedListResult};
+use crate::path::{Path, DELIMITER};
use crate::Result;
use crate::{ListResult, ObjectMeta};
use async_trait::async_trait;
use futures::stream::BoxStream;
use futures::{StreamExt, TryStreamExt};
+use std::borrow::Cow;
use std::collections::BTreeSet;
/// A client that can perform paginated list requests
@@ -30,10 +32,8 @@ pub(crate) trait ListClient: Send + Sync + 'static {
async fn list_request(
&self,
prefix: Option<&str>,
- delimiter: bool,
- token: Option<&str>,
- offset: Option<&str>,
- ) -> Result<(ListResult, Option<String>)>;
+ options: PaginatedListOptions,
+ ) -> Result<PaginatedListResult>;
}
/// Extension trait for [`ListClient`] that adds common listing functionality
@@ -69,21 +69,23 @@ impl<T: ListClient + Clone> ListClientExt for T {
let offset = offset.map(|x| x.to_string());
let prefix = prefix
.filter(|x| !x.as_ref().is_empty())
- .map(|p| format!("{}{}", p.as_ref(), crate::path::DELIMITER));
-
+ .map(|p| format!("{}{}", p.as_ref(), DELIMITER));
stream_paginated(
self.clone(),
(prefix, offset),
- move |client, (prefix, offset), token| async move {
- let (r, next_token) = client
+ move |client, (prefix, offset), page_token| async move {
+ let r = client
.list_request(
prefix.as_deref(),
- delimiter,
- token.as_deref(),
- offset.as_deref(),
+ PaginatedListOptions {
+ offset: offset.clone(),
+ delimiter:
delimiter.then_some(Cow::Borrowed(DELIMITER)),
+ page_token,
+ ..Default::default()
+ },
)
.await?;
- Ok((r, (prefix, offset), next_token))
+ Ok((r.result, (prefix, offset), r.page_token))
},
)
.boxed()
diff --git a/src/gcp/client.rs b/src/gcp/client.rs
index 911a053..f815085 100644
--- a/src/gcp/client.rs
+++ b/src/gcp/client.rs
@@ -27,12 +27,13 @@ use crate::client::s3::{
use crate::client::{GetOptionsExt, HttpClient, HttpError, HttpResponse};
use crate::gcp::credential::CredentialExt;
use crate::gcp::{GcpCredential, GcpCredentialProvider,
GcpSigningCredentialProvider, STORE};
+use crate::list::{PaginatedListOptions, PaginatedListResult};
use crate::multipart::PartId;
-use crate::path::{Path, DELIMITER};
+use crate::path::Path;
use crate::util::hex_encode;
use crate::{
- Attribute, Attributes, ClientOptions, GetOptions, ListResult, MultipartId,
PutMode,
- PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, RetryConfig,
+ Attribute, Attributes, ClientOptions, GetOptions, MultipartId, PutMode,
PutMultipartOpts,
+ PutOptions, PutPayload, PutResult, Result, RetryConfig,
};
use async_trait::async_trait;
use base64::prelude::BASE64_STANDARD;
@@ -652,24 +653,22 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
async fn list_request(
&self,
prefix: Option<&str>,
- delimiter: bool,
- page_token: Option<&str>,
- offset: Option<&str>,
- ) -> Result<(ListResult, Option<String>)> {
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
let credential = self.get_credential().await?;
let url = format!("{}/{}", self.config.base_url,
self.bucket_name_encoded);
let mut query = Vec::with_capacity(5);
query.push(("list-type", "2"));
- if delimiter {
- query.push(("delimiter", DELIMITER))
+ if let Some(delimiter) = &opts.delimiter {
+ query.push(("delimiter", delimiter.as_ref()))
}
- if let Some(prefix) = &prefix {
+ if let Some(prefix) = prefix {
query.push(("prefix", prefix))
}
- if let Some(page_token) = page_token {
+ if let Some(page_token) = &opts.page_token {
query.push(("continuation-token", page_token))
}
@@ -677,13 +676,20 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
query.push(("max-keys", max_results))
}
- if let Some(offset) = offset {
- query.push(("start-after", offset))
+ if let Some(offset) = &opts.offset {
+ query.push(("start-after", offset.as_ref()))
+ }
+
+ let max_keys_str;
+ if let Some(max_keys) = &opts.max_keys {
+ max_keys_str = max_keys.to_string();
+ query.push(("max-keys", max_keys_str.as_ref()))
}
let response = self
.client
.request(Method::GET, url)
+ .extensions(opts.extensions)
.query(&query)
.with_bearer_auth(credential.as_deref())
.send_retry(&self.config.retry_config)
@@ -698,6 +704,9 @@ impl ListClient for Arc<GoogleCloudStorageClient> {
.map_err(|source| Error::InvalidListResponse { source })?;
let token = response.next_continuation_token.take();
- Ok((response.try_into()?, token))
+ Ok(PaginatedListResult {
+ result: response.try_into()?,
+ page_token: token,
+ })
}
}
diff --git a/src/gcp/mod.rs b/src/gcp/mod.rs
index 5f8c67d..dfd638a 100644
--- a/src/gcp/mod.rs
+++ b/src/gcp/mod.rs
@@ -52,8 +52,9 @@ use http::Method;
use url::Url;
use crate::client::get::GetClientExt;
-use crate::client::list::ListClientExt;
+use crate::client::list::{ListClient, ListClientExt};
use crate::client::parts::Parts;
+use crate::list::{PaginatedListOptions, PaginatedListResult,
PaginatedListStore};
use crate::multipart::MultipartStore;
pub use builder::{GoogleCloudStorageBuilder, GoogleConfigKey};
pub use credential::{GcpCredential, GcpSigningCredential, ServiceAccountKey};
@@ -268,6 +269,17 @@ impl Signer for GoogleCloudStorage {
}
}
+#[async_trait]
+impl PaginatedListStore for GoogleCloudStorage {
+ async fn list_paginated(
+ &self,
+ prefix: Option<&str>,
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult> {
+ self.client.list_request(prefix, opts).await
+ }
+}
+
#[cfg(test)]
mod test {
@@ -299,6 +311,7 @@ mod test {
multipart(&integration, &integration).await;
multipart_race_condition(&integration, true).await;
multipart_out_of_order(&integration).await;
+ list_paginated(&integration, &integration).await;
// Fake GCS server doesn't currently honor preconditions
get_opts(&integration).await;
put_opts(&integration, true).await;
diff --git a/src/integration.rs b/src/integration.rs
index 05cd97f..73339a6 100644
--- a/src/integration.rs
+++ b/src/integration.rs
@@ -24,6 +24,7 @@
//!
//! They are intended solely for testing purposes.
+use crate::list::{PaginatedListOptions, PaginatedListStore};
use crate::multipart::MultipartStore;
use crate::path::Path;
use crate::{
@@ -35,6 +36,7 @@ use futures::stream::FuturesUnordered;
use futures::{StreamExt, TryStreamExt};
use rand::distr::Alphanumeric;
use rand::{rng, Rng};
+use std::collections::HashSet;
pub(crate) async fn flatten_list_stream(
storage: &DynObjectStore,
@@ -1230,3 +1232,92 @@ pub async fn multipart_out_of_order(storage: &dyn
ObjectStore) {
let bytes = result.bytes().await.unwrap();
assert_eq!(bytes, full);
}
+
+/// Tests [`PaginatedListStore`]
+pub async fn list_paginated(storage: &dyn ObjectStore, list: &dyn
PaginatedListStore) {
+ delete_fixtures(storage).await;
+
+ let r = list.list_paginated(None, Default::default()).await.unwrap();
+ assert_eq!(r.page_token, None);
+ assert_eq!(r.result.objects, vec![]);
+ assert_eq!(r.result.common_prefixes, vec![]);
+
+ let p1 = Path::from("foo/bar");
+ let p2 = Path::from("foo/bax");
+ let p3 = Path::from("foo/baz/bar");
+ let p4 = Path::from("foo/baz/banana");
+ let p5 = Path::from("fob/banana");
+ let p6 = Path::from("fongle/banana");
+
+ let paths = HashSet::from_iter([&p1, &p2, &p3, &p4, &p5, &p6]);
+
+ for path in &paths {
+ storage.put(path, vec![1].into()).await.unwrap();
+ }
+
+ // Test basic listing
+
+ let mut listed = HashSet::new();
+ let mut opts = PaginatedListOptions {
+ max_keys: Some(5),
+ ..Default::default()
+ };
+ let ret = list.list_paginated(None, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 5);
+ listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+ opts.page_token = Some(ret.page_token.unwrap());
+ let ret = list.list_paginated(None, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 1);
+ listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+ assert_eq!(listed, paths);
+
+ // List with prefix
+ let prefix = Some("foo/");
+ opts.page_token = None;
+ let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 4);
+ assert!(ret.page_token.is_none());
+
+ let actual = HashSet::from_iter(ret.result.objects.iter().map(|x|
&x.location));
+ assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2, &p3, &p4]));
+
+ // List with partial prefix
+ let prefix = Some("fo");
+ opts.page_token = None;
+ let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 5);
+ listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+ opts.page_token = Some(ret.page_token.unwrap());
+ let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 1);
+ listed.extend(ret.result.objects.iter().map(|x| &x.location));
+
+ assert_eq!(listed, paths);
+
+ // List with prefix and delimiter
+ let prefix = Some("foo/");
+ opts.page_token = None;
+ opts.delimiter = Some("/".into());
+ let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 2);
+ assert_eq!(ret.result.common_prefixes, vec![Path::from("foo/baz")]);
+ assert!(ret.page_token.is_none());
+
+ let actual = HashSet::from_iter(ret.result.objects.iter().map(|x|
&x.location));
+ assert_eq!(actual, HashSet::<&Path>::from_iter([&p1, &p2]));
+
+ // List with partial prefix and delimiter
+ let prefix = Some("fo");
+ opts.page_token = None;
+ opts.delimiter = Some("/".into());
+ let ret = list.list_paginated(prefix, opts.clone()).await.unwrap();
+ assert_eq!(ret.result.objects.len(), 0);
+ assert_eq!(
+ HashSet::<Path>::from_iter(ret.result.common_prefixes),
+ HashSet::from_iter([Path::from("foo"), Path::from("fob"),
Path::from("fongle")])
+ );
+ assert!(ret.page_token.is_none());
+}
diff --git a/src/lib.rs b/src/lib.rs
index ffca24a..4743f96 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -547,6 +547,7 @@ mod tags;
pub use tags::TagSet;
+pub mod list;
pub mod multipart;
mod parse;
mod payload;
@@ -609,6 +610,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync +
Debug + 'static {
///
/// Client should prefer [`ObjectStore::put`] for small payloads, as
streaming uploads
/// typically require multiple separate requests. See [`MultipartUpload`]
for more information
+ ///
+ /// For more advanced multipart uploads see
[`MultipartStore`](multipart::MultipartStore)
async fn put_multipart(&self, location: &Path) -> Result<Box<dyn
MultipartUpload>> {
self.put_multipart_opts(location, PutMultipartOpts::default())
.await
@@ -618,6 +621,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync +
Debug + 'static {
///
/// Client should prefer [`ObjectStore::put`] for small payloads, as
streaming uploads
/// typically require multiple separate requests. See [`MultipartUpload`]
for more information
+ ///
+ /// For more advanced multipart uploads see
[`MultipartStore`](multipart::MultipartStore)
async fn put_multipart_opts(
&self,
location: &Path,
@@ -728,6 +733,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync +
Debug + 'static {
/// `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x` will be
included.
///
/// Note: the order of returned [`ObjectMeta`] is not guaranteed
+ ///
+ /// For more advanced listing see
[`PaginatedListStore`](list::PaginatedListStore)
fn list(&self, prefix: Option<&Path>) -> BoxStream<'static,
Result<ObjectMeta>>;
/// List all the objects with the given prefix and a location greater than
`offset`
@@ -736,6 +743,8 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync +
Debug + 'static {
/// the number of network requests required
///
/// Note: the order of returned [`ObjectMeta`] is not guaranteed
+ ///
+ /// For more advanced listing see
[`PaginatedListStore`](list::PaginatedListStore)
fn list_with_offset(
&self,
prefix: Option<&Path>,
diff --git a/src/list.rs b/src/list.rs
new file mode 100644
index 0000000..e73fe52
--- /dev/null
+++ b/src/list.rs
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Paginated Listing
+
+use super::Result;
+use crate::ListResult;
+use async_trait::async_trait;
+use std::borrow::Cow;
+
+/// Options for a paginated list request
+#[derive(Debug, Default, Clone)]
+pub struct PaginatedListOptions {
+ /// Path to start listing from
+ ///
+ /// Note: Not all stores support this
+ pub offset: Option<String>,
+
+ /// A delimiter used to group keys with a common prefix
+ ///
+ /// Note: Some stores only support `/`
+ pub delimiter: Option<Cow<'static, str>>,
+
+ /// The maximum number of paths to return
+ pub max_keys: Option<usize>,
+
+ /// A page token from a previous request
+ ///
+ /// Note: Behaviour is implementation defined if the previous request
+ /// used a different prefix or options
+ pub page_token: Option<String>,
+
+ /// Implementation-specific extensions. Intended for use by implementations
+ /// that need to pass context-specific information (like tracing spans)
via trait methods.
+ ///
+ /// These extensions are ignored entirely by backends offered through this
crate.
+ pub extensions: http::Extensions,
+}
+
+/// A [`ListResult`] with optional pagination token
+#[derive(Debug)]
+pub struct PaginatedListResult {
+ /// The list result
+ pub result: ListResult,
+ /// If the result set was truncated, the pagination token to fetch the next results
+ pub page_token: Option<String>,
+}
+
+/// A low-level interface for interacting with paginated listing APIs
+///
+/// Most use-cases should prefer [`ObjectStore::list`] as this is supported by
more
+/// backends, including [`LocalFileSystem`], however, [`PaginatedListStore`]
can be
+/// used where stateless pagination or non-path segment based listing is
required
+///
+/// [`ObjectStore::list`]: crate::ObjectStore::list
+/// [`LocalFileSystem`]: crate::local::LocalFileSystem
+#[async_trait]
+pub trait PaginatedListStore: Send + Sync + 'static {
+ /// Perform a paginated list request
+ ///
+ /// Note: the order of returned objects is not guaranteed and
+ /// unlike [`ObjectStore::list`] a trailing delimiter is not
+ /// automatically added to `prefix`
+ ///
+ /// [`ObjectStore::list`]: crate::ObjectStore::list
+ async fn list_paginated(
+ &self,
+ prefix: Option<&str>,
+ opts: PaginatedListOptions,
+ ) -> Result<PaginatedListResult>;
+}