kevinjqliu commented on code in PR #732: URL: https://github.com/apache/arrow-rs-object-store/pull/732#discussion_r3383215524
########## src/capabilities.rs: ########## @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Capability advertisement for [`ObjectStore`](crate::ObjectStore) implementations. +//! +//! See [`Capabilities`] and [`Capability`] for details. +use crate::Error; +use std::collections::HashSet; + +const ORDERED_LISTING: &str = "ordered-listing"; + +/// An individual capability that an [`ObjectStore`] implementation may support. +/// +/// Used together with [`Capabilities`] to advertise optional backend features. +/// Obtain the set of supported capabilities via [`ObjectStore::capabilities`]. +#[derive(Hash, Eq, PartialEq, Copy, Clone, Debug)] +pub enum Capability { + /// List results from [`ObjectStore::list`] and + /// [`ObjectStore::list_with_offset`] are returned in ascending + /// lexicographic order by [`Path`]. + /// + /// When this capability is present, callers may rely on the ordering and + /// avoid buffering all results solely for sorting purposes. + OrderedListing, +} + +impl std::str::FromStr for Capability { + type Err = Error; + + /// Parses a capability from its kebab-case string representation. + /// + /// Returns `None` if `s` does not correspond to any known capability. + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + ORDERED_LISTING => Ok(Self::OrderedListing), + cap => Err(Error::Generic { + store: "Config", + source: format!("invalid capability: {cap}").into(), + }), + } + } +} + +impl std::fmt::Display for Capability { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::OrderedListing => write!(f, "{}", ORDERED_LISTING), + } + } +} + +/// Optional features supported by an [`ObjectStore`] implementation. +/// +/// Obtain the capabilities of a store by calling [`ObjectStore::capabilities`]. +/// All fields default to `false`; a store sets a field to `true` when it +/// natively supports that feature. Review Comment: nit: set false/true is confusing. maybe something like "has the capability" ########## src/lib.rs: ########## @@ -1131,6 +1133,14 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static { self.delete(from).await?; Ok(()) } + + /// Return the [`Capabilities`] supported by this store. + /// + /// All capability fields default to `false`. Individual store Review Comment: nit: empty instead of `false`? ########## src/aws/builder.rs: ########## @@ -1071,6 +1084,17 @@ impl AmazonS3Builder { self } + /// Override the [`Capabilities`] advertised by this store. + /// + /// By default the store reports `ordered_listing: true` because S3 Review Comment: ```suggestion /// By default the store reports `ordered-listing: true` because S3 ``` ########## src/integration.rs: ########## @@ -398,6 +399,18 @@ pub async fn put_get_delete_list(storage: &DynObjectStore) { assert_eq!(actual, expected, "{prefix:?} - {offset:?}"); } + if storage.capabilities().has(Capability::OrderedListing) { + let actual: Vec<_> = storage + .list(None) + .map_ok(|x| x.location) + .try_collect::<Vec<_>>() + .await + .unwrap(); + let mut sorted_files = files.clone(); + sorted_files.sort(); + assert_eq!(actual, sorted_files); + } Review Comment: could we test `list_with_offset` here as well? its explicitly called out in the OrderedListing docs ########## src/aws/builder.rs: ########## @@ -1071,6 +1084,17 @@ impl AmazonS3Builder { self } + /// Override the [`Capabilities`] advertised by this store. + /// + /// By default the store reports `ordered_listing: true` because S3 + /// `ListObjectsV2` returns results in lexicographic order. Use this + /// method if you are connecting to an S3-compatible endpoint whose + /// behaviour differs from the standard S3 API. Review Comment: ```suggestion ``` ordering_listing capability is not set for aws. also for this and other builders, would be good to move the documentation of capabilities away from `with_capabilities` and perhaps into `get_default_capabilities` ########## src/capabilities.rs: ########## @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Capability advertisement for [`ObjectStore`](crate::ObjectStore) implementations. +//! +//! See [`Capabilities`] and [`Capability`] for details. +use crate::Error; +use std::collections::HashSet; + +const ORDERED_LISTING: &str = "ordered-listing"; + +/// An individual capability that an [`ObjectStore`] implementation may support. +/// +/// Used together with [`Capabilities`] to advertise optional backend features. +/// Obtain the set of supported capabilities via [`ObjectStore::capabilities`]. +#[derive(Hash, Eq, PartialEq, Copy, Clone, Debug)] +pub enum Capability { + /// List results from [`ObjectStore::list`] and + /// [`ObjectStore::list_with_offset`] are returned in ascending + /// lexicographic order by [`Path`]. + /// + /// When this capability is present, callers may rely on the ordering and + /// avoid buffering all results solely for sorting purposes. + OrderedListing, +} + +impl std::str::FromStr for Capability { + type Err = Error; + + /// Parses a capability from its kebab-case string representation. + /// + /// Returns `None` if `s` does not correspond to any known capability. + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + ORDERED_LISTING => Ok(Self::OrderedListing), + cap => Err(Error::Generic { + store: "Config", + source: format!("invalid capability: {cap}").into(), + }), + } + } +} + +impl std::fmt::Display for Capability { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::OrderedListing => write!(f, "{}", ORDERED_LISTING), + } + } +} + +/// Optional features supported by an [`ObjectStore`] implementation. +/// +/// Obtain the capabilities of a store by calling [`ObjectStore::capabilities`]. +/// All fields default to `false`; a store sets a field to `true` when it +/// natively supports that feature. +/// +/// The struct is `#[non_exhaustive]` so that new capability flags can be added Review Comment: The `Capabilities` docs say the struct is `#[non_exhaustive]`, but the attribute is not present. `Capability` is a public exhaustive enum. Since the motivation for this API includes future capabilities like negative ranges, adding a new variant later would be a semver concern. Could we either mark the enum `#[non_exhaustive]`, add a `Custom(...)` variant, or use a string-like capability representation? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
