liurenjie1024 commented on code in PR #1755:
URL: https://github.com/apache/iceberg-rust/pull/1755#discussion_r2485873820
##########
crates/iceberg/src/io/storage.rs:
##########
@@ -15,220 +15,296 @@
// specific language governing permissions and limitations
// under the License.
+//! Storage traits and implementations for Iceberg.
+//!
+//! This module provides the core storage abstraction used throughout Iceberg
Rust.
+//! Storage implementations handle reading and writing files across different
backends
+//! (S3, GCS, Azure, local filesystem, etc.).
+
+use std::collections::HashMap;
+use std::fmt::Debug;
use std::sync::Arc;
-use opendal::layers::RetryLayer;
-#[cfg(feature = "storage-azdls")]
-use opendal::services::AzdlsConfig;
-#[cfg(feature = "storage-gcs")]
-use opendal::services::GcsConfig;
-#[cfg(feature = "storage-oss")]
-use opendal::services::OssConfig;
-#[cfg(feature = "storage-s3")]
-use opendal::services::S3Config;
-use opendal::{Operator, Scheme};
-
-#[cfg(feature = "storage-azdls")]
-use super::AzureStorageScheme;
-use super::FileIOBuilder;
-#[cfg(feature = "storage-s3")]
-use crate::io::CustomAwsCredentialLoader;
-use crate::{Error, ErrorKind};
-
-/// The storage carries all supported storage services in iceberg
-#[derive(Debug)]
-pub(crate) enum Storage {
- #[cfg(feature = "storage-memory")]
- Memory(Operator),
- #[cfg(feature = "storage-fs")]
- LocalFs,
- /// Expects paths of the form `s3[a]://<bucket>/<path>`.
- #[cfg(feature = "storage-s3")]
- S3 {
- /// s3 storage could have `s3://` and `s3a://`.
- /// Storing the scheme string here to return the correct path.
- configured_scheme: String,
- config: Arc<S3Config>,
- customized_credential_load: Option<CustomAwsCredentialLoader>,
- },
- #[cfg(feature = "storage-gcs")]
- Gcs { config: Arc<GcsConfig> },
- #[cfg(feature = "storage-oss")]
- Oss { config: Arc<OssConfig> },
- /// Expects paths of the form
- /// `abfs[s]://<filesystem>@<account>.dfs.<endpoint-suffix>/<path>` or
- /// `wasb[s]://<container>@<account>.blob.<endpoint-suffix>/<path>`.
- #[cfg(feature = "storage-azdls")]
- Azdls {
- /// Because Azdls accepts multiple possible schemes, we store the full
- /// passed scheme here to later validate schemes passed via paths.
- configured_scheme: AzureStorageScheme,
- config: Arc<AzdlsConfig>,
- },
-}
+use async_trait::async_trait;
+use bytes::Bytes;
-impl Storage {
- /// Convert iceberg config to opendal config.
- pub(crate) fn build(file_io_builder: FileIOBuilder) -> crate::Result<Self>
{
- let (scheme_str, props, extensions) = file_io_builder.into_parts();
- let scheme = Self::parse_scheme(&scheme_str)?;
-
- match scheme {
- #[cfg(feature = "storage-memory")]
- Scheme::Memory => Ok(Self::Memory(super::memory_config_build()?)),
- #[cfg(feature = "storage-fs")]
- Scheme::Fs => Ok(Self::LocalFs),
- #[cfg(feature = "storage-s3")]
- Scheme::S3 => Ok(Self::S3 {
- configured_scheme: scheme_str,
- config: super::s3_config_parse(props)?.into(),
- customized_credential_load: extensions
- .get::<CustomAwsCredentialLoader>()
- .map(Arc::unwrap_or_clone),
- }),
- #[cfg(feature = "storage-gcs")]
- Scheme::Gcs => Ok(Self::Gcs {
- config: super::gcs_config_parse(props)?.into(),
- }),
- #[cfg(feature = "storage-oss")]
- Scheme::Oss => Ok(Self::Oss {
- config: super::oss_config_parse(props)?.into(),
- }),
- #[cfg(feature = "storage-azdls")]
- Scheme::Azdls => {
- let scheme = scheme_str.parse::<AzureStorageScheme>()?;
- Ok(Self::Azdls {
- config: super::azdls_config_parse(props)?.into(),
- configured_scheme: scheme,
- })
- }
- // Update doc on [`FileIO`] when adding new schemes.
- _ => Err(Error::new(
- ErrorKind::FeatureUnsupported,
- format!("Constructing file io from scheme: {scheme} not
supported now",),
- )),
- }
- }
+use super::{Extensions, FileMetadata, FileRead, FileWrite, InputFile,
OutputFile};
+use crate::Result;
+
+/// Trait for storage operations in Iceberg.
+///
+/// This trait defines the interface for all storage backends. Implementations
+/// provide access to different storage systems like S3, GCS, Azure, local
filesystem, etc.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use iceberg::io::Storage;
+///
+/// async fn example(storage: Arc<dyn Storage>) -> Result<()> {
+/// // Check if file exists
+/// if storage.exists("s3://bucket/path/file.parquet").await? {
+/// // Read file
+/// let data = storage.read("s3://bucket/path/file.parquet").await?;
+/// }
+/// Ok(())
+/// }
+/// ```
+#[async_trait]
+pub trait Storage: Debug + Send + Sync {
Review Comment:
Do you mean the `Storage` trait only or the whole storage implementation?
I'm thinking about putting them in the `iceberg-storage` crate forever, sinc
this align with our small crate pattern. `iceberg-storage` has no much
dependencies, and could be used standalone. After we split out
`iceberg-storage`, we could even more out of the core crate, like puffin format.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]