This is an automated email from the ASF dual-hosted git repository. koushiro pushed a commit to branch add-cache-layer in repository https://gitbox.apache.org/repos/asf/opendal.git
commit 3a9427cd12a89e47a4188c7cbd9ca8538cb12d45 Author: koushiro <[email protected]> AuthorDate: Sat Dec 27 20:31:40 2025 +0800 feat(layers): add cache layer --- core/Cargo.lock | 9 + core/Cargo.toml | 2 + core/layers/cache/Cargo.toml | 40 +++++ core/layers/cache/src/lib.rs | 399 +++++++++++++++++++++++++++++++++++++++++++ core/src/lib.rs | 2 + 5 files changed, 452 insertions(+) diff --git a/core/Cargo.lock b/core/Cargo.lock index ff85dd83b..48b86fcee 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -5549,6 +5549,7 @@ dependencies = [ "opendal-core", "opendal-layer-async-backtrace", "opendal-layer-await-tree", + "opendal-layer-cache", "opendal-layer-capability-check", "opendal-layer-chaos", "opendal-layer-concurrent-limit", @@ -5750,6 +5751,14 @@ dependencies = [ "opendal-core", ] +[[package]] +name = "opendal-layer-cache" +version = "0.55.0" +dependencies = [ + "bytes", + "opendal-core", +] + [[package]] name = "opendal-layer-capability-check" version = "0.55.0" diff --git a/core/Cargo.toml b/core/Cargo.toml index 013261659..53a0ae69e 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -92,6 +92,7 @@ internal-path-cache = ["opendal-core/internal-path-cache"] internal-tokio-rt = ["opendal-core/internal-tokio-rt"] layers-async-backtrace = ["dep:opendal-layer-async-backtrace"] layers-await-tree = ["dep:opendal-layer-await-tree"] +layers-cache = ["dep:opendal-layer-cache"] layers-capability-check = ["dep:opendal-layer-capability-check"] layers-chaos = ["dep:opendal-layer-chaos"] layers-concurrent-limit = ["dep:opendal-layer-concurrent-limit"] @@ -205,6 +206,7 @@ ctor = { workspace = true } opendal-core = { path = "core", version = "0.55.0", default-features = false } opendal-layer-async-backtrace = { path = "layers/async-backtrace", version = "0.55.0", optional = true, default-features = false } opendal-layer-await-tree = { path = "layers/await-tree", version = "0.55.0", optional = true, default-features = false } +opendal-layer-cache = { path = "layers/cache", version = "0.55.0", optional = true, default-features = false } opendal-layer-capability-check = { path = "layers/capability-check", version = "0.55.0", optional = true, default-features = false } opendal-layer-chaos = { path = "layers/chaos", version = "0.55.0", optional = true, default-features = false } opendal-layer-concurrent-limit = { path = "layers/concurrent-limit", version = "0.55.0", optional = true, default-features = false } diff --git a/core/layers/cache/Cargo.toml b/core/layers/cache/Cargo.toml new file mode 100644 index 000000000..369494a8d --- /dev/null +++ b/core/layers/cache/Cargo.toml @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +description = "Apache OpenDAL cache layer" +name = "opendal-layer-cache" + +authors = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +rust-version = { workspace = true } +version = { workspace = true } + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +bytes = { workspace = true } +opendal-core = { path = "../../core", version = "0.55.0", default-features = false } + +[dev-dependencies] +# log = { workspace = true } +# opendal-core = { path = "../../core", version = "0.55.0" } +# tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/core/layers/cache/src/lib.rs b/core/layers/cache/src/lib.rs new file mode 100644 index 000000000..d28cf4d63 --- /dev/null +++ b/core/layers/cache/src/lib.rs @@ -0,0 +1,399 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Cache layer implementation for Apache OpenDAL. + +#![cfg_attr(docsrs, feature(doc_cfg))] +#![deny(missing_docs)] + +use std::fmt::Debug; +use std::sync::Arc; + +use bytes::BytesMut; +use opendal_core::raw::*; +use opendal_core::*; + +/// `CacheService` defines the backing storage interface for [`CacheLayer`]. +/// It should behave like a simple object store: get/set bytes by key and +/// expose lightweight metadata for existence checks. +pub trait CacheService: Clone + Send + Sync + 'static { + /// Identifier of the cache backend, used mainly for logging and debugging. + fn scheme(&self) -> &'static str; + + /// Read cached content by `key`. Returns `Ok(None)` on cache miss instead of `NotFound`. + fn read(&self, key: &str) -> impl Future<Output = Result<Option<Buffer>>> + MaybeSend; + + /// Write full bytes for `key`, replacing any existing value. + fn write(&self, key: &str, value: Vec<u8>) -> impl Future<Output = Result<()>> + MaybeSend; + + /// Fetch metadata for `key`. Should return [`ErrorKind::NotFound`] on miss. + fn stat(&self, key: &str) -> impl Future<Output = Result<Metadata>> + MaybeSend; + + /// Check whether `key` exists in the cache. + fn exists(&self, key: &str) -> impl Future<Output = Result<bool>> + MaybeSend; +} + +impl CacheService for Operator { + fn scheme(&self) -> &'static str { + self.info().scheme() + } + + async fn read(&self, key: &str) -> Result<Option<Buffer>> { + let r = Operator::read(self, key).await; + match r { + Ok(r) => Ok(Some(r)), + Err(err) => match err.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(err), + }, + } + } + + async fn write(&self, key: &str, value: Vec<u8>) -> Result<()> { + Operator::write(self, key, value).await.map(|_| ()) + } + + async fn stat(&self, key: &str) -> Result<Metadata> { + Operator::stat(self, key).await + } + + async fn exists(&self, key: &str) -> Result<bool> { + Operator::exists(self, key).await + } +} + +#[derive(Clone, Debug)] +struct CacheOptions { + /// Enable cache lookups before hitting the inner service. + read: bool, + /// Promote data read from the inner service into the cache (read-through fill). + read_promotion: bool, + /// Write-through caching for data written to the inner service. + write: bool, +} + +impl Default for CacheOptions { + fn default() -> Self { + Self { + read: true, + read_promotion: true, + write: true, + } + } +} + +/// Cache layer that wraps an `Access` with a [`CacheService`]. +/// +/// The cache service can be any OpenDAL [`Operator`], allowing reuse of existing services as a +/// cache backend. Provides read-through (cache lookup), read-promotion (populate cache after +/// misses), and write-through caching behaviors. +#[derive(Clone)] +pub struct CacheLayer<S> { + service: Arc<S>, + options: CacheOptions, +} + +impl<S> CacheLayer<S> { + /// Create a new [`CacheLayer`] using the given cache service with default options. + pub fn new(inner: S) -> Self { + Self { + service: Arc::new(inner), + options: CacheOptions::default(), + } + } + + /// Enable/disable read-through caching. + /// When disabled, reads bypass the cache entirely. + pub fn with_cache_read(mut self, enabled: bool) -> Self { + self.options.read = enabled; + self + } + + /// Enable/disable cache promotion during read operations. + /// When disabled, data fetched from the inner service on a miss will not be stored back into the cache. + pub fn with_cache_read_promotion(mut self, enabled: bool) -> Self { + self.options.read_promotion = enabled; + self + } + + /// Enable/disable write-through caching. + /// When enabled, bytes written to the inner service are also stored into the cache. + pub fn with_cache_write(mut self, enabled: bool) -> Self { + self.options.write = enabled; + self + } +} + +impl<A: Access, S: CacheService> Layer<A> for CacheLayer<S> { + type LayeredAccess = CacheAccessor<A, S>; + + fn layer(&self, inner: A) -> Self::LayeredAccess { + CacheAccessor { + inner, + cache_service: self.service.clone(), + cache_options: self.options.clone(), + } + } +} + +#[doc(hidden)] +pub struct CacheAccessor<A, S> { + inner: A, + cache_service: Arc<S>, + cache_options: CacheOptions, +} + +impl<A: Debug, S: CacheService> Debug for CacheAccessor<A, S> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CacheAccessor") + .field("inner", &self.inner) + .field("cache_scheme", &self.cache_service.scheme()) + .field("cache_options", &self.cache_options) + .finish() + } +} + +impl<A: Access, S: CacheService> LayeredAccess for CacheAccessor<A, S> { + type Inner = A; + type Reader = CacheReader<A::Reader, S>; + type Writer = CacheWriter<A::Writer, S>; + type Lister = A::Lister; + type Deleter = A::Deleter; + + fn inner(&self) -> &Self::Inner { + &self.inner + } + + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + let cache_key = path.to_owned(); + + // Try cache first if read caching is enabled + if self.cache_options.read { + match self.cache_service.read(&cache_key).await { + Ok(Some(cached_data)) => { + // Cache hit + return Ok((RpRead::new(), CacheReader::from_buffer(cached_data))); + } + Ok(None) => { /* Cache miss, continue to underlying service */ } + Err(_) => { /* Cache error, continue to underlying service */ } + } + } + + // Query underlying service + let (rp, reader) = self.inner.read(path, args).await?; + + // Create a reader that will cache data as it's read + Ok(( + rp, + CacheReader::new( + reader, + self.cache_service.clone(), + cache_key, + self.cache_options.read_promotion, + ), + )) + } + + async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { + let cache_key = path.to_owned(); + + // Always try to write to underlying storage first + let (rp, writer) = self.inner.write(path, args).await?; + + // Create a writer that will cache data as it's written + Ok(( + rp, + CacheWriter::new( + writer, + self.cache_service.clone(), + cache_key, + self.cache_options.write, + ), + )) + } + + async fn stat(&self, path: &str, args: OpStat) -> Result<RpStat> { + let cache_key = &path; + + // Check cache first if read caching is enabled + if self.cache_options.read { + match self.cache_service.stat(cache_key).await { + Ok(metadata) => { + // Cache hit - key exists in cache service + return Ok(RpStat::new(metadata)); + } + Err(_) => { /* Cache miss, continue to underlying service */ } + } + } + + // Fallback to underlying service + self.inner.stat(path, args).await + } + + async fn delete(&self) -> Result<(RpDelete, Self::Deleter)> { + self.inner.delete().await + } + + async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { + // For list operations, we typically don't cache results + // as they can be large and change frequently + self.inner.list(path, args).await + } +} + +/// Reader that caches data as it reads from the underlying service. +#[doc(hidden)] +pub enum CacheReader<R, S> { + /// Reader backed by cached data + Cached { data: Buffer, pos: usize }, + /// Reader that reads from underlying service and caches the data + Uncached { + inner: R, + cache_service: Arc<S>, + cache_key: String, + cache_read_promotion: bool, + buffer: BytesMut, + }, +} + +impl<R, S> CacheReader<R, S> { + /// Create a new cache reader from cached data. + fn from_buffer(data: Buffer) -> Self { + Self::Cached { data, pos: 0 } + } + + /// Create a new cache reader that will read from underlying service. + fn new(inner: R, cache_service: Arc<S>, cache_key: String, cache_read_promotion: bool) -> Self { + Self::Uncached { + inner, + cache_service, + cache_key, + cache_read_promotion, + buffer: BytesMut::new(), + } + } +} + +impl<R: oio::Read, S: CacheService> oio::Read for CacheReader<R, S> { + async fn read(&mut self) -> Result<Buffer> { + match self { + Self::Cached { data, pos } => { + if *pos >= data.len() { + return Ok(Buffer::new()); + } + + let remaining = data.slice(*pos..); + *pos = data.len(); + Ok(remaining) + } + Self::Uncached { + inner, + cache_service, + cache_key, + cache_read_promotion, + buffer, + } => { + let chunk = inner.read().await?; + + if chunk.is_empty() { + // EOF reached, cache the complete data if read promotion is enabled + if *cache_read_promotion && !buffer.is_empty() { + let cached_data = buffer.to_vec(); + let _ = cache_service.write(cache_key, cached_data).await; + } + return Ok(Buffer::new()); + } + + // Accumulate data for caching + if *cache_read_promotion { + buffer.extend_from_slice(&chunk.to_bytes()); + } + + Ok(chunk) + } + } + } +} + +/// Writer that caches data as it writes to the underlying service +#[doc(hidden)] +pub struct CacheWriter<W, S> { + inner: W, + cache_service: Arc<S>, + cache_key: String, + cache_write: bool, + buffer: BytesMut, +} + +impl<W, S> CacheWriter<W, S> { + fn new(inner: W, cache_service: Arc<S>, cache_key: String, cache_write: bool) -> Self { + Self { + inner, + cache_service, + cache_key, + cache_write, + buffer: BytesMut::new(), + } + } +} + +impl<W: oio::Write, S: CacheService> oio::Write for CacheWriter<W, S> { + async fn write(&mut self, bs: Buffer) -> Result<()> { + // Always write to underlying service first + self.inner.write(bs.clone()).await?; + + // Accumulate data for potential caching if `cache_write` is enabled + if self.cache_write { + self.buffer.extend_from_slice(&bs.to_bytes()); + } + + Ok(()) + } + + async fn close(&mut self) -> Result<Metadata> { + // Finalize the underlying writer + match self.inner.close().await { + Ok(metadata) => { + // Cache the complete data if `cache_write` is enabled + if self.cache_write && !self.buffer.is_empty() { + // Cache errors don't fail the write operation + let _ = self + .cache_service + .write(&self.cache_key, self.buffer.to_vec()) + .await; + } + + self.buffer.clear(); + Ok(metadata) + } + Err(err) => { + self.buffer.clear(); + Err(err) + } + } + } + + async fn abort(&mut self) -> Result<()> { + // No need to cache anything since the write operation was aborted + self.buffer.clear(); + + // Abort underlying writer + self.inner.abort().await?; + + Ok(()) + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index da5589d77..0ac2a150a 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -373,6 +373,8 @@ pub mod layers { pub use opendal_layer_async_backtrace::*; #[cfg(feature = "layers-await-tree")] pub use opendal_layer_await_tree::*; + #[cfg(feature = "layers-cache")] + pub use opendal_layer_cache::*; #[cfg(feature = "layers-capability-check")] pub use opendal_layer_capability_check::*; #[cfg(feature = "layers-chaos")]
