jorgehermo9 commented on code in PR #6366:
URL: https://github.com/apache/opendal/pull/6366#discussion_r2661588304


##########
core/layers/foyer/src/lib.rs:
##########
@@ -0,0 +1,509 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{
+    future::Future,
+    ops::{Bound, Deref, Range, RangeBounds},
+    sync::Arc,
+};
+
+use foyer::{Code, CodeError, Error as FoyerError, HybridCache};
+
+use opendal_core::raw::oio::*;
+use opendal_core::raw::*;
+use opendal_core::*;
+
+fn extract_err(e: FoyerError) -> Error {
+    let e = match e.downcast::<Error>() {
+        Ok(e) => return e,
+        Err(e) => e,
+    };
+    Error::new(ErrorKind::Unexpected, e.to_string())
+}
+
+/// Key type of the foyer cache, made of an object path and an optional
+/// version. It is encoded via bincode, which is backed by foyer's "serde"
+/// feature.
+///
+/// A version may be specified in the [`OpRead`] args:
+///
+/// - With a version, the object is cached under that versioned key.
+/// - Without a version, the object is cached exactly as returned by the
+///   backend. `None` is NOT interpreted as "latest" and it is never promoted
+///   to any other version.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
+pub struct FoyerKey {
+    /// Path component of the key.
+    pub path: String,
+    /// Version component of the key; `None` when no version was supplied.
+    pub version: Option<String>,
+}
+
+/// Newtype over [`Buffer`] that carries the [`Code`] trait implementation,
+/// allowing buffers to be used as foyer cache values.
+#[derive(Debug)]
+pub struct FoyerValue(pub Buffer);
+
+/// Lets a [`FoyerValue`] be used wherever a `&Buffer` is expected.
+impl Deref for FoyerValue {
+    type Target = Buffer;
+
+    /// Borrows the wrapped [`Buffer`].
+    fn deref(&self) -> &Self::Target {
+        let Self(buffer) = self;
+        buffer
+    }
+}
+
+/// Serialization of [`FoyerValue`]: an 8-byte little-endian length prefix
+/// followed by the raw bytes of the buffer.
+impl Code for FoyerValue {
+    /// Writes the length prefix and then the buffer contents to `writer`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any I/O error reported by `writer`.
+    fn encode(&self, writer: &mut impl std::io::Write) -> std::result::Result<(), CodeError> {
+        let len = self.0.len() as u64;
+        writer.write_all(&len.to_le_bytes())?;
+        // `io::copy` needs a mutable reader while `self` is only borrowed
+        // immutably, so the bytes are read from a clone of the buffer.
+        std::io::copy(&mut self.0.clone(), writer)?;
+        Ok(())
+    }
+
+    /// Reads a value in the layout produced by `encode`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `reader` ends before the prefix or the payload is complete,
+    /// if the length prefix does not fit in `usize`, or if memory for the
+    /// payload cannot be reserved.
+    fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, CodeError>
+    where
+        Self: Sized,
+    {
+        let mut len_bytes = [0u8; 8];
+        reader.read_exact(&mut len_bytes)?;
+        // Checked conversion: `as usize` would silently truncate on targets
+        // where `usize` is narrower than 64 bits and misread the payload.
+        let len = usize::try_from(u64::from_le_bytes(len_bytes))
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+        // The length comes from stored data, so reserve fallibly: a corrupted
+        // prefix then surfaces as an error instead of an allocation abort.
+        let mut buffer = Vec::new();
+        buffer
+            .try_reserve_exact(len)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::OutOfMemory, e))?;
+        buffer.resize(len, 0u8);
+        reader.read_exact(&mut buffer)?;
+        Ok(FoyerValue(buffer.into()))
+    }
+
+    /// Encoded size: the 8-byte length prefix plus the buffer length.
+    fn estimated_size(&self) -> usize {
+        8 + self.0.len()
+    }
+}
+
+/// Hybrid cache layer for OpenDAL that uses 
[foyer](https://github.com/foyer-rs/foyer) for caching.
+///
+/// # Operation Behavior
+/// - `write`: [`FoyerLayer`] will write to the foyer hybrid cache after the 
service's write operation is completed.
+/// - `read`: [`FoyerLayer`] will first check the foyer hybrid cache for the 
data. If the data is not found, it will perform the read operation on the 
service and cache the result.
+/// - `delete`: [`FoyerLayer`] will remove the data from the foyer hybrid 
cache regardless of whether the service's delete operation is successful.
+/// - Other operations: [`FoyerLayer`] will not cache the results of other 
operations, such as `list`, `copy`, `rename`, etc. They will be passed through 
to the underlying accessor without caching.
+///
+/// # Examples
+///
+/// ```no_run
+/// use opendal_core::{Operator, services::Memory};
+/// use opendal_layer_foyer::FoyerLayer;
+/// use foyer::{HybridCacheBuilder, Engine};
+///
+/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+/// let cache = HybridCacheBuilder::new()
+///     .memory(64 * 1024 * 1024) // 64MB memory cache
+///     .with_shards(4)
+///     .storage(Engine::Large(Default::default()))
+///     .build()
+///     .await?;
+///
+/// let op = Operator::new(Memory::default())?
+///     .layer(FoyerLayer::new(cache))
+///     .finish();
+/// # Ok(())
+/// # }
+/// ```
+///
+/// # Note
+///
+/// If the object version is enabled, the foyer cache layer will treat the 
objects with same key but different versions as different objects.
+#[derive(Debug)]
+pub struct FoyerLayer {
+    cache: HybridCache<FoyerKey, FoyerValue>,
+    size_limit: Range<usize>,
+}
+
+impl FoyerLayer {
+    /// Creates a new `FoyerLayer` with the given foyer hybrid cache.
+    ///
+    /// The size limit starts out as `0..usize::MAX`.
+    pub fn new(cache: HybridCache<FoyerKey, FoyerValue>) -> Self {
+        Self {
+            cache,
+            size_limit: 0..usize::MAX,
+        }
+    }
+
+    /// Sets the size limit for caching.
+    ///
+    /// It is recommended to set a size limit to avoid caching large files
+    /// that may not be suitable for caching.
+    ///
+    /// Any [`RangeBounds`] is accepted and normalized into a half-open
+    /// `start..end` range. A bound that cannot be represented after
+    /// normalization saturates at `usize::MAX`, so `..=usize::MAX` is stored
+    /// as `0..usize::MAX`.
+    pub fn with_size_limit<R: RangeBounds<usize>>(mut self, size_limit: R) -> Self {
+        // `saturating_add` instead of `+ 1`: a bound of `usize::MAX` would
+        // otherwise panic in debug builds and wrap to 0 in release builds,
+        // silently producing an empty range.
+        let start = match size_limit.start_bound() {
+            Bound::Included(v) => *v,
+            Bound::Excluded(v) => v.saturating_add(1),
+            Bound::Unbounded => 0,
+        };
+        let end = match size_limit.end_bound() {
+            Bound::Included(v) => v.saturating_add(1),
+            Bound::Excluded(v) => *v,
+            Bound::Unbounded => usize::MAX,
+        };
+        self.size_limit = start..end;
+        self
+    }
+}
+
+/// Wraps an accessor with the foyer cache configured on this layer.
+impl<A: Access> Layer<A> for FoyerLayer {
+    type LayeredAccess = FoyerAccessor<A>;
+
+    /// Builds a [`FoyerAccessor`] around `accessor`, giving it a clone of the
+    /// layer's cache handle and size limit.
+    fn layer(&self, accessor: A) -> Self::LayeredAccess {
+        let inner = Inner {
+            accessor,
+            cache: self.cache.clone(),
+            size_limit: self.size_limit.clone(),
+        };
+        FoyerAccessor {
+            inner: Arc::new(inner),
+        }
+    }
+}
+
+/// State held by a [`FoyerAccessor`]: the wrapped accessor together with the
+/// cache handle and size limit taken from the [`FoyerLayer`].
+#[derive(Debug)]
+struct Inner<A: Access> {
+    /// The accessor being wrapped.
+    accessor: A,
+    /// Handle to the foyer hybrid cache.
+    cache: HybridCache<FoyerKey, FoyerValue>,
+    /// Size limit configured on the layer, as a half-open range.
+    size_limit: Range<usize>,
+}
+
+/// Accessor produced by [`FoyerLayer`]; its state lives behind an [`Arc`].
+#[derive(Debug)]
+pub struct FoyerAccessor<A: Access> {
+    inner: Arc<Inner<A>>,
+}
+
+impl<A: Access> LayeredAccess for FoyerAccessor<A> {
+    type Inner = A;
+    type Reader = Buffer;
+    type Writer = Writer<A>;
+    type Lister = A::Lister;
+    type Deleter = Deleter<A>;
+
+    fn inner(&self) -> &Self::Inner {
+        &self.inner.accessor
+    }
+
+    fn info(&self) -> Arc<AccessorInfo> {
+        self.inner.accessor.info()
+    }
+
+    async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, 
Self::Reader)> {
+        let path = path.to_string();
+        let version = args.version().map(|v| v.to_string());
+
+        // Extract range bounds before async block to avoid lifetime issues
+        let (range_start, range_end) = {
+            let r = args.range();
+            let start = r.offset();
+            let end = r.size().map(|size| start + size);
+            (start, end)
+        };
+
+        let entry = self
+            .inner
+            .cache
+            .fetch(

Review Comment:
   Hmm, does this path apply the "do not cache things bigger than
`size_limit`" logic? I see the `write` method has it, but we also shouldn't
cache things bigger than `size_limit` when reading, right?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to