erickguan commented on code in PR #6366:
URL: https://github.com/apache/opendal/pull/6366#discussion_r2203533223


##########
core/src/layers/foyer.rs:
##########
@@ -0,0 +1,354 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{
+    future::Future,
+    ops::{Bound, Deref, RangeBounds},
+    sync::Arc,
+};
+
+use foyer::{Code, CodeError, Error as FoyerError, HybridCache};
+
+use crate::raw::oio::*;
+use crate::raw::*;
+use crate::*;
+
+fn extract_err(e: FoyerError) -> Error {
+    let e = match e.downcast::<Error>() {
+        Ok(e) => return e,
+        Err(e) => e,
+    };
+    Error::new(ErrorKind::Unexpected, e.to_string())
+}
+
+/// [`FoyerValue`] is a wrapper around `Buffer` that implements the `Code` 
trait.
+#[derive(Debug)]
+pub struct FoyerValue(pub Buffer);
+
+impl Deref for FoyerValue {
+    type Target = Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl Code for FoyerValue {
+    fn encode(&self, writer: &mut impl std::io::Write) -> 
std::result::Result<(), CodeError> {
+        let len = self.0.len() as u64;
+        writer.write_all(&len.to_le_bytes())?;
+        std::io::copy(&mut self.0.clone(), writer)?;
+        Ok(())
+    }
+
+    fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, 
CodeError>
+    where
+        Self: Sized,
+    {
+        let mut len_bytes = [0u8; 8];
+        reader.read_exact(&mut len_bytes)?;
+        let len = u64::from_le_bytes(len_bytes) as usize;
+        let mut buffer = vec![0u8; len];
+        reader.read_exact(&mut buffer[..len])?;
+        Ok(FoyerValue(buffer.into()))
+    }
+
+    fn estimated_size(&self) -> usize {
+        8 + self.0.len()
+    }
+}
+
+/// Hybrid cache layer for OpenDAL that uses 
[foyer](https://github.com/foyer-rs/foyer) for caching.
+///
+/// # Operation Behavior
+/// - `write`: [`FoyerLayer`] will write to the foyer hybrid cache after the 
service's write operation is completed.
+/// - `read`: [`FoyerLayer`] will first check the foyer hybrid cache for the 
data. If the data is not found, it will perform the read operation on the 
service and cache the result.
+/// - `delete`: [`FoyerLayer`] will remove the data from the foyer hybrid 
cache regardless of whether the service's delete operation is successful.
+/// - Other operations: [`FoyerLayer`] will not cache the results of other 
operations, such as `list`, `copy`, `rename`, etc. They will be passed through 
to the underlying accessor without caching.
+///
+/// # Examples
+///
+/// ```rust
+/// use opendal::layers::FoyerLayer;
+/// use opendal::services::S3;
+///
+/// ```
+#[derive(Debug)]
+pub struct FoyerLayer {
+    cache: HybridCache<String, FoyerValue>,
+}
+
+impl FoyerLayer {
+    /// Creates a new `FoyerLayer` with the given foyer hybrid cache.
+    pub fn new(cache: HybridCache<String, FoyerValue>) -> Self {
+        FoyerLayer { cache }
+    }
+}
+
+impl<A: Access> Layer<A> for FoyerLayer {
+    type LayeredAccess = FoyerAccessor<A>;
+
+    fn layer(&self, accessor: A) -> Self::LayeredAccess {
+        let cache = self.cache.clone();

Review Comment:
   Cool that foyer implements this.
   
   I find this behavior intricate, though. Maybe a future improvement:
   1. Cloning cache entries may or may not be the desired behavior. It would 
be better to offer an option.
   2. Some language bindings fork the process. I assume the file cache in 
foyer is not fork-safe, so language bindings will need an API for safety. The 
simplest approach is to copy nothing and share nothing.



##########
core/src/layers/foyer.rs:
##########
@@ -0,0 +1,354 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{
+    future::Future,
+    ops::{Bound, Deref, RangeBounds},
+    sync::Arc,
+};
+
+use foyer::{Code, CodeError, Error as FoyerError, HybridCache};
+
+use crate::raw::oio::*;
+use crate::raw::*;
+use crate::*;
+
+fn extract_err(e: FoyerError) -> Error {
+    let e = match e.downcast::<Error>() {
+        Ok(e) => return e,
+        Err(e) => e,
+    };
+    Error::new(ErrorKind::Unexpected, e.to_string())
+}
+
+/// [`FoyerValue`] is a wrapper around `Buffer` that implements the `Code` 
trait.
+#[derive(Debug)]
+pub struct FoyerValue(pub Buffer);
+
+impl Deref for FoyerValue {
+    type Target = Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl Code for FoyerValue {
+    fn encode(&self, writer: &mut impl std::io::Write) -> 
std::result::Result<(), CodeError> {
+        let len = self.0.len() as u64;
+        writer.write_all(&len.to_le_bytes())?;
+        std::io::copy(&mut self.0.clone(), writer)?;
+        Ok(())
+    }
+
+    fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, 
CodeError>
+    where
+        Self: Sized,
+    {
+        let mut len_bytes = [0u8; 8];
+        reader.read_exact(&mut len_bytes)?;
+        let len = u64::from_le_bytes(len_bytes) as usize;
+        let mut buffer = vec![0u8; len];
+        reader.read_exact(&mut buffer[..len])?;
+        Ok(FoyerValue(buffer.into()))
+    }
+
+    fn estimated_size(&self) -> usize {
+        8 + self.0.len()

Review Comment:
   Minor: extract the magic number `8` into a named constant.



##########
core/Cargo.toml:
##########
@@ -427,6 +433,7 @@ pretty_assertions = "1"
 rand = "0.8"
 sha2 = "0.10"
 size = "0.4"
+tempfile = "3"

Review Comment:
   Can this be a dev-dependency, used only for running tests?
   This is similar to Xuanwo's comment: we try to introduce fewer direct 
dependencies.



##########
core/src/layers/foyer.rs:
##########
@@ -0,0 +1,420 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{
+    future::Future,
+    ops::{Bound, Deref, RangeBounds},
+    sync::Arc,
+};
+
+use foyer::{Code, CodeError, Error as FoyerError, HybridCache};
+
+use crate::{
+    raw::{
+        oio::{self, QueueBuf, Read as _},
+        Access, AccessorInfo, BytesContentRange, BytesRange, Layer, 
LayeredAccess, MaybeSend,
+        OpCopy, OpCreateDir, OpDelete, OpList, OpPresign, OpRead, OpRename, 
OpStat, OpWrite,
+        RpCopy, RpCreateDir, RpDelete, RpList, RpPresign, RpRead, RpRename, 
RpStat, RpWrite,
+    },
+    Buffer, Error, ErrorKind, Metadata, Result,
+};
+
+fn extract_err(e: FoyerError) -> Error {
+    let e = match e.downcast::<Error>() {
+        Ok(e) => return e,
+        Err(e) => e,
+    };
+    Error::new(ErrorKind::Unexpected, e.to_string())
+}
+
+#[derive(Debug)]
+pub struct Value(Buffer);
+
+impl Deref for Value {
+    type Target = Buffer;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl From<Buffer> for Value {
+    fn from(buf: Buffer) -> Self {
+        Value(buf)
+    }
+}
+
+impl Code for Value {
+    fn encode(&self, writer: &mut impl std::io::Write) -> 
std::result::Result<(), CodeError> {
+        let len = self.0.len() as u64;
+        writer.write_all(&len.to_le_bytes())?;
+        std::io::copy(&mut self.0.clone(), writer)?;
+        Ok(())
+    }
+
+    fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, 
CodeError>
+    where
+        Self: Sized,
+    {
+        let mut len_bytes = [0u8; 8];
+        reader.read_exact(&mut len_bytes)?;
+        let len = u64::from_le_bytes(len_bytes) as usize;
+        let mut buffer = vec![0u8; len];
+        reader.read_exact(&mut buffer[..len])?;
+        Ok(Value(buffer.into()))
+    }
+
+    fn estimated_size(&self) -> usize {
+        8 + self.0.len()
+    }
+}
+
+/// Hybrid cache layer for OpenDAL that uses 
[foyer](https://github.com/foyer-rs/foyer) for caching.
+///
+/// # Operation Behavior
+/// - `write`: [`FoyerLayer`] will write to the foyer hybrid cache after the 
service's write operation is completed.
+/// - `read`: [`FoyerLayer`] will first check the foyer hybrid cache for the 
data. If the data is not found, it will perform the read operation on the 
service and cache the result.
+/// - `delete`: [`FoyerLayer`] will remove the data from the foyer hybrid 
cache regardless of whether the service's delete operation is successful.
+/// - Other operations: [`FoyerLayer`] will not cache the results of other 
operations, such as `list`, `copy`, `rename`, etc. They will be passed through 
to the underlying accessor without caching.
+///
+/// # Examples
+///
+/// ```rust
+/// use opendal::layers::FoyerLayer;
+/// use opendal::services::S3;
+///
+/// ```
+#[derive(Debug)]
+pub struct FoyerLayer {
+    cache: HybridCache<String, Value>,
+}
+
+impl FoyerLayer {
+    /// Creates a new `FoyerLayer` with the given foyer hybrid cache.
+    pub fn new(cache: HybridCache<String, Value>) -> Self {
+        FoyerLayer { cache }
+    }
+}
+
+impl From<HybridCache<String, Value>> for FoyerLayer {
+    fn from(cache: HybridCache<String, Value>) -> Self {
+        Self::new(cache)
+    }
+}
+
+impl<A: Access> Layer<A> for FoyerLayer {
+    type LayeredAccess = FoyerAccessor<A>;
+
+    fn layer(&self, accessor: A) -> Self::LayeredAccess {
+        let cache = self.cache.clone();
+        FoyerAccessor {
+            inner: Arc::new(Inner { accessor, cache }),
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Inner<A: Access> {
+    accessor: A,
+    cache: HybridCache<String, Value>,
+}
+
+#[derive(Debug)]
+pub struct FoyerAccessor<A: Access> {
+    inner: Arc<Inner<A>>,
+}
+
+impl<A: Access> LayeredAccess for FoyerAccessor<A> {
+    type Inner = A;
+    type Reader = Buffer;
+    type Writer = Writer<A>;
+    type Lister = A::Lister;
+    type Deleter = Deleter<A>;
+
+    fn inner(&self) -> &Self::Inner {
+        &self.inner.accessor
+    }
+
+    fn read(
+        &self,
+        path: &str,
+        args: OpRead,
+    ) -> impl Future<Output = Result<(RpRead, Self::Reader)>> + MaybeSend {
+        let r = args.range();
+        let path = path.to_string();
+        async move {
+            let entry = self
+                .inner
+                .cache
+                .fetch(path.clone(), || {
+                    let inner = self.inner.clone();
+                    async move {
+                        let (_, mut reader) = inner
+                            .accessor
+                            .read(&path, args.with_range(BytesRange::new(0, 
None)))
+                            .await
+                            .map_err(FoyerError::other)?;
+                        let buffer = 
reader.read_all().await.map_err(FoyerError::other)?;
+                        Ok(buffer.into())
+                    }
+                })
+                .await
+                .map_err(extract_err)?;
+
+            let r = r.to_range();
+            let start = match r.start_bound() {
+                Bound::Included(i) => *i,
+                Bound::Excluded(i) => *i + 1,
+                Bound::Unbounded => 0,
+            };
+            let end = match r.end_bound() {
+                Bound::Included(i) => *i + 1,
+                Bound::Excluded(i) => *i,
+                Bound::Unbounded => entry.len() as u64,
+            };
+            let range = BytesContentRange::default()
+                .with_range(start, end - 1)
+                .with_size(entry.len() as _);
+            let buffer = entry.slice(start as usize..end as usize);

Review Comment:
   Maybe add a comment as a reminder? Up to you.



##########
core/Cargo.toml:
##########
@@ -378,6 +380,10 @@ web-sys = { version = "0.3.77", optional = true, features 
= [
   "StorageManager",
   "FileSystemGetFileOptions",
 ] }
+# for services-foyer
+# TODO(MrCroxx): Switch to a released version after testing with OpenDAL 
integration.

Review Comment:
   👏 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@opendal.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to