MrCroxx commented on code in PR #6366: URL: https://github.com/apache/opendal/pull/6366#discussion_r2203147700
########## core/src/layers/foyer.rs: ########## @@ -0,0 +1,420 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + future::Future, + ops::{Bound, Deref, RangeBounds}, + sync::Arc, +}; + +use foyer::{Code, CodeError, Error as FoyerError, HybridCache}; + +use crate::{ + raw::{ + oio::{self, QueueBuf, Read as _}, + Access, AccessorInfo, BytesContentRange, BytesRange, Layer, LayeredAccess, MaybeSend, + OpCopy, OpCreateDir, OpDelete, OpList, OpPresign, OpRead, OpRename, OpStat, OpWrite, + RpCopy, RpCreateDir, RpDelete, RpList, RpPresign, RpRead, RpRename, RpStat, RpWrite, + }, + Buffer, Error, ErrorKind, Metadata, Result, +}; + +fn extract_err(e: FoyerError) -> Error { + let e = match e.downcast::<Error>() { + Ok(e) => return e, + Err(e) => e, + }; + Error::new(ErrorKind::Unexpected, e.to_string()) +} + +#[derive(Debug)] +pub struct Value(Buffer); + +impl Deref for Value { + type Target = Buffer; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl From<Buffer> for Value { + fn from(buf: Buffer) -> Self { + Value(buf) + } +} + +impl Code for Value { + fn encode(&self, writer: &mut impl std::io::Write) -> std::result::Result<(), CodeError> { + let len = self.0.len() as u64; + 
writer.write_all(&len.to_le_bytes())?; + std::io::copy(&mut self.0.clone(), writer)?; + Ok(()) + } + + fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, CodeError> + where + Self: Sized, + { + let mut len_bytes = [0u8; 8]; + reader.read_exact(&mut len_bytes)?; + let len = u64::from_le_bytes(len_bytes) as usize; + let mut buffer = vec![0u8; len]; + reader.read_exact(&mut buffer[..len])?; + Ok(Value(buffer.into())) + } + + fn estimated_size(&self) -> usize { + 8 + self.0.len() + } +} + +/// Hybrid cache layer for OpenDAL that uses [foyer](https://github.com/foyer-rs/foyer) for caching. +/// +/// # Operation Behavior +/// - `write`: [`FoyerLayer`] will write to the foyer hybrid cache after the service's write operation is completed. +/// - `read`: [`FoyerLayer`] will first check the foyer hybrid cache for the data. If the data is not found, it will perform the read operation on the service and cache the result. +/// - `delete`: [`FoyerLayer`] will remove the data from the foyer hybrid cache regardless of whether the service's delete operation is successful. +/// - Other operations: [`FoyerLayer`] will not cache the results of other operations, such as `list`, `copy`, `rename`, etc. They will be passed through to the underlying accessor without caching. +/// +/// # Examples +/// +/// ```rust +/// use opendal::layers::FoyerLayer; +/// use opendal::services::S3; +/// +/// ``` +#[derive(Debug)] +pub struct FoyerLayer { + cache: HybridCache<String, Value>, +} + +impl FoyerLayer { + /// Creates a new `FoyerLayer` with the given foyer hybrid cache. 
+ pub fn new(cache: HybridCache<String, Value>) -> Self { + FoyerLayer { cache } + } +} + +impl From<HybridCache<String, Value>> for FoyerLayer { + fn from(cache: HybridCache<String, Value>) -> Self { + Self::new(cache) + } +} + +impl<A: Access> Layer<A> for FoyerLayer { + type LayeredAccess = FoyerAccessor<A>; + + fn layer(&self, accessor: A) -> Self::LayeredAccess { + let cache = self.cache.clone(); + FoyerAccessor { + inner: Arc::new(Inner { accessor, cache }), + } + } +} + +#[derive(Debug)] +struct Inner<A: Access> { + accessor: A, + cache: HybridCache<String, Value>, +} + +#[derive(Debug)] +pub struct FoyerAccessor<A: Access> { + inner: Arc<Inner<A>>, +} + +impl<A: Access> LayeredAccess for FoyerAccessor<A> { + type Inner = A; + type Reader = Buffer; + type Writer = Writer<A>; + type Lister = A::Lister; + type Deleter = Deleter<A>; + + fn inner(&self) -> &Self::Inner { + &self.inner.accessor + } + + fn read( + &self, + path: &str, + args: OpRead, + ) -> impl Future<Output = Result<(RpRead, Self::Reader)>> + MaybeSend { + let r = args.range(); + let path = path.to_string(); + async move { + let entry = self + .inner + .cache + .fetch(path.clone(), || { + let inner = self.inner.clone(); + async move { + let (_, mut reader) = inner + .accessor + .read(&path, args.with_range(BytesRange::new(0, None))) + .await + .map_err(FoyerError::other)?; + let buffer = reader.read_all().await.map_err(FoyerError::other)?; + Ok(buffer.into()) + } + }) + .await + .map_err(extract_err)?; + + let r = r.to_range(); + let start = match r.start_bound() { + Bound::Included(i) => *i, + Bound::Excluded(i) => *i + 1, + Bound::Unbounded => 0, + }; + let end = match r.end_bound() { + Bound::Included(i) => *i + 1, + Bound::Excluded(i) => *i, + Bound::Unbounded => entry.len() as u64, + }; + let range = BytesContentRange::default() + .with_range(start, end - 1) + .with_size(entry.len() as _); + let buffer = entry.slice(start as usize..end as usize); Review Comment: Agreed and nope. 
I am trying to implement a chunk-size-based caching mechanism, but it is difficult to determine the chunk segmentation without a complete range, and I haven't figured out a better way to implement it yet. Therefore, I would like to introduce the simplest approach in this PR first. Later, I will introduce other mechanisms and use a configuration parameter to choose which mode to use. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@opendal.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org