This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git
The following commit(s) were added to refs/heads/main by this push:
new 76272ec4a refactor(core): Optimize Writer::write_from to avoid extra
copy (#7098)
76272ec4a is described below
commit 76272ec4a81f9d7da2db14b5c27b01c481f34ec5
Author: Hugo <[email protected]>
AuthorDate: Thu Dec 25 18:22:39 2025 +0800
refactor(core): Optimize Writer::write_from to avoid extra copy (#7098)
* refactor(core): Optimize Writer::write_from to avoid extra copy
Previously, `write_from` called `copy_to_bytes` with the full remaining
length. If the input `Buf` was non-contiguous (e.g., a `Chain`), this forced a
deep copy to merge memory into a single contiguous block.
This commit optimizes the implementation by:
1. Introducing a fast path for contiguous buffers to avoid Vec allocation.
2. Iterating over chunks for non-contiguous buffers, collecting them into a
`Vec<Bytes>` to preserve zero-copy behavior where possible.
Added `test_writer_write_from_chain` to verify the fix.
* trigger GitHub actions
---
core/core/src/types/write/writer.rs | 56 +++++++++++++++++++++++++++++++++----
1 file changed, 51 insertions(+), 5 deletions(-)
diff --git a/core/core/src/types/write/writer.rs b/core/core/src/types/write/writer.rs
index e35c46a65..98743c1c6 100644
--- a/core/core/src/types/write/writer.rs
+++ b/core/core/src/types/write/writer.rs
@@ -150,13 +150,36 @@ impl Writer {
///
/// This operation will write all data in given buffer into writer.
///
- /// # TODO
+ /// # Notes
+ ///
+ /// This function iterates over each chunk in the `Buf` and collects them
+ /// into a `Buffer`. For `Bytes` chunks, `copy_to_bytes` is zero-copy
+ /// (only a ref-count increment). For other chunk types (e.g., `&[u8]`),
+ /// the data will be copied.
///
- /// Optimize this function to avoid unnecessary copy.
+ /// This approach minimizes copies for chained buffers (`Chain<Bytes, Bytes>`)
+ /// where each individual chunk can be extracted without copying.
pub async fn write_from(&mut self, bs: impl Buf) -> Result<()> {
let mut bs = bs;
- let bs = Buffer::from(bs.copy_to_bytes(bs.remaining()));
- self.write(bs).await
+
+ // Fast path: single contiguous chunk.
+ // If chunk length equals remaining bytes, the entire buffer is contiguous.
+ // This avoids Vec allocation and loop overhead for the common case.
+ if bs.chunk().len() == bs.remaining() {
+ let bytes = bs.copy_to_bytes(bs.remaining());
+ return self.write(Buffer::from(bytes)).await;
+ }
+
+ // Slow path: multiple chunks (e.g., Chain<T, U>).
+ // Iterate over each chunk and collect them into a Vec<Bytes>.
+ // For Bytes chunks, copy_to_bytes is zero-copy.
+ // For other types, only the current chunk is copied, not the entire buffer.
+ let mut chunks = Vec::new();
+ while bs.has_remaining() {
+ let chunk_len = bs.chunk().len();
+ chunks.push(bs.copy_to_bytes(chunk_len));
+ }
+ self.write(Buffer::from(chunks)).await
}
/// Abort the writer and clean up all written data.
@@ -356,7 +379,7 @@ impl Writer {
#[cfg(test)]
mod tests {
- use bytes::Bytes;
+ use bytes::{Buf, Bytes};
use rand::Rng;
use rand::RngCore;
use rand::rngs::ThreadRng;
@@ -408,4 +431,27 @@ mod tests {
assert_eq!(buf.to_bytes(), content);
}
+
+ #[tokio::test]
+ async fn test_writer_write_from_chain() {
+ let op = Operator::new(services::Memory::default()).unwrap().finish();
+ let path = "test_file";
+
+ let part1 = Bytes::from(gen_random_bytes());
+ let part2 = Bytes::from(gen_random_bytes());
+
+ let mut chain_same = part1.clone().chain(part2.clone());
+ let chain = part1.chain(part2);
+
+ let mut writer = op.writer(path).await.unwrap();
+ writer.write_from(chain).await.expect("write must succeed");
+ writer.close().await.expect("close must succeed");
+
+ let buf = op.read(path).await.expect("read to end mut succeed");
+
+ assert_eq!(
+ buf.to_bytes(),
+ chain_same.copy_to_bytes(chain_same.remaining())
+ );
+ }
}