This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new fc245b55ba Improve `Buffer` documentation, deprecate 
`Buffer::from_bytes` add `From<Bytes>` and `From<bytes::Bytes>` impls (#6939)
fc245b55ba is described below

commit fc245b55ba85d3535f0702706e886ce67e95f1d2
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Jan 6 16:03:51 2025 -0500

    Improve `Buffer` documentation, deprecate `Buffer::from_bytes` add 
`From<Bytes>` and `From<bytes::Bytes>` impls (#6939)
    
    * Improve Bytes documentation
    
    * Improve Buffer documentation, add From<Bytes> and From<bytes::Bytes> impls
    
    * avoid linking to private docs
    
    * Deprecate `Buffer::from_bytes`
    
    * Apply suggestions from code review
    
    Co-authored-by: Jeffrey Vo <[email protected]>
    
    ---------
    
    Co-authored-by: Jeffrey Vo <[email protected]>
---
 arrow-buffer/src/buffer/immutable.rs              | 118 +++++++++++++++++-----
 arrow-buffer/src/buffer/mutable.rs                |   2 +-
 arrow-buffer/src/bytes.rs                         |   8 +-
 arrow-flight/src/decode.rs                        |   2 +-
 arrow-flight/src/sql/client.rs                    |   2 +-
 parquet/src/arrow/array_reader/byte_view_array.rs |  10 +-
 6 files changed, 104 insertions(+), 38 deletions(-)

diff --git a/arrow-buffer/src/buffer/immutable.rs 
b/arrow-buffer/src/buffer/immutable.rs
index cf1d6f3667..fd145ce230 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -28,8 +28,43 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
 use super::ops::bitwise_unary_op_helper;
 use super::{MutableBuffer, ScalarBuffer};
 
-/// Buffer represents a contiguous memory region that can be shared with other 
buffers and across
-/// thread boundaries.
+/// A contiguous memory region that can be shared with other buffers and across
+/// thread boundaries that stores Arrow data.
+///
+/// `Buffer`s can be sliced and cloned without copying the underlying data and 
can
+/// be created from memory allocated by non-Rust sources such as C/C++.
+///
+/// # Example: Create a `Buffer` from a `Vec` (without copying)
+/// ```
+/// # use arrow_buffer::Buffer;
+/// let vec: Vec<u32> = vec![1, 2, 3];
+/// let buffer = Buffer::from(vec);
+/// ```
+///
+/// # Example: Convert a `Buffer` to a `Vec` (without copying)
+///
+/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are
+/// no other references and the types are aligned correctly.
+/// ```
+/// # use arrow_buffer::Buffer;
+/// # let vec: Vec<u32> = vec![1, 2, 3];
+/// # let buffer = Buffer::from(vec);
+/// // convert the buffer back into a Vec of u32
+/// // note this will fail if the buffer is shared or not aligned correctly
+/// let vec: Vec<u32> = buffer.into_vec().unwrap();
+/// ```
+///
+/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying)
+///
+/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory
+/// regions. You can create a buffer from a `Bytes` instance using the `From`
+/// implementation, also without copying.
+///
+/// ```
+/// # use arrow_buffer::Buffer;
+/// let bytes = bytes::Bytes::from("hello");
+/// let buffer = Buffer::from(bytes);
+///```
 #[derive(Clone, Debug)]
 pub struct Buffer {
     /// the internal byte buffer.
@@ -59,24 +94,15 @@ unsafe impl Send for Buffer where Bytes: Send {}
 unsafe impl Sync for Buffer where Bytes: Sync {}
 
 impl Buffer {
-    /// Auxiliary method to create a new Buffer
+    /// Create a new Buffer from a (internal) `Bytes`
     ///
-    /// This can be used with a [`bytes::Bytes`] via `into()`:
+    /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs
+    /// and is different than [`bytes::Bytes`].
     ///
-    /// ```
-    /// # use arrow_buffer::Buffer;
-    /// let bytes = bytes::Bytes::from_static(b"foo");
-    /// let buffer = Buffer::from_bytes(bytes.into());
-    /// ```
-    #[inline]
+    /// See examples on [`Buffer`] for ways to create a buffer from a 
[`bytes::Bytes`].
+    #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")]
     pub fn from_bytes(bytes: Bytes) -> Self {
-        let length = bytes.len();
-        let ptr = bytes.as_ptr();
-        Buffer {
-            data: Arc::new(bytes),
-            ptr,
-            length,
-        }
+        Self::from(bytes)
     }
 
     /// Returns the offset, in bytes, of `Self::ptr` to `Self::data`
@@ -107,8 +133,11 @@ impl Buffer {
         buffer.into()
     }
 
-    /// Creates a buffer from an existing memory region. Ownership of the 
memory is tracked via reference counting
-    /// and the memory will be freed using the `drop` method of 
[crate::alloc::Allocation] when the reference count reaches zero.
+    /// Creates a buffer from an existing memory region.
+    ///
+    /// Ownership of the memory is tracked via reference counting
+    /// and the memory will be freed using the `drop` method of
+    /// [crate::alloc::Allocation] when the reference count reaches zero.
     ///
     /// # Arguments
     ///
@@ -155,7 +184,7 @@ impl Buffer {
         self.data.capacity()
     }
 
-    /// Tried to shrink the capacity of the buffer as much as possible, 
freeing unused memory.
+    /// Tries to shrink the capacity of the buffer as much as possible, 
freeing unused memory.
     ///
     /// If the buffer is shared, this is a no-op.
     ///
@@ -190,7 +219,7 @@ impl Buffer {
         }
     }
 
-    /// Returns whether the buffer is empty.
+    /// Returns true if the buffer is empty.
     #[inline]
     pub fn is_empty(&self) -> bool {
         self.length == 0
@@ -206,7 +235,9 @@ impl Buffer {
     }
 
     /// Returns a new [Buffer] that is a slice of this buffer starting at 
`offset`.
-    /// Doing so allows the same memory region to be shared between buffers.
+    ///
+    /// This function is `O(1)` and does not copy any data, allowing the
+    /// same memory region to be shared between buffers.
     ///
     /// # Panics
     ///
@@ -240,7 +271,10 @@ impl Buffer {
 
     /// Returns a new [Buffer] that is a slice of this buffer starting at 
`offset`,
     /// with `length` bytes.
-    /// Doing so allows the same memory region to be shared between buffers.
+    ///
+    /// This function is `O(1)` and does not copy any data, allowing the same
+    /// memory region to be shared between buffers.
+    ///
     /// # Panics
     /// Panics iff `(offset + length)` is larger than the existing length.
     pub fn slice_with_length(&self, offset: usize, length: usize) -> Self {
@@ -328,10 +362,16 @@ impl Buffer {
             })
     }
 
-    /// Returns `Vec` for mutating the buffer
+    /// Converts self into a `Vec`, if possible.
+    ///
+    /// This can be used to reuse / mutate the underlying data.
     ///
-    /// Returns `Err(self)` if this buffer does not have the same [`Layout`] as
-    /// the destination Vec or contains a non-zero offset
+    /// # Errors
+    ///
+    /// Returns `Err(self)` if
+    /// 1. this buffer does not have the same [`Layout`] as the destination Vec
+    /// 2. contains a non-zero offset
+    /// 3. The buffer is shared
     pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
         let layout = match self.data.deallocation() {
             Deallocation::Standard(l) => l,
@@ -414,7 +454,29 @@ impl<T: ArrowNativeType> From<ScalarBuffer<T>> for Buffer {
     }
 }
 
-/// Creating a `Buffer` instance by storing the boolean values into the buffer
+/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer`
+impl From<Bytes> for Buffer {
+    #[inline]
+    fn from(bytes: Bytes) -> Self {
+        let length = bytes.len();
+        let ptr = bytes.as_ptr();
+        Self {
+            data: Arc::new(bytes),
+            ptr,
+            length,
+        }
+    }
+}
+
+/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer`
+impl From<bytes::Bytes> for Buffer {
+    fn from(bytes: bytes::Bytes) -> Self {
+        let bytes: Bytes = bytes.into();
+        Self::from(bytes)
+    }
+}
+
+/// Create a `Buffer` instance by storing the boolean values into the buffer
 impl FromIterator<bool> for Buffer {
     fn from_iter<I>(iter: I) -> Self
     where
@@ -447,7 +509,9 @@ impl<T: ArrowNativeType> From<BufferBuilder<T>> for Buffer {
 
 impl Buffer {
     /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) 
length.
+    ///
     /// Prefer this to `collect` whenever possible, as it is ~60% faster.
+    ///
     /// # Example
     /// ```
     /// # use arrow_buffer::buffer::Buffer;
diff --git a/arrow-buffer/src/buffer/mutable.rs 
b/arrow-buffer/src/buffer/mutable.rs
index c4315a1d64..5ad55e306e 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -328,7 +328,7 @@ impl MutableBuffer {
     pub(super) fn into_buffer(self) -> Buffer {
         let bytes = unsafe { Bytes::new(self.data, self.len, 
Deallocation::Standard(self.layout)) };
         std::mem::forget(self);
-        Buffer::from_bytes(bytes)
+        Buffer::from(bytes)
     }
 
     /// View this buffer as a mutable slice of a specific type.
diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs
index 77724137ae..b811bd2c6b 100644
--- a/arrow-buffer/src/bytes.rs
+++ b/arrow-buffer/src/bytes.rs
@@ -28,14 +28,18 @@ use crate::buffer::dangling_ptr;
 
 /// A continuous, fixed-size, immutable memory region that knows how to 
de-allocate itself.
 ///
-/// This structs' API is inspired by the `bytes::Bytes`, but it is not limited 
to using rust's
-/// global allocator nor u8 alignment.
+/// Note that this structure is an internal implementation detail of the
+/// arrow-rs crate. While it has the same name and similar API as
+/// [`bytes::Bytes`] it is not limited to rust's global allocator nor u8
+/// alignment. It is possible to create a `Bytes` from `bytes::Bytes` using the
+/// `From` implementation.
 ///
 /// In the most common case, this buffer is allocated using 
[`alloc`](std::alloc::alloc)
 /// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT)
 ///
 /// When the region is allocated by a different allocator, 
[Deallocation::Custom], this calls the
 /// custom deallocator to deallocate the region when it is no longer needed.
+///
 pub struct Bytes {
     /// The raw pointer to be beginning of the region
     ptr: NonNull<u8>,
diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs
index 7bafc38430..760fc926fc 100644
--- a/arrow-flight/src/decode.rs
+++ b/arrow-flight/src/decode.rs
@@ -295,7 +295,7 @@ impl FlightDataDecoder {
                     ));
                 };
 
-                let buffer = Buffer::from_bytes(data.data_body.into());
+                let buffer = Buffer::from(data.data_body);
                 let dictionary_batch = 
message.header_as_dictionary_batch().ok_or_else(|| {
                     FlightError::protocol(
                         "Could not get dictionary batch from DictionaryBatch 
message",
diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs
index a6e228737b..6d3ac3dbe6 100644
--- a/arrow-flight/src/sql/client.rs
+++ b/arrow-flight/src/sql/client.rs
@@ -721,7 +721,7 @@ pub fn arrow_data_from_flight_data(
 
             let dictionaries_by_field = HashMap::new();
             let record_batch = read_record_batch(
-                &Buffer::from_bytes(flight_data.data_body.into()),
+                &Buffer::from(flight_data.data_body),
                 ipc_record_batch,
                 arrow_schema_ref.clone(),
                 &dictionaries_by_field,
diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs 
b/parquet/src/arrow/array_reader/byte_view_array.rs
index 5845e2c08c..92a8b0592d 100644
--- a/parquet/src/arrow/array_reader/byte_view_array.rs
+++ b/parquet/src/arrow/array_reader/byte_view_array.rs
@@ -316,9 +316,8 @@ impl ByteViewArrayDecoderPlain {
     }
 
     pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> 
Result<usize> {
-        // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is 
zero copy
-        // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, 
which is also zero copy
-        let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into());
+        // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer`
+        let buf = arrow_buffer::Buffer::from(self.buf.clone());
         let block_id = output.append_block(buf);
 
         let to_read = len.min(self.max_remaining_values);
@@ -549,9 +548,8 @@ impl ByteViewArrayDecoderDeltaLength {
 
         let src_lengths = &self.lengths[self.length_offset..self.length_offset 
+ to_read];
 
-        // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is 
zero copy
-        // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, 
which is also zero copy
-        let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into());
+        // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer`
+        let bytes = Buffer::from(self.data.clone());
         let block_id = output.append_block(bytes);
 
         let mut current_offset = self.data_offset;

Reply via email to