This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e9e5a2cb Add better documentation, examples and builer-style API to 
`ByteView` (#6479)
1e9e5a2cb is described below

commit 1e9e5a2cbf9b5ba43730cb634ff4b913b0e16324
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Oct 1 16:02:36 2024 -0400

    Add better documentation, examples and builer-style API to `ByteView` 
(#6479)
    
    * Add better documentation, examples and builer-style API to `ByteView`
    
    * simplify tests
    
    * Apply suggestions from code review
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    
    * Moar inline
    
    * Update arrow-data/src/byte_view.rs
    
    Co-authored-by: Xiangpeng Hao <[email protected]>
    
    * fmt
    
    * Update test so it fails at the right place
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies 
<[email protected]>
    Co-authored-by: Xiangpeng Hao <[email protected]>
---
 arrow-array/src/array/byte_view_array.rs | 29 +++++++------
 arrow-data/src/byte_view.rs              | 72 +++++++++++++++++++++++++++++++-
 2 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/arrow-array/src/array/byte_view_array.rs 
b/arrow-array/src/array/byte_view_array.rs
index c53478d8b..b1b558057 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -44,8 +44,11 @@ use super::ByteArrayType;
 ///
 /// # See Also
 ///
-/// See [`StringViewArray`] for storing utf8 encoded string data and
-/// [`BinaryViewArray`] for storing bytes.
+/// * [`StringViewArray`] for storing utf8 encoded string data
+/// * [`BinaryViewArray`] for storing bytes
+/// * [`ByteView`] to interpret `u128`s layout of the views.
+///
+/// [`ByteView`]: arrow_data::ByteView
 ///
 /// # Notes
 ///
@@ -872,12 +875,9 @@ mod tests {
     #[should_panic(expected = "Invalid buffer index at 0: got index 3 but only 
has 1 buffers")]
     fn new_with_invalid_view_data() {
         let v = "large payload over 12 bytes";
-        let view = ByteView {
-            length: 13,
-            prefix: u32::from_le_bytes(v.as_bytes()[0..4].try_into().unwrap()),
-            buffer_index: 3,
-            offset: 1,
-        };
+        let view = ByteView::new(13, &v.as_bytes()[0..4])
+            .with_buffer_index(3)
+            .with_offset(1);
         let views = ScalarBuffer::from(vec![view.into()]);
         let buffers = vec![Buffer::from_slice_ref(v)];
         StringViewArray::new(views, buffers, None);
@@ -888,13 +888,12 @@ mod tests {
         expected = "Encountered non-UTF-8 data at index 0: invalid utf-8 
sequence of 1 bytes from index 0"
     )]
     fn new_with_invalid_utf8_data() {
-        let v: Vec<u8> = vec![0xf0, 0x80, 0x80, 0x80];
-        let view = ByteView {
-            length: v.len() as u32,
-            prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
-            buffer_index: 0,
-            offset: 0,
-        };
+        let v: Vec<u8> = vec![
+            // invalid UTF8
+            0xf0, 0x80, 0x80, 0x80, // more bytes to make it larger than 12
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00,
+        ];
+        let view = ByteView::new(v.len() as u32, &v[0..4]);
         let views = ScalarBuffer::from(vec![view.into()]);
         let buffers = vec![Buffer::from_slice_ref(v)];
         StringViewArray::new(views, buffers, None);
diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs
index 6f6d6d175..3b3ec6246 100644
--- a/arrow-data/src/byte_view.rs
+++ b/arrow-data/src/byte_view.rs
@@ -21,8 +21,40 @@ use arrow_schema::ArrowError;
 /// Helper to access views of [`GenericByteViewArray`] (`StringViewArray` and
 /// `BinaryViewArray`) where the length is greater than 12 bytes.
 ///
-/// See the documentation on [`GenericByteViewArray`] for more information on
-/// the layout of the views.
+/// See Also:
+/// * [`GenericByteViewArray`] for more information on the layout of the views.
+/// * [`validate_binary_view`] and [`validate_string_view`] to validate
+///
+/// # Example: Create a new u128 view
+///
+/// ```rust
+/// # use arrow_data::ByteView;;
+/// // Create a view for a string of length 20
+/// // first four bytes are "Rust"
+/// // stored in buffer 3
+/// // at offset 42
+/// let prefix = "Rust";
+/// let view = ByteView::new(20, prefix.as_bytes())
+///   .with_buffer_index(3)
+///   .with_offset(42);
+///
+/// // create the final u128
+/// let v = view.as_u128();
+/// assert_eq!(v, 0x2a000000037473755200000014);
+/// ```
+///
+/// # Example: decode a `u128` into its constituent fields
+/// ```rust
+/// # use arrow_data::ByteView;
+/// // Convert a u128 to a ByteView
+/// // See validate_{string,binary}_view functions to validate
+/// let v = ByteView::from(0x2a000000037473755200000014);
+///
+/// assert_eq!(v.length, 20);
+/// assert_eq!(v.prefix, 0x74737552);
+/// assert_eq!(v.buffer_index, 3);
+/// assert_eq!(v.offset, 42);
+/// ```
 ///
 /// [`GenericByteViewArray`]: 
https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
 #[derive(Debug, Copy, Clone, Default)]
@@ -39,6 +71,42 @@ pub struct ByteView {
 }
 
 impl ByteView {
+    /// Construct a [`ByteView`] for data `length` of bytes with the specified 
prefix.
+    ///
+    /// See example on [`ByteView`] docs
+    ///
+    /// Notes:
+    /// * the length should always be greater than 12 (Data less than 12
+    ///   bytes is stored as an inline view)
+    /// * buffer and offset are set to `0`
+    ///
+    /// # Panics
+    /// If the prefix is not exactly 4 bytes
+    #[inline]
+    pub fn new(length: u32, prefix: &[u8]) -> Self {
+        debug_assert!(length > 12);
+        Self {
+            length,
+            prefix: u32::from_le_bytes(prefix.try_into().unwrap()),
+            buffer_index: 0,
+            offset: 0,
+        }
+    }
+
+    /// Set the [`Self::buffer_index`] field
+    #[inline]
+    pub fn with_buffer_index(mut self, buffer_index: u32) -> Self {
+        self.buffer_index = buffer_index;
+        self
+    }
+
+    /// Set the [`Self::offset`] field
+    #[inline]
+    pub fn with_offset(mut self, offset: u32) -> Self {
+        self.offset = offset;
+        self
+    }
+
     #[inline(always)]
     /// Convert `ByteView` to `u128` by concatenating the fields
     pub fn as_u128(self) -> u128 {

Reply via email to