This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c59b76375 Minor: `pub use ByteView` in arrow and improve documentation (#6275)
6c59b76375 is described below

commit 6c59b7637592e4b67b18762b8313f91086c0d5d8
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Aug 20 14:46:31 2024 -0400

    Minor: `pub use ByteView` in arrow and improve documentation (#6275)
    
    * Minor: `pub use ByteView` in arrow and improve documentation
    
    * clarify docs more
---
 arrow-array/src/array/byte_view_array.rs | 23 +++++++++++++++--------
 arrow-data/src/byte_view.rs              |  7 +++++++
 arrow/src/array/mod.rs                   |  2 +-
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
index 42f945838a..a155b6ab22 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -52,7 +52,7 @@ use super::ByteArrayType;
 /// not by value. as there are many different buffer layouts to represent the
 /// same data (e.g. different offsets, different buffer sizes, etc).
 ///
-/// # Layout
+/// # Layout: "views" and buffers
 ///
 /// A `GenericByteViewArray` stores variable length byte strings. An array of
 /// `N` elements is stored as `N` fixed length "views" and a variable number
@@ -75,10 +75,12 @@ use super::ByteArrayType;
 ///                          0    31       63      95    127
 /// ```
 ///
-/// * Strings with length <= 12 are stored directly in the view.
+/// * Strings with length <= 12 are stored directly in the view. See
+///   [`Self::inline_value`] to access the inlined prefix from a short view.
 ///
 /// * Strings with length > 12: The first four bytes are stored inline in the
-///   view and the entire string is stored in one of the buffers.
+///   view and the entire string is stored in one of the buffers. See [`ByteView`]
+///   to access the fields of these views.
 ///
 /// Unlike [`GenericByteArray`], there are no constraints on the offsets other
 /// than they must point into a valid buffer. However, they can be out of order,
@@ -89,6 +91,8 @@ use super::ByteArrayType;
 /// separate buffer while the string "LavaMonster" is stored inlined in the
 /// view. In this case, the same bytes for "Fish" are used to store both strings.
 ///
+/// [`ByteView`]: arrow_data::ByteView
+///
 /// ```text
 ///                                                                            
┌───┐
 ///                         ┌──────┬──────┬──────┬──────┐               offset 
│...│
@@ -261,9 +265,12 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         unsafe { self.value_unchecked(i) }
     }
 
-    /// Returns the element at index `i`
+    /// Returns the element at index `i` without bounds checking
+    ///
     /// # Safety
-    /// Caller is responsible for ensuring that the index is within the bounds of the array
+    ///
+    /// Caller is responsible for ensuring that the index is within the bounds
+    /// of the array
     pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
         let v = self.views.get_unchecked(idx);
         let len = *v as u32;
@@ -278,7 +285,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         T::Native::from_bytes_unchecked(b)
     }
 
-    /// Returns the inline value of the view.
+    /// Returns the first `len` bytes of the inline value of the view.
     ///
     /// # Safety
     /// - The `view` must be a valid element from `Self::views()` that adheres to the view layout.
@@ -289,7 +296,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
     }
 
-    /// constructs a new iterator
+    /// Constructs a new iterator for iterating over the values of this array
     pub fn iter(&self) -> ArrayIter<&Self> {
         ArrayIter::new(self)
     }
@@ -358,7 +365,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         builder.finish()
     }
 
-    /// Comparing two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
+    /// Compare two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
     ///
     /// Comparing two ByteView types are non-trivial.
     /// It takes a bit of patience to understand why we don't just compare two &[u8] directly.
diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs
index b8b1731ac6..a2e9d135fd 100644
--- a/arrow-data/src/byte_view.rs
+++ b/arrow-data/src/byte_view.rs
@@ -18,6 +18,13 @@
 use arrow_buffer::Buffer;
 use arrow_schema::ArrowError;
 
+/// Helper to access views of [`GenericByteViewArray`] (`StringViewArray` and
+/// `BinaryViewArray`) where the length is greater than 12 bytes.
+///
+/// See the documentation on [`GenericByteViewArray`] for more information on
+/// the layout of the views.
+///
+/// [`GenericByteViewArray`]: https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
 #[derive(Debug, Copy, Clone, Default)]
 #[repr(C)]
 pub struct ByteView {
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index 242c9148ca..410e9d5af2 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -25,7 +25,7 @@ pub use arrow_array::cast::*;
 pub use arrow_array::iterator::*;
 pub use arrow_array::*;
 pub use arrow_data::{
-    layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, DataTypeLayout,
+    layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, ByteView, DataTypeLayout,
 };
 
 pub use arrow_data::transform::{Capacities, MutableArrayData};

Reply via email to