This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new fdbef3f940 Improve `ListArray` documentation for slices (#7039)
fdbef3f940 is described below

commit fdbef3f9409c668105fc32614d5de278ae7bbe52
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Feb 2 07:07:26 2025 -0500

    Improve `ListArray` documentation for slices (#7039)
    
    * Improve ListArray documentation for slices
    
    * more
    
    * ASCII ART
    
    * Update arrow-array/src/array/list_array.rs
    
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
    
    * Update arrow-array/src/array/list_array.rs
    
    Co-authored-by: Raz Luvaton <[email protected]>
    
    * Apply suggestions from code review
    
    Co-authored-by: Raz Luvaton <[email protected]>
    
    * Fix diagram
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
    Co-authored-by: Raz Luvaton <[email protected]>
---
 arrow-array/src/array/list_array.rs | 56 ++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index b53bbe7abc..009a7b7a50 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -120,8 +120,41 @@ impl OffsetSizeTrait for i64 {
 ///                 (offsets[i],            │   ListArray               (Array)
 ///                offsets[i+1])                                    └ ─ ─ ─ ─ ─ ─ ┘    │
 ///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
+/// ```
+///
+/// # Slicing
+///
+/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
+/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
 ///
+/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
+/// would result in the following. Note
 ///
+/// 1. `Values` array is unchanged
+/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
+///
+/// ```text
+///                                 ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
+///                                                         ┌ ─ ─ ─ ─ ─ ─ ┐    │  ╔═══╗
+///                                 │                         ╔═══╗ ╔═══╗         ║   ║  Not used
+///                                                         │ ║ 1 ║ ║ A ║ │ 0  │  ╚═══╝
+///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
+///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
+///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
+///  │    NULL     │  │ (3,4) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
+///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠───╣ ╠───╣
+///  │     [D]     │  │ (4,5) │           │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 3  │
+///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ├───┤ ├───┤
+///                                               │ 5 │     │ │ 1 │ │ D │ │ 4  │
+///                                 │             └───┘       ├───┤ ├───┤
+///                                                         │ │ 0 │ │ ? │ │ 5  │
+///                                 │  Validity               ╠═══╣ ╠═══╣
+///     Logical       Logical          (nulls)   Offsets    │ ║ 1 ║ ║ F ║ │ 6  │
+///      Values       Offsets       │                         ╚═══╝ ╚═══╝
+///                                                         │    Values   │    │
+///                 (offsets[i],    │   ListArray               (Array)
+///                offsets[i+1])                            └ ─ ─ ─ ─ ─ ─ ┘    │
+///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
 /// ```
 ///
 /// [`StringArray`]: crate::array::StringArray
@@ -263,13 +296,22 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
     /// Returns a reference to the offsets of this list
     ///
     /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
-    /// allowing for zero-copy cloning
+    /// allowing for zero-copy cloning.
+    ///
+    /// Notes: The `offsets` may not start at 0 and may not cover all values in
+    /// [`Self::values`]. This can happen when the list array was sliced via
+    /// [`Self::slice`]. See documentation for [`Self`] for more details.
     #[inline]
     pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
         &self.value_offsets
     }
 
     /// Returns a reference to the values of this list
+    ///
+    /// Note: The list array may not refer to all values in the `values` array.
+    /// For example if the list array was sliced via [`Self::slice`] values will
+    /// still contain values both before and after the slice. See documentation
+    /// for [`Self`] for more details.
     #[inline]
     pub fn values(&self) -> &ArrayRef {
         &self.values
@@ -296,7 +338,9 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
         self.values.slice(start, end - start)
     }
 
-    /// Returns the offset values in the offsets buffer
+    /// Returns the offset values in the offsets buffer.
+    ///
+    /// See [`Self::offsets`] for more details.
     #[inline]
     pub fn value_offsets(&self) -> &[OffsetSize] {
         &self.value_offsets
@@ -325,6 +369,10 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
     }
 
     /// Returns a zero-copy slice of this array with the indicated offset and length.
+    ///
+    /// Notes: this method does *NOT* slice the underlying values array or modify
+    /// the values in the offsets buffer. See [`Self::values`] and
+    /// [`Self::offsets`] for more information.
     pub fn slice(&self, offset: usize, length: usize) -> Self {
         Self {
             data_type: self.data_type.clone(),
@@ -556,12 +604,12 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSiz
 
 /// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
 ///
-// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
+/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
 pub type ListArray = GenericListArray<i32>;
 
 /// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
 ///
-// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
+/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
 pub type LargeListArray = GenericListArray<i64>;
 
 #[cfg(test)]

Reply via email to