This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new fdbef3f940 Improve `ListArray` documentation for slices (#7039)
fdbef3f940 is described below
commit fdbef3f9409c668105fc32614d5de278ae7bbe52
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Feb 2 07:07:26 2025 -0500
Improve `ListArray` documentation for slices (#7039)
* Improve ListArray documentation for slices
* more
* ASCII ART
* Update arrow-array/src/array/list_array.rs
Co-authored-by: Raphael Taylor-Davies <[email protected]>
* Update arrow-array/src/array/list_array.rs
Co-authored-by: Raz Luvaton <[email protected]>
* Apply suggestions from code review
Co-authored-by: Raz Luvaton <[email protected]>
* Fix diagram
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
Co-authored-by: Raz Luvaton <[email protected]>
---
arrow-array/src/array/list_array.rs | 56 ++++++++++++++++++++++++++++++++++---
1 file changed, 52 insertions(+), 4 deletions(-)
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index b53bbe7abc..009a7b7a50 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -120,8 +120,41 @@ impl OffsetSizeTrait for i64 {
/// (offsets[i], │ ListArray (Array)
/// offsets[i+1]) └ ─ ─ ─ ─
─ ─ ┘ │
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
─ ─ ─ ─ ─
+/// ```
+///
+/// # Slicing
+///
+/// Slicing a `ListArray` creates a new `ListArray` without copying any data,
+/// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data
///
+/// For example, calling `slice(1, 3)` on the `ListArray` in the above example
+/// would result in the following. Note
///
+/// 1. `Values` array is unchanged
+/// 2. `Offsets` do not start at `0`, nor cover all values in the Values array.
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
+/// ┌ ─ ─ ─ ─ ─ ─ ┐
│ ╔═══╗
+/// │ ╔═══╗ ╔═══╗
║ ║ Not used
+/// │ ║ 1 ║ ║ A ║ │ 0
│ ╚═══╝
+/// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ╠═══╣ ╠═══╣
+/// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ ║ 1 ║ ║ B ║ │ 1
│
+/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠═══╣ ╠═══╣
+/// │ NULL │ │ (3,4) │ │ 0 │ │ 3 │ │ ║ 1 ║ ║ C ║ │ 2
│
+/// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠───╣ ╠───╣
+/// │ [D] │ │ (4,5) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? │ │ 3
│
+/// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤
+/// │ 5 │ │ │ 1 │ │ D │ │ 4
│
+/// │ └───┘ ├───┤ ├───┤
+/// │ │ 0 │ │ ? │ │ 5
│
+/// │ Validity ╠═══╣ ╠═══╣
+/// Logical Logical (nulls) Offsets │ ║ 1 ║ ║ F ║ │ 6
│
+/// Values Offsets │ ╚═══╝ ╚═══╝
+/// │ Values │
│
+/// (offsets[i], │ ListArray (Array)
+/// offsets[i+1]) └ ─ ─ ─ ─ ─ ─ ┘
│
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
/// ```
///
/// [`StringArray`]: crate::array::StringArray
@@ -263,13 +296,22 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
/// Returns a reference to the offsets of this list
///
/// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
- /// allowing for zero-copy cloning
+ /// allowing for zero-copy cloning.
+ ///
+ /// Notes: The `offsets` may not start at 0 and may not cover all values in
+ /// [`Self::values`]. This can happen when the list array was sliced via
+ /// [`Self::slice`]. See documentation for [`Self`] for more details.
#[inline]
pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
&self.value_offsets
}
/// Returns a reference to the values of this list
+ ///
+ /// Note: The list array may not refer to all values in the `values` array.
+ /// For example if the list array was sliced via [`Self::slice`] values will
+ /// still contain values both before and after the slice. See documentation
+ /// for [`Self`] for more details.
#[inline]
pub fn values(&self) -> &ArrayRef {
&self.values
@@ -296,7 +338,9 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
self.values.slice(start, end - start)
}
- /// Returns the offset values in the offsets buffer
+ /// Returns the offset values in the offsets buffer.
+ ///
+ /// See [`Self::offsets`] for more details.
#[inline]
pub fn value_offsets(&self) -> &[OffsetSize] {
&self.value_offsets
@@ -325,6 +369,10 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
}
/// Returns a zero-copy slice of this array with the indicated offset and length.
+ ///
+ /// Notes: this method does *NOT* slice the underlying values array or modify
+ /// the values in the offsets buffer. See [`Self::values`] and
+ /// [`Self::offsets`] for more information.
pub fn slice(&self, offset: usize, length: usize) -> Self {
Self {
data_type: self.data_type.clone(),
@@ -556,12 +604,12 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSiz
/// A [`GenericListArray`] of variable size lists, storing offsets as `i32`.
///
-// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
+/// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`]
pub type ListArray = GenericListArray<i32>;
/// A [`GenericListArray`] of variable size lists, storing offsets as `i64`.
///
-// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
+/// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`]
pub type LargeListArray = GenericListArray<i64>;
#[cfg(test)]