This is an automated email from the ASF dual-hosted git repository.

Jefffrey pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new da09872193 Implement native interleave for ListView (#9558)
da09872193 is described below

commit da09872193bb3805a3177678bf9005ddeea2333d
Author: Vegard Stikbakke <[email protected]>
AuthorDate: Wed May 20 03:44:03 2026 +0200

    Implement native interleave for ListView (#9558)
    
    This PR adds a native implementation of interleave for the ListView type
    which uses a good heuristic thanks to @asubiotto, either
    1. copy each row's elements and put them all into a new flat array, or
    2. concatenate all source value array (and adjust offsets).
    
    The latter is best when there is sharing of elements.
    
    Closes #9342.
    
    ---------
    
    Signed-off-by: Alfonso Subiotto Marques <[email protected]>
    Co-authored-by: Alfonso Subiotto Marques <[email protected]>
---
 arrow-select/src/interleave.rs | 232 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)

diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index 1bd34a7f3a..13d28747df 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -114,6 +114,8 @@ pub fn interleave(
             DataType::Int64 => interleave_run_end::<Int64Type>(values, 
indices),
             t => unreachable!("illegal run-end type {t}"),
         },
+        DataType::ListView(field) => interleave_list_view::<i32>(values, 
indices, field),
+        DataType::LargeListView(field) => interleave_list_view::<i64>(values, 
indices, field),
         _ => interleave_fallback(values, indices)
     }
 }
@@ -482,6 +484,114 @@ fn interleave_run_end<R: RunEndIndexType>(
     )?))
 }
 
+fn interleave_list_view<O: OffsetSizeTrait>(
+    values: &[&dyn Array],
+    indices: &[(usize, usize)],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let interleaved = Interleave::<'_, GenericListViewArray<O>>::new(values, 
indices);
+
+    // Pick whichever strategy produces fewer child elements:
+    // - Per-row copy: total = sum of selected sizes. Better for sparse 
selections.
+    // - Concat + offset adjustment: total = sum of source backing array 
lengths.
+    //   Better when rows share backing elements via overlapping offset/size 
ranges.
+    let concat_cost: usize = interleaved.arrays.iter().map(|lv| 
lv.values().len()).sum();
+    let per_row_cost: usize = indices
+        .iter()
+        .map(|&(a, r)| interleaved.arrays[a].sizes()[r].as_usize())
+        .sum();
+
+    if per_row_cost <= concat_cost {
+        interleave_list_view_copy::<O>(&interleaved, indices, field)
+    } else {
+        interleave_list_view_concat::<O>(&interleaved, indices, field)
+    }
+}
+
+/// Per-row copy: copies each selected row's child elements into a new flat 
array.
+fn interleave_list_view_copy<O: OffsetSizeTrait>(
+    interleaved: &Interleave<'_, GenericListViewArray<O>>,
+    indices: &[(usize, usize)],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let mut capacity = 0usize;
+    let mut offsets = Vec::with_capacity(indices.len());
+    let mut sizes = Vec::with_capacity(indices.len());
+    for &(array_idx, row_idx) in indices {
+        let list = interleaved.arrays[array_idx];
+        let size = list.sizes()[row_idx].as_usize();
+        offsets.push(
+            O::from_usize(capacity).ok_or_else(|| 
ArrowError::OffsetOverflowError(capacity))?,
+        );
+        sizes.push(O::from_usize(size).ok_or_else(|| 
ArrowError::OffsetOverflowError(size))?);
+        capacity += size;
+    }
+
+    let child_data: Vec<_> = interleaved
+        .arrays
+        .iter()
+        .map(|list| list.values().to_data())
+        .collect();
+    let child_data_refs: Vec<_> = child_data.iter().collect();
+    let mut mutable_child = MutableArrayData::new(child_data_refs, false, 
capacity);
+    for &(array_idx, row_idx) in indices {
+        let list = interleaved.arrays[array_idx];
+        let start = list.offsets()[row_idx].as_usize();
+        let size = list.sizes()[row_idx].as_usize();
+        if size > 0 {
+            mutable_child.extend(array_idx, start, start + size);
+        }
+    }
+
+    Ok(Arc::new(GenericListViewArray::<O>::new(
+        field.clone(),
+        offsets.into(),
+        sizes.into(),
+        make_array(mutable_child.freeze()),
+        interleaved.nulls.clone(),
+    )))
+}
+
+/// Concat backing arrays: concatenates all source value arrays and adjusts 
offsets.
+/// Preserves within-source element sharing.
+fn interleave_list_view_concat<O: OffsetSizeTrait>(
+    interleaved: &Interleave<'_, GenericListViewArray<O>>,
+    indices: &[(usize, usize)],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let child_arrays: Vec<&dyn Array> = interleaved
+        .arrays
+        .iter()
+        .map(|lv| lv.values().as_ref())
+        .collect();
+    let mut base_offsets = Vec::with_capacity(interleaved.arrays.len());
+    let mut running = 0usize;
+    for lv in &interleaved.arrays {
+        base_offsets.push(running);
+        running += lv.values().len();
+    }
+    let combined_values = concat(&child_arrays)?;
+
+    let mut new_offsets = Vec::with_capacity(indices.len());
+    let mut new_sizes = Vec::with_capacity(indices.len());
+    for &(array_idx, row_idx) in indices {
+        let lv = interleaved.arrays[array_idx];
+        let adjusted = lv.offsets()[row_idx].as_usize() + 
base_offsets[array_idx];
+        new_offsets.push(
+            O::from_usize(adjusted).ok_or_else(|| 
ArrowError::OffsetOverflowError(adjusted))?,
+        );
+        new_sizes.push(lv.sizes()[row_idx]);
+    }
+
+    Ok(Arc::new(GenericListViewArray::<O>::new(
+        field.clone(),
+        new_offsets.into(),
+        new_sizes.into(),
+        combined_values,
+        interleaved.nulls.clone(),
+    )))
+}
+
 /// Fallback implementation of interleave using [`MutableArrayData`]
 fn interleave_fallback(
     values: &[&dyn Array],
@@ -841,6 +951,128 @@ mod tests {
         test_interleave_lists::<i64>();
     }
 
+    fn test_interleave_list_views<O: OffsetSizeTrait>() {
+        // [[1, 2], null, [3]]
+        let mut a = GenericListBuilder::<O, _>::new(Int32Builder::new());
+        a.values().append_value(1);
+        a.values().append_value(2);
+        a.append(true);
+        a.append(false);
+        a.values().append_value(3);
+        a.append(true);
+        let a: GenericListViewArray<O> = a.finish().into();
+
+        // [[4], null, [5, 6, null]]
+        let mut b = GenericListBuilder::<O, _>::new(Int32Builder::new());
+        b.values().append_value(4);
+        b.append(true);
+        b.append(false);
+        b.values().append_value(5);
+        b.values().append_value(6);
+        b.values().append_null();
+        b.append(true);
+        let b: GenericListViewArray<O> = b.finish().into();
+
+        let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), 
(1, 1)]).unwrap();
+        let v = values
+            .as_any()
+            .downcast_ref::<GenericListViewArray<O>>()
+            .unwrap();
+
+        // [[3], null, [4], [5, 6, null], null]
+        let mut expected = GenericListBuilder::<O, 
_>::new(Int32Builder::new());
+        expected.values().append_value(3);
+        expected.append(true);
+        expected.append(false);
+        expected.values().append_value(4);
+        expected.append(true);
+        expected.values().append_value(5);
+        expected.values().append_value(6);
+        expected.values().append_null();
+        expected.append(true);
+        expected.append(false);
+        let expected: GenericListViewArray<O> = expected.finish().into();
+
+        assert_eq!(v, &expected);
+    }
+
+    #[test]
+    fn test_list_views() {
+        test_interleave_list_views::<i32>();
+    }
+
+    #[test]
+    fn test_large_list_views() {
+        test_interleave_list_views::<i64>();
+    }
+
+    #[test]
+    fn test_interleave_list_view_overlapping() {
+        let field = Arc::new(Field::new_list_field(DataType::Int64, false));
+
+        // lv_a: 10 rows, two groups of 5 sharing the same backing elements.
+        //   rows 0-4 → offset 0, size 5 → [0,1,2,3,4]
+        //   rows 5-9 → offset 5, size 5 → [5,6,7,8,9]
+        let lv_a = ListViewArray::new(
+            Arc::clone(&field),
+            ScalarBuffer::from(vec![0i32, 0, 0, 0, 0, 5, 5, 5, 5, 5]),
+            ScalarBuffer::from(vec![5i32; 10]),
+            Arc::new(Int64Array::from_iter_values(0..10)),
+            None,
+        );
+
+        // lv_b: 8 rows, two groups of 4 sharing the same backing elements.
+        //   rows 0-3 → offset 0, size 3 → [100,101,102]
+        //   rows 4-7 → offset 3, size 3 → [103,104,105]
+        let lv_b = ListViewArray::new(
+            Arc::clone(&field),
+            ScalarBuffer::from(vec![0i32, 0, 0, 0, 3, 3, 3, 3]),
+            ScalarBuffer::from(vec![3i32; 8]),
+            Arc::new(Int64Array::from_iter_values(100..106)),
+            None,
+        );
+
+        let indices: Vec<(usize, usize)> = vec![
+            (0, 0),
+            (1, 0),
+            (0, 5),
+            (1, 4),
+            (0, 1),
+            (1, 1),
+            (0, 6),
+            (1, 5),
+        ];
+        let result = interleave(&[&lv_a as &dyn Array, &lv_b as &dyn Array], 
&indices).unwrap();
+        result
+            .to_data()
+            .validate_full()
+            .expect("result must be valid");
+
+        let result_lv = result.as_list_view::<i32>();
+        assert_eq!(result_lv.len(), 8);
+        assert_eq!(
+            result_lv.value(0).as_primitive::<Int64Type>().values(),
+            &[0, 1, 2, 3, 4]
+        );
+        assert_eq!(
+            result_lv.value(1).as_primitive::<Int64Type>().values(),
+            &[100, 101, 102]
+        );
+        assert_eq!(
+            result_lv.value(2).as_primitive::<Int64Type>().values(),
+            &[5, 6, 7, 8, 9]
+        );
+        assert_eq!(
+            result_lv.value(3).as_primitive::<Int64Type>().values(),
+            &[103, 104, 105]
+        );
+
+        // Backing elements = sum of source arrays (10 + 6 = 16), not per-row
+        // expansion (8 rows × avg ~4 = 32). Overlapping sharing is preserved.
+        let total_input_elements = lv_a.values().len() + lv_b.values().len();
+        assert_eq!(result_lv.values().len(), total_input_elements);
+    }
+
     #[test]
     fn test_struct_without_nulls() {
         let fields = Fields::from(vec![

Reply via email to