This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 5e32cc60e5 Support more operations on ListView (#8645)
5e32cc60e5 is described below

commit 5e32cc60e5e2f3119dc1d56ea925c4daf17f13df
Author: Andrew Duffy <[email protected]>
AuthorDate: Mon Oct 27 14:23:39 2025 -0400

    Support more operations on ListView (#8645)
    
    # Which issue does this PR close?
    
    Part of #5375
    
    Vortex was encountering some issues after we switched our preferred List
    type to `ListView`, the first thing we noticed was that
    `arrow_select::filter_array` would fail on ListView (and LargeListView,
    though we don't use that).
    
    This PR addresses some missing select kernel implementations for
    ListView and LargeListView.
    
    This also fixes an existing bug in the ArrayData validation for ListView
    arrays that would trigger an out of bounds index panic.
    
    # Are these changes tested?
    
    - [x] filter_array
    - [x] concat
    - [x] take
    
    
    # Are there any user-facing changes?
    
    ListView/LargeListView can now be used with the `take`, `concat` and
    `filter_array` kernels
    
    You can now use the `PartialEq` to compare ListView arrays.
    
    ---------
    
    Signed-off-by: Andrew Duffy <[email protected]>
---
 arrow-data/src/data.rs                |  10 +-
 arrow-data/src/equal/list_view.rs     | 129 +++++++++++++++++++++++
 arrow-data/src/equal/mod.rs           |  10 +-
 arrow-data/src/transform/list_view.rs |  56 ++++++++++
 arrow-data/src/transform/mod.rs       |  33 +++---
 arrow-select/src/concat.rs            | 186 +++++++++++++++++++++++++++++++---
 arrow-select/src/filter.rs            |  97 ++++++++++++++++++
 arrow-select/src/take.rs              | 149 +++++++++++++++++++++++++++
 arrow/tests/array_equal.rs            | 129 ++++++++++++++++++++++-
 9 files changed, 767 insertions(+), 32 deletions(-)

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index c31ac0c6e6..91957e14f3 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -980,7 +980,15 @@ impl ArrayData {
     ) -> Result<(), ArrowError> {
         let offsets: &[T] = self.typed_buffer(0, self.len)?;
         let sizes: &[T] = self.typed_buffer(1, self.len)?;
-        for i in 0..values_length {
+        if offsets.len() != sizes.len() {
+            return Err(ArrowError::ComputeError(format!(
+                "ListView offsets len {} does not match sizes len {}",
+                offsets.len(),
+                sizes.len()
+            )));
+        }
+
+        for i in 0..sizes.len() {
             let size = sizes[i].to_usize().ok_or_else(|| {
                 ArrowError::InvalidArgumentError(format!(
                     "Error converting size[{}] ({}) to usize for {}",
diff --git a/arrow-data/src/equal/list_view.rs 
b/arrow-data/src/equal/list_view.rs
new file mode 100644
index 0000000000..c7cb31db90
--- /dev/null
+++ b/arrow-data/src/equal/list_view.rs
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use crate::data::count_nulls;
+use crate::equal::equal_values;
+use arrow_buffer::ArrowNativeType;
+use num_integer::Integer;
+
+pub(super) fn list_view_equal<T: ArrowNativeType + Integer>(
+    lhs: &ArrayData,
+    rhs: &ArrayData,
+    lhs_start: usize,
+    rhs_start: usize,
+    len: usize,
+) -> bool {
+    let lhs_offsets = lhs.buffer::<T>(0);
+    let lhs_sizes = lhs.buffer::<T>(1);
+
+    let rhs_offsets = rhs.buffer::<T>(0);
+    let rhs_sizes = rhs.buffer::<T>(1);
+
+    let lhs_data = &lhs.child_data()[0];
+    let rhs_data = &rhs.child_data()[0];
+
+    let lhs_null_count = count_nulls(lhs.nulls(), lhs_start, len);
+    let rhs_null_count = count_nulls(rhs.nulls(), rhs_start, len);
+
+    if lhs_null_count != rhs_null_count {
+        return false;
+    }
+
+    if lhs_null_count == 0 {
+        // non-null pathway: all sizes must be equal, and all values must be 
equal
+        let lhs_range_sizes = &lhs_sizes[lhs_start..lhs_start + len];
+        let rhs_range_sizes = &rhs_sizes[rhs_start..rhs_start + len];
+
+        if lhs_range_sizes.len() != rhs_range_sizes.len() {
+            return false;
+        }
+
+        if lhs_range_sizes != rhs_range_sizes {
+            return false;
+        }
+
+        // Check values for equality
+        let lhs_range_offsets = &lhs_offsets[lhs_start..lhs_start + len];
+        let rhs_range_offsets = &rhs_offsets[rhs_start..rhs_start + len];
+
+        if lhs_range_offsets.len() != rhs_range_offsets.len() {
+            return false;
+        }
+
+        for ((&lhs_offset, &rhs_offset), &size) in lhs_range_offsets
+            .iter()
+            .zip(rhs_range_offsets)
+            .zip(lhs_range_sizes)
+        {
+            let lhs_offset = lhs_offset.to_usize().unwrap();
+            let rhs_offset = rhs_offset.to_usize().unwrap();
+            let size = size.to_usize().unwrap();
+
+            // Check if offsets are valid for the given range
+            if !equal_values(lhs_data, rhs_data, lhs_offset, rhs_offset, size) 
{
+                return false;
+            }
+        }
+    } else {
+        // Need to integrate validity check in the inner loop.
+        // non-null pathway: all sizes must be equal, and all values must be 
equal
+        let lhs_range_sizes = &lhs_sizes[lhs_start..lhs_start + len];
+        let rhs_range_sizes = &rhs_sizes[rhs_start..rhs_start + len];
+
+        let lhs_nulls = lhs.nulls().unwrap().slice(lhs_start, len);
+        let rhs_nulls = rhs.nulls().unwrap().slice(rhs_start, len);
+
+        // Sizes can differ if values are null
+        if lhs_range_sizes.len() != rhs_range_sizes.len() {
+            return false;
+        }
+
+        // Check values for equality, with null checking
+        let lhs_range_offsets = &lhs_offsets[lhs_start..lhs_start + len];
+        let rhs_range_offsets = &rhs_offsets[rhs_start..rhs_start + len];
+
+        if lhs_range_offsets.len() != rhs_range_offsets.len() {
+            return false;
+        }
+
+        for (index, ((&lhs_offset, &rhs_offset), &size)) in lhs_range_offsets
+            .iter()
+            .zip(rhs_range_offsets)
+            .zip(lhs_range_sizes)
+            .enumerate()
+        {
+            let lhs_is_null = lhs_nulls.is_null(index);
+            let rhs_is_null = rhs_nulls.is_null(index);
+
+            if lhs_is_null != rhs_is_null {
+                return false;
+            }
+
+            let lhs_offset = lhs_offset.to_usize().unwrap();
+            let rhs_offset = rhs_offset.to_usize().unwrap();
+            let size = size.to_usize().unwrap();
+
+            // Check if values match in the range
+            if !lhs_is_null && !equal_values(lhs_data, rhs_data, lhs_offset, 
rhs_offset, size) {
+                return false;
+            }
+        }
+    }
+
+    true
+}
diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs
index 1c16ee2f8a..7a310b1240 100644
--- a/arrow-data/src/equal/mod.rs
+++ b/arrow-data/src/equal/mod.rs
@@ -30,6 +30,7 @@ mod dictionary;
 mod fixed_binary;
 mod fixed_list;
 mod list;
+mod list_view;
 mod null;
 mod primitive;
 mod run;
@@ -41,6 +42,8 @@ mod variable_size;
 // these methods assume the same type, len and null count.
 // For this reason, they are not exposed and are instead used
 // to build the generic functions below (`equal_range` and `equal`).
+use self::run::run_equal;
+use crate::equal::list_view::list_view_equal;
 use boolean::boolean_equal;
 use byte_view::byte_view_equal;
 use dictionary::dictionary_equal;
@@ -53,8 +56,6 @@ use structure::struct_equal;
 use union::union_equal;
 use variable_size::variable_sized_equal;
 
-use self::run::run_equal;
-
 /// Compares the values of two [ArrayData] starting at `lhs_start` and 
`rhs_start` respectively
 /// for `len` slots.
 #[inline]
@@ -104,10 +105,9 @@ fn equal_values(
             byte_view_equal(lhs, rhs, lhs_start, rhs_start, len)
         }
         DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, 
len),
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not yet implemented")
-        }
         DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, 
rhs_start, len),
+        DataType::ListView(_) => list_view_equal::<i32>(lhs, rhs, lhs_start, 
rhs_start, len),
+        DataType::LargeListView(_) => list_view_equal::<i64>(lhs, rhs, 
lhs_start, rhs_start, len),
         DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, 
rhs_start, len),
         DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, 
len),
         DataType::Union(_, _) => union_equal(lhs, rhs, lhs_start, rhs_start, 
len),
diff --git a/arrow-data/src/transform/list_view.rs 
b/arrow-data/src/transform/list_view.rs
new file mode 100644
index 0000000000..9b66a6a6ab
--- /dev/null
+++ b/arrow-data/src/transform/list_view.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use crate::transform::_MutableArrayData;
+use arrow_buffer::ArrowNativeType;
+use num_integer::Integer;
+use num_traits::CheckedAdd;
+
+pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
+    array: &ArrayData,
+) -> crate::transform::Extend<'_> {
+    let offsets = array.buffer::<T>(0);
+    let sizes = array.buffer::<T>(1);
+    Box::new(
+        move |mutable: &mut _MutableArrayData, _index: usize, start: usize, 
len: usize| {
+            let offset_buffer = &mut mutable.buffer1;
+            let sizes_buffer = &mut mutable.buffer2;
+
+            for &offset in &offsets[start..start + len] {
+                offset_buffer.push(offset);
+            }
+
+            // sizes
+            for &size in &sizes[start..start + len] {
+                sizes_buffer.push(size);
+            }
+
+            // the beauty of views is that we don't need to copy child_data, 
we just splat
+            // the offsets and sizes.
+        },
+    )
+}
+
+pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut 
_MutableArrayData, len: usize) {
+    let offset_buffer = &mut mutable.buffer1;
+    let sizes_buffer = &mut mutable.buffer2;
+
+    // We push 0 as a placeholder for NULL values in both the offsets and sizes
+    (0..len).for_each(|_| offset_buffer.push(T::default()));
+    (0..len).for_each(|_| sizes_buffer.push(T::default()));
+}
diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs
index 5b994046e6..c6052817bf 100644
--- a/arrow-data/src/transform/mod.rs
+++ b/arrow-data/src/transform/mod.rs
@@ -33,6 +33,7 @@ mod boolean;
 mod fixed_binary;
 mod fixed_size_list;
 mod list;
+mod list_view;
 mod null;
 mod primitive;
 mod run;
@@ -265,10 +266,9 @@ fn build_extend(array: &ArrayData) -> Extend<'_> {
         DataType::LargeUtf8 | DataType::LargeBinary => 
variable_size::build_extend::<i64>(array),
         DataType::BinaryView | DataType::Utf8View => unreachable!("should use 
build_extend_view"),
         DataType::Map(_, _) | DataType::List(_) => 
list::build_extend::<i32>(array),
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not implemented")
-        }
         DataType::LargeList(_) => list::build_extend::<i64>(array),
+        DataType::ListView(_) => list_view::build_extend::<i32>(array),
+        DataType::LargeListView(_) => list_view::build_extend::<i64>(array),
         DataType::Dictionary(_, _) => unreachable!("should use 
build_extend_dictionary"),
         DataType::Struct(_) => structure::build_extend(array),
         DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
@@ -313,10 +313,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls 
{
         DataType::LargeUtf8 | DataType::LargeBinary => 
variable_size::extend_nulls::<i64>,
         DataType::BinaryView | DataType::Utf8View => 
primitive::extend_nulls::<u128>,
         DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::<i32>,
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not implemented")
-        }
         DataType::LargeList(_) => list::extend_nulls::<i64>,
+        DataType::ListView(_) => list_view::extend_nulls::<i32>,
+        DataType::LargeListView(_) => list_view::extend_nulls::<i64>,
         DataType::Dictionary(child_data_type, _) => match 
child_data_type.as_ref() {
             DataType::UInt8 => primitive::extend_nulls::<u8>,
             DataType::UInt16 => primitive::extend_nulls::<u16>,
@@ -450,7 +449,11 @@ impl<'a> MutableArrayData<'a> {
                 new_buffers(data_type, *capacity)
             }
             (
-                DataType::List(_) | DataType::LargeList(_) | 
DataType::FixedSizeList(_, _),
+                DataType::List(_)
+                | DataType::LargeList(_)
+                | DataType::ListView(_)
+                | DataType::LargeListView(_)
+                | DataType::FixedSizeList(_, _),
                 Capacities::List(capacity, _),
             ) => {
                 array_capacity = *capacity;
@@ -491,10 +494,11 @@ impl<'a> MutableArrayData<'a> {
             | DataType::Utf8View
             | DataType::Interval(_)
             | DataType::FixedSizeBinary(_) => vec![],
-            DataType::ListView(_) | DataType::LargeListView(_) => {
-                unimplemented!("ListView/LargeListView not implemented")
-            }
-            DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) 
=> {
+            DataType::Map(_, _)
+            | DataType::List(_)
+            | DataType::LargeList(_)
+            | DataType::ListView(_)
+            | DataType::LargeListView(_) => {
                 let children = arrays
                     .iter()
                     .map(|array| &array.child_data()[0])
@@ -785,7 +789,12 @@ impl<'a> MutableArrayData<'a> {
                 b.insert(0, data.buffer1.into());
                 b
             }
-            DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | 
DataType::LargeBinary => {
+            DataType::Utf8
+            | DataType::Binary
+            | DataType::LargeUtf8
+            | DataType::LargeBinary
+            | DataType::ListView(_)
+            | DataType::LargeListView(_) => {
                 vec![data.buffer1.into(), data.buffer2.into()]
             }
             DataType::Union(_, mode) => {
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 83bc5c2763..3bfdd31ccf 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -37,7 +37,9 @@ use arrow_array::builder::{
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, 
OffsetBuffer};
+use arrow_buffer::{
+    ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, 
OffsetBuffer, ScalarBuffer,
+};
 use arrow_data::ArrayDataBuilder;
 use arrow_data::transform::{Capacities, MutableArrayData};
 use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
@@ -206,6 +208,63 @@ fn concat_lists<OffsetSize: OffsetSizeTrait>(
     Ok(Arc::new(array))
 }
 
+fn concat_list_view<OffsetSize: OffsetSizeTrait>(
+    arrays: &[&dyn Array],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let mut output_len = 0;
+    let mut list_has_nulls = false;
+
+    let lists = arrays
+        .iter()
+        .map(|x| x.as_list_view::<OffsetSize>())
+        .inspect(|l| {
+            output_len += l.len();
+            list_has_nulls |= l.null_count() != 0;
+        })
+        .collect::<Vec<_>>();
+
+    let lists_nulls = list_has_nulls.then(|| {
+        let mut nulls = BooleanBufferBuilder::new(output_len);
+        for l in &lists {
+            match l.nulls() {
+                Some(n) => nulls.append_buffer(n.inner()),
+                None => nulls.append_n(l.len(), true),
+            }
+        }
+        NullBuffer::new(nulls.finish())
+    });
+
+    let values: Vec<&dyn Array> = lists.iter().map(|l| 
l.values().as_ref()).collect();
+
+    let concatenated_values = concat(values.as_slice())?;
+
+    let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| 
x.sizes()).copied().collect();
+
+    let mut offsets = MutableBuffer::with_capacity(lists.iter().map(|l| 
l.offsets().len()).sum());
+    let mut global_offset = OffsetSize::zero();
+    for l in lists.iter() {
+        for &offset in l.offsets() {
+            offsets.push(offset + global_offset);
+        }
+
+        // advance the offsets
+        global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
+    }
+
+    let offsets = ScalarBuffer::from(offsets);
+
+    let array = GenericListViewArray::try_new(
+        field.clone(),
+        offsets,
+        sizes,
+        concatenated_values,
+        lists_nulls,
+    )?;
+
+    Ok(Arc::new(array))
+}
+
 fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> 
Result<ArrayRef, ArrowError> {
     let mut builder = 
PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
         .with_data_type(arrays[0].data_type().clone());
@@ -422,6 +481,8 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, 
ArrowError> {
         }
         DataType::List(field) => concat_lists::<i32>(arrays, field),
         DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
+        DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
+        DataType::LargeListView(field) => concat_list_view::<i64>(arrays, 
field),
         DataType::Struct(fields) => concat_structs(arrays, fields),
         DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
         DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
@@ -500,7 +561,9 @@ pub fn concat_batches<'a>(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::builder::{GenericListBuilder, StringDictionaryBuilder};
+    use arrow_array::builder::{
+        GenericListBuilder, Int64Builder, ListViewBuilder, 
StringDictionaryBuilder,
+    };
     use arrow_schema::{Field, Schema};
     use std::fmt::Debug;
 
@@ -768,7 +831,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]),
             Some(vec![]),
             None,
@@ -776,14 +839,14 @@ mod tests {
         ];
         let list1_array = ListArray::from_iter_primitive::<Int64Type, _, 
_>(list1.clone());
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
         ];
         let list2_array = ListArray::from_iter_primitive::<Int64Type, _, 
_>(list2.clone());
 
-        let list3 = vec![Some(vec![Some(1000), Some(1001)])];
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
         let list3_array = ListArray::from_iter_primitive::<Int64Type, _, 
_>(list3.clone());
 
         let array_result = concat(&[&list1_array, &list2_array, 
&list3_array]).unwrap();
@@ -796,7 +859,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays_slices() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]),
             Some(vec![]), // In slice
             None,         // In slice
@@ -806,7 +869,7 @@ mod tests {
         let list1_array = list1_array.slice(1, 2);
         let list1_values = list1.into_iter().skip(1).take(2);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
@@ -825,7 +888,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays_sliced_lengths() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
             Some(vec![]),                                        // In slice
             None,                                                // In slice
@@ -835,7 +898,7 @@ mod tests {
         let list1_array = list1_array.slice(0, 3); // no offset, but not all 
values
         let list1_values = list1.into_iter().take(3);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
@@ -856,7 +919,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_fixed_size_list_arrays() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), None]),
             None,
             Some(vec![Some(10), Some(20)]),
@@ -864,7 +927,7 @@ mod tests {
         let list1_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, 
_>(list1.clone(), 2);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None]),
             Some(vec![Some(102), Some(103)]),
@@ -872,7 +935,7 @@ mod tests {
         let list2_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, 
_>(list2.clone(), 2);
 
-        let list3 = vec![Some(vec![Some(1000), Some(1001)])];
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
         let list3_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, 
_>(list3.clone(), 2);
 
@@ -885,6 +948,105 @@ mod tests {
         assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
     }
 
+    #[test]
+    fn test_concat_list_view_arrays() {
+        let list1 = [
+            Some(vec![Some(-1), None]),
+            None,
+            Some(vec![Some(10), Some(20)]),
+        ];
+        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list1.iter() {
+            list1_array.append_option(v.clone());
+        }
+        let list1_array = list1_array.finish();
+
+        let list2 = [
+            None,
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+        ];
+        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list2.iter() {
+            list2_array.append_option(v.clone());
+        }
+        let list2_array = list2_array.finish();
+
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
+        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list3.iter() {
+            list3_array.append_option(v.clone());
+        }
+        let list3_array = list3_array.finish();
+
+        let array_result = concat(&[&list1_array, &list2_array, 
&list3_array]).unwrap();
+
+        let expected: Vec<_> = 
list1.into_iter().chain(list2).chain(list3).collect();
+        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
+        for v in expected.iter() {
+            array_expected.append_option(v.clone());
+        }
+        let array_expected = array_expected.finish();
+
+        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
+    }
+
+    #[test]
+    fn test_concat_sliced_list_view_arrays() {
+        let list1 = [
+            Some(vec![Some(-1), None]),
+            None,
+            Some(vec![Some(10), Some(20)]),
+        ];
+        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list1.iter() {
+            list1_array.append_option(v.clone());
+        }
+        let list1_array = list1_array.finish();
+
+        let list2 = [
+            None,
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+        ];
+        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list2.iter() {
+            list2_array.append_option(v.clone());
+        }
+        let list2_array = list2_array.finish();
+
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
+        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list3.iter() {
+            list3_array.append_option(v.clone());
+        }
+        let list3_array = list3_array.finish();
+
+        // Concat sliced arrays.
+        // ListView slicing will slice the offset/sizes but preserve the 
original values child.
+        let array_result = concat(&[
+            &list1_array.slice(1, 2),
+            &list2_array.slice(1, 2),
+            &list3_array.slice(0, 1),
+        ])
+        .unwrap();
+
+        let expected: Vec<_> = vec![
+            None,
+            Some(vec![Some(10), Some(20)]),
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+            Some(vec![Some(1000), Some(1001)]),
+        ];
+        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
+        for v in expected.iter() {
+            array_expected.append_option(v.clone());
+        }
+        let array_expected = array_expected.finish();
+
+        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
+    }
+
     #[test]
     fn test_concat_struct_arrays() {
         let field = Arc::new(Field::new("field", DataType::Int64, true));
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 5c21a4adca..cd132de39d 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -380,6 +380,12 @@ fn filter_array(values: &dyn Array, predicate: 
&FilterPredicate) -> Result<Array
             DataType::FixedSizeBinary(_) => {
                 
Ok(Arc::new(filter_fixed_size_binary(values.as_fixed_size_binary(), predicate)))
             }
+            DataType::ListView(_) => {
+                Ok(Arc::new(filter_list_view::<i32>(values.as_list_view(), 
predicate)))
+            }
+            DataType::LargeListView(_) => {
+                Ok(Arc::new(filter_list_view::<i64>(values.as_list_view(), 
predicate)))
+            }
             DataType::RunEndEncoded(_, _) => {
                 downcast_run_array!{
                     values => Ok(Arc::new(filter_run_end_array(values, 
predicate)?)),
@@ -894,6 +900,34 @@ fn filter_sparse_union(
     })
 }
 
+/// `filter` implementation for list views
+fn filter_list_view<OffsetType: OffsetSizeTrait>(
+    array: &GenericListViewArray<OffsetType>,
+    predicate: &FilterPredicate,
+) -> GenericListViewArray<OffsetType> {
+    let filtered_offsets = filter_native::<OffsetType>(array.offsets(), 
predicate);
+    let filtered_sizes = filter_native::<OffsetType>(array.sizes(), predicate);
+
+    // Filter the nulls
+    let nulls = if let Some((null_count, nulls)) = 
filter_null_mask(array.nulls(), predicate) {
+        let buffer = BooleanBuffer::new(nulls, 0, predicate.count);
+
+        Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
+    } else {
+        None
+    };
+
+    let list_data = ArrayDataBuilder::new(array.data_type().clone())
+        .nulls(nulls)
+        .buffers(vec![filtered_offsets, filtered_sizes])
+        .child_data(vec![array.values().to_data()])
+        .len(predicate.count);
+
+    let list_data = unsafe { list_data.build_unchecked() };
+
+    GenericListViewArray::from(list_data)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1404,6 +1438,69 @@ mod tests {
         assert_eq!(&make_array(expected), &result);
     }
 
+    fn test_case_filter_list_view<T: OffsetSizeTrait>() {
+        // [[1, 2], null, [], [3,4]]
+        let mut list_array = GenericListViewBuilder::<T, 
_>::new(Int32Builder::new());
+        list_array.append_value([Some(1), Some(2)]);
+        list_array.append_null();
+        list_array.append_value([]);
+        list_array.append_value([Some(3), Some(4)]);
+
+        let list_array = list_array.finish();
+        let predicate = BooleanArray::from_iter([true, false, true, false]);
+
+        // Filter result: [[1, 2], []]
+        let filtered = filter(&list_array, &predicate)
+            .unwrap()
+            .as_list_view::<T>()
+            .clone();
+
+        let mut expected =
+            GenericListViewBuilder::<T, 
_>::with_capacity(Int32Builder::with_capacity(5), 3);
+        expected.append_value([Some(1), Some(2)]);
+        expected.append_value([]);
+        let expected = expected.finish();
+
+        assert_eq!(&filtered, &expected);
+    }
+
+    fn test_case_filter_sliced_list_view<T: OffsetSizeTrait>() {
+        // [[1, 2], null, [], [3,4]]
+        let mut list_array =
+            GenericListViewBuilder::<T, 
_>::with_capacity(Int32Builder::with_capacity(6), 4);
+        list_array.append_value([Some(1), Some(2)]);
+        list_array.append_null();
+        list_array.append_value([]);
+        list_array.append_value([Some(3), Some(4)]);
+
+        let list_array = list_array.finish();
+
+        // Sliced: [null, [], [3, 4]]
+        let sliced = list_array.slice(1, 3);
+        let predicate = BooleanArray::from_iter([false, false, true]);
+
+        // Filter result: [[1, 2], []]
+        let filtered = filter(&sliced, &predicate)
+            .unwrap()
+            .as_list_view::<T>()
+            .clone();
+
+        let mut expected = GenericListViewBuilder::<T, 
_>::new(Int32Builder::new());
+        expected.append_value([Some(3), Some(4)]);
+        let expected = expected.finish();
+
+        assert_eq!(&filtered, &expected);
+    }
+
+    #[test]
+    fn test_filter_list_view_array() {
+        test_case_filter_list_view::<i32>();
+        test_case_filter_list_view::<i64>();
+
+        test_case_filter_sliced_list_view::<i32>();
+        test_case_filter_sliced_list_view::<i64>();
+    }
+
     #[test]
     fn test_slice_iterator_bits() {
         let filter_values = (0..64).map(|i| i == 1).collect::<Vec<bool>>();
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index dfe6903dc4..eec4ffa14e 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -218,6 +218,12 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::LargeList(_) => {
             Ok(Arc::new(take_list::<_, Int64Type>(values.as_list(), indices)?))
         }
+        DataType::ListView(_) => {
+            Ok(Arc::new(take_list_view::<_, Int32Type>(values.as_list_view(), 
indices)?))
+        }
+        DataType::LargeListView(_) => {
+            Ok(Arc::new(take_list_view::<_, Int64Type>(values.as_list_view(), 
indices)?))
+        }
         DataType::FixedSizeList(_, length) => {
             let values = values
                 .as_any()
@@ -621,6 +627,33 @@ where
     Ok(GenericListArray::<OffsetType::Native>::from(list_data))
 }
 
+fn take_list_view<IndexType, OffsetType>(
+    values: &GenericListViewArray<OffsetType::Native>,
+    indices: &PrimitiveArray<IndexType>,
+) -> Result<GenericListViewArray<OffsetType::Native>, ArrowError>
+where
+    IndexType: ArrowPrimitiveType,
+    OffsetType: ArrowPrimitiveType,
+    OffsetType::Native: OffsetSizeTrait,
+{
+    let taken_offsets = take_native(values.offsets(), indices);
+    let taken_sizes = take_native(values.sizes(), indices);
+    let nulls = take_nulls(values.nulls(), indices);
+
+    let list_view_data = ArrayDataBuilder::new(values.data_type().clone())
+        .len(indices.len())
+        .nulls(nulls)
+        .buffers(vec![taken_offsets.into(), taken_sizes.into()])
+        .child_data(vec![values.values().to_data()]);
+
+    // SAFETY: all buffers and child nodes for ListView added in constructor
+    let list_view_data = unsafe { list_view_data.build_unchecked() };
+
+    Ok(GenericListViewArray::<OffsetType::Native>::from(
+        list_view_data,
+    ))
+}
+
 /// `take` implementation for `FixedSizeListArray`
 ///
 /// Calculates the index and indexed offset for the inner array,
@@ -980,6 +1013,7 @@ mod tests {
     use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
     use arrow_data::ArrayData;
     use arrow_schema::{Field, Fields, TimeUnit, UnionFields};
+    use num_traits::ToPrimitive;
 
     fn test_take_decimal_arrays(
         data: Vec<Option<i128>>,
@@ -1821,6 +1855,55 @@ mod tests {
         }};
     }
 
+    fn test_take_list_view_generic<OffsetType: OffsetSizeTrait, ValuesType: 
ArrowPrimitiveType, F>(
+        values: Vec<Option<Vec<Option<ValuesType::Native>>>>,
+        take_indices: Vec<Option<usize>>,
+        expected: Vec<Option<Vec<Option<ValuesType::Native>>>>,
+        mapper: F,
+    ) where
+        F: Fn(GenericListViewArray<OffsetType>) -> 
GenericListViewArray<OffsetType>,
+    {
+        let mut list_view_array =
+            GenericListViewBuilder::<OffsetType, 
_>::new(PrimitiveBuilder::<ValuesType>::new());
+
+        for value in values {
+            list_view_array.append_option(value);
+        }
+        let list_view_array = list_view_array.finish();
+        let list_view_array = mapper(list_view_array);
+
+        let mut indices = UInt64Builder::new();
+        for idx in take_indices {
+            indices.append_option(idx.map(|i| i.to_u64().unwrap()));
+        }
+        let indices = indices.finish();
+
+        let taken = take(&list_view_array, &indices, None)
+            .unwrap()
+            .as_list_view()
+            .clone();
+
+        let mut expected_array =
+            GenericListViewBuilder::<OffsetType, 
_>::new(PrimitiveBuilder::<ValuesType>::new());
+        for value in expected {
+            expected_array.append_option(value);
+        }
+        let expected_array = expected_array.finish();
+
+        assert_eq!(taken, expected_array);
+    }
+
+    macro_rules! list_view_test_case {
+        (values: $values:expr, indices: $indices:expr, expected: $expected: 
expr) => {{
+            test_take_list_view_generic::<i32, Int8Type, _>($values, $indices, 
$expected, |x| x);
+            test_take_list_view_generic::<i64, Int8Type, _>($values, $indices, 
$expected, |x| x);
+        }};
+        (values: $values:expr, transform: $fn:expr, indices: $indices:expr, 
expected: $expected: expr) => {{
+            test_take_list_view_generic::<i32, Int8Type, _>($values, $indices, 
$expected, $fn);
+            test_take_list_view_generic::<i64, Int8Type, _>($values, $indices, 
$expected, $fn);
+        }};
+    }
+
     fn do_take_fixed_size_list_test<T>(
         length: <Int32Type as ArrowPrimitiveType>::Native,
         input_data: Vec<Option<Vec<Option<T::Native>>>>,
@@ -1871,6 +1954,72 @@ mod tests {
         test_take_list_with_nulls!(i64, LargeList, LargeListArray);
     }
 
+    #[test]
+    fn test_test_take_list_view_reversed() {
+        // Take reversed indices
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![Some(2), Some(1), Some(0)],
+            expected: vec![
+                Some(vec![Some(7), Some(8), None]),
+                None,
+                Some(vec![Some(1), None, Some(3)]),
+            ]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_null_indices() {
+        // Take with null indices
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![None, Some(0), None],
+            expected: vec![None, Some(vec![Some(1), None, Some(3)]), None]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_null_values() {
+        // Take at null values
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![Some(1), Some(1), Some(1), None, None],
+            expected: vec![None; 5]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_sliced() {
+        // Take null indices/values, with slicing.
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1)]),
+                None,
+                None,
+                Some(vec![Some(2), Some(3)]),
+                Some(vec![Some(4), Some(5)]),
+                None,
+            ],
+            transform: |l| l.slice(2, 4),
+            indices: vec![Some(0), Some(3), None, Some(1), Some(2)],
+            expected: vec![
+                None, None, None, Some(vec![Some(2), Some(3)]), 
Some(vec![Some(4), Some(5)])
+            ]
+        }
+    }
+
     #[test]
     fn test_take_fixed_size_list() {
         do_take_fixed_size_list_test::<Int32Type>(
diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs
index 7fc8b0be7a..381054a25d 100644
--- a/arrow/tests/array_equal.rs
+++ b/arrow/tests/array_equal.rs
@@ -22,11 +22,17 @@ use arrow::array::{
     StringDictionaryBuilder, StructArray, UnionBuilder, make_array,
 };
 use arrow::datatypes::{Int16Type, Int32Type};
-use arrow_array::builder::{StringBuilder, StringViewBuilder, StructBuilder};
-use arrow_array::{DictionaryArray, FixedSizeListArray, StringViewArray};
+use arrow_array::builder::{
+    GenericListViewBuilder, StringBuilder, StringViewBuilder, StructBuilder,
+};
+use arrow_array::cast::AsArray;
+use arrow_array::{
+    DictionaryArray, FixedSizeListArray, GenericListViewArray, PrimitiveArray, 
StringViewArray,
+};
 use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{DataType, Field, Fields};
+use arrow_select::take::take;
 use std::sync::Arc;
 
 #[test]
@@ -756,6 +762,125 @@ fn test_fixed_list_offsets() {
     test_equal(&a_slice, &b_slice, true);
 }
 
+fn create_list_view_array<
+    O: OffsetSizeTrait,
+    U: IntoIterator<Item = Option<i32>>,
+    T: IntoIterator<Item = Option<U>>,
+>(
+    data: T,
+) -> GenericListViewArray<O> {
+    let mut builder = GenericListViewBuilder::<O, _>::new(Int32Builder::new());
+    for d in data {
+        if let Some(v) = d {
+            builder.append_value(v);
+        } else {
+            builder.append_null();
+        }
+    }
+
+    builder.finish()
+}
+
+fn test_test_list_view_array<T: OffsetSizeTrait>() {
+    let a = create_list_view_array::<T, _, _>([
+        None,
+        Some(vec![Some(1), None, Some(2)]),
+        Some(vec![Some(3), Some(4), Some(5), None]),
+    ]);
+    let b = create_list_view_array::<T, _, _>([
+        None,
+        Some(vec![Some(1), None, Some(2)]),
+        Some(vec![Some(3), Some(4), Some(5), None]),
+    ]);
+
+    test_equal(&a, &b, true);
+
+    // Simple non-matching arrays by reordering
+    let b = create_list_view_array::<T, _, _>([
+        Some(vec![Some(3), Some(4), Some(5), None]),
+        Some(vec![Some(1), None, Some(2)]),
+    ]);
+    test_equal(&a, &b, false);
+
+    // reorder using take yields equal values
+    let indices: PrimitiveArray<Int32Type> = vec![None, Some(1), 
Some(0)].into();
+    let b = take(&b, &indices, None)
+        .unwrap()
+        .as_list_view::<T>()
+        .clone();
+
+    test_equal(&a, &b, true);
+
+    // Slicing one side yields unequal again
+    let a = a.slice(1, 2);
+
+    test_equal(&a, &b, false);
+
+    // Slicing the other to match makes them equal again
+    let b = b.slice(1, 2);
+
+    test_equal(&a, &b, true);
+}
+
+// Special test for List<ListView<i32>>.
+// This tests the equal_ranges kernel
+fn test_sliced_list_of_list_view<T: OffsetSizeTrait>() {
+    // First list view is created using the builder, with elements not 
deduplicated.
+    let mut a = ListBuilder::new(GenericListViewBuilder::<T, 
_>::new(Int32Builder::new()));
+
+    a.append_value([Some(vec![Some(1), Some(2), Some(3)]), Some(vec![])]);
+    a.append_null();
+    a.append_value([
+        Some(vec![Some(1), Some(2), Some(3)]),
+        None,
+        Some(vec![Some(6)]),
+    ]);
+
+    let a = a.finish();
+    // a = [[[1,2,3], []], null, [[4, null], [5], null, [6]]]
+
+    // First list view is created using the builder, with elements not 
deduplicated.
+    let mut b = ListBuilder::new(GenericListViewBuilder::<T, 
_>::new(Int32Builder::new()));
+
+    // Add an extra row that we will slice off, adjust the List offsets
+    b.append_value([Some(vec![Some(0), Some(0), Some(0)])]);
+    b.append_value([Some(vec![Some(1), Some(2), Some(3)]), Some(vec![])]);
+    b.append_null();
+    b.append_value([
+        Some(vec![Some(1), Some(2), Some(3)]),
+        None,
+        Some(vec![Some(6)]),
+    ]);
+
+    let b = b.finish();
+    // b = [[[0, 0, 0]], [[1,2,3], []], null, [[4, null], [5], null, [6]]]
+    let b = b.slice(1, 3);
+    // b = [[[1,2,3], []], null, [[4, null], [5], null, [6]]] but the outer 
ListArray
+    // has an offset
+
+    test_equal(&a, &b, true);
+}
+
+#[test]
+fn test_list_view_array() {
+    test_test_list_view_array::<i32>();
+}
+
+#[test]
+fn test_large_list_view_array() {
+    test_test_list_view_array::<i64>();
+}
+
+#[test]
+fn test_nested_list_view_array() {
+    test_sliced_list_of_list_view::<i32>();
+}
+
+#[test]
+fn test_nested_large_list_view_array() {
+    test_sliced_list_of_list_view::<i64>();
+}
+
 #[test]
 fn test_struct_equal() {
     let strings: ArrayRef = Arc::new(StringArray::from(vec![


Reply via email to