(arrow-rs) branch main updated: Add List to `interleave_kernels` benchmark (#8980)

dheres Thu, 11 Dec 2025 02:21:06 -0800

This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 026a2606ae Add List to `interleave_kernels` benchmark (#8980)
026a2606ae is described below

commit 026a2606aee66ade1abf3716622b1de872368baf
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Dec 11 05:20:54 2025 -0500

    Add List to `interleave_kernels` benchmark (#8980)
    
    # Which issue does this PR close?
    
    
    - Part of https://github.com/apache/arrow-rs/pull/8953
    
    # Rationale for this change
    
    While reviewing https://github.com/apache/arrow-rs/pull/8953 from
    @asubiotto I noticed there was no benchmark for interleave with
    ListArray. Let's add some so we can evaluate the performance impact of
    https://github.com/apache/arrow-rs/pull/8953 and future changes.
    
    # What changes are included in this PR?
    
    Add benchmark for list interleaving
    
    # Are these changes tested?
    I ran the benchmarks manually
    ```shell
    cargo bench --bench interleave_kernels -- list
    ```
    
    # Are there any user-facing changes?
    
    No
---
 arrow/benches/interleave_kernels.rs |  7 ++++++
 arrow/src/util/bench_util.rs        | 43 +++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs
index f906416acb..8daf42a144 100644
--- a/arrow/benches/interleave_kernels.rs
+++ b/arrow/benches/interleave_kernels.rs
@@ -116,6 +116,11 @@ fn add_benchmark(c: &mut Criterion) {
 
     let string_view = create_string_view_array(1024, 0.0);
 
+    // use 8192 as a standard list size for better coverage
+    let list_i64 = create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42);
+    let list_i64_no_nulls =
+        create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);
+
     let cases: &[(&str, &dyn Array)] = &[
         ("i32(0.0)", &i32),
         ("i32(0.5)", &i32_opt),
@@ -136,6 +141,8 @@ fn add_benchmark(c: &mut Criterion) {
             "struct(i32(0.0), str(20, 0.0)",
             &struct_i32_no_nulls_string_no_nulls,
         ),
+        ("list<i64>(0.1,0.1,20)", &list_i64),
+        ("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
     ];
 
     for (prefix, base) in cases {
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index d85eb4aafd..9f83a50f4f 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -398,6 +398,49 @@ pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
     data.iter().map(|x| x.as_deref()).collect()
 }
 
+/// Create a List/LargeList Array  of primitive values
+///
+/// Arguments:
+/// - `size`: number of lists in the array
+/// - `null_density`: density of nulls in the list array
+/// - `list_null_density`: density of nulls in the primitive arrays inside the lists
+/// - `max_list_size`: maximum size of each list (actual size is random between 0 and max_list_size)
+/// - `seed`: seed for the random number generator
+pub fn create_primitive_list_array_with_seed<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+    seed: u64,
+) -> GenericListArray<O>
+where
+    O: OffsetSizeTrait,
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    let mut rng = StdRng::seed_from_u64(seed);
+
+    let values = (0..size).map(|_| {
+        if rng.random::<f32>() < null_density {
+            None
+        } else {
+            let list_size = rng.random_range(0..=max_list_size);
+            let list_values: Vec<Option<T::Native>> = (0..list_size)
+                .map(|_| {
+                    if rng.random::<f32>() < list_null_density {
+                        None
+                    } else {
+                        Some(rng.random())
+                    }
+                })
+                .collect();
+            Some(list_values)
+        }
+    });
+
+    GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
+}
+
 /// Create primitive run array for given logical and physical array lengths
 pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
     logical_array_len: usize,

(arrow-rs) branch main updated: Add List to `interleave_kernels` benchmark (#8980)

Reply via email to