This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 026a2606ae Add List to `interleave_kernels` benchmark (#8980)
026a2606ae is described below
commit 026a2606aee66ade1abf3716622b1de872368baf
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Dec 11 05:20:54 2025 -0500
Add List to `interleave_kernels` benchmark (#8980)
# Which issue does this PR close?
- Part of https://github.com/apache/arrow-rs/pull/8953
# Rationale for this change
While reviewing https://github.com/apache/arrow-rs/pull/8953 from
@asubiotto I noticed there was no benchmark for interleave with
ListArray. Let's add some so we can evaluate the performance impact of
https://github.com/apache/arrow-rs/pull/8953 and future changes.
# What changes are included in this PR?
Add benchmark for list interleaving
# Are these changes tested?
I ran the benchmarks manually
```shell
cargo bench --bench interleave_kernels -- list
```
# Are there any user-facing changes?
No
---
arrow/benches/interleave_kernels.rs | 7 ++++++
arrow/src/util/bench_util.rs | 43 +++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs
index f906416acb..8daf42a144 100644
--- a/arrow/benches/interleave_kernels.rs
+++ b/arrow/benches/interleave_kernels.rs
@@ -116,6 +116,11 @@ fn add_benchmark(c: &mut Criterion) {
let string_view = create_string_view_array(1024, 0.0);
+ // use 8192 as a standard list size for better coverage
+ let list_i64 = create_primitive_list_array_with_seed::<i32,
Int64Type>(8192, 0.1, 0.1, 20, 42);
+ let list_i64_no_nulls =
+ create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0,
0.0, 20, 42);
+
let cases: &[(&str, &dyn Array)] = &[
("i32(0.0)", &i32),
("i32(0.5)", &i32_opt),
@@ -136,6 +141,8 @@ fn add_benchmark(c: &mut Criterion) {
"struct(i32(0.0), str(20, 0.0)",
&struct_i32_no_nulls_string_no_nulls,
),
+ ("list<i64>(0.1,0.1,20)", &list_i64),
+ ("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
];
for (prefix, base) in cases {
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index d85eb4aafd..9f83a50f4f 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -398,6 +398,49 @@ pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
data.iter().map(|x| x.as_deref()).collect()
}
+/// Create a List/LargeList Array of primitive values
+///
+/// Arguments:
+/// - `size`: number of lists in the array
+/// - `null_density`: density of nulls in the list array
+/// - `list_null_density`: density of nulls in the primitive arrays inside the lists
+/// - `max_list_size`: maximum size of each list (actual size is random between 0 and max_list_size)
+/// - `seed`: seed for the random number generator
+pub fn create_primitive_list_array_with_seed<O, T>(
+ size: usize,
+ null_density: f32,
+ list_null_density: f32,
+ max_list_size: usize,
+ seed: u64,
+) -> GenericListArray<O>
+where
+ O: OffsetSizeTrait,
+ T: ArrowPrimitiveType,
+ StandardUniform: Distribution<T::Native>,
+{
+ let mut rng = StdRng::seed_from_u64(seed);
+
+ let values = (0..size).map(|_| {
+ if rng.random::<f32>() < null_density {
+ None
+ } else {
+ let list_size = rng.random_range(0..=max_list_size);
+ let list_values: Vec<Option<T::Native>> = (0..list_size)
+ .map(|_| {
+ if rng.random::<f32>() < list_null_density {
+ None
+ } else {
+ Some(rng.random())
+ }
+ })
+ .collect();
+ Some(list_values)
+ }
+ });
+
+ GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
+}
+
/// Create primitive run array for given logical and physical array lengths
pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
logical_array_len: usize,