This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 62df32e29d Add benchmark for casting to RunEndEncoded (REE) (#8710)
62df32e29d is described below
commit 62df32e29d03b69c3eeeb12268fdd539ebd00098
Author: Vegard Stikbakke <[email protected]>
AuthorDate: Mon Oct 27 20:24:02 2025 +0100
Add benchmark for casting to RunEndEncoded (REE) (#8710)
Closes #8709.
Adds RunEndEncoded (REE) cast benchmarks to the existing `cast_kernels` bench,
which can be run with `cargo bench --bench cast_kernels`.
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-cast/Cargo.toml | 1 +
arrow/benches/cast_kernels.rs | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index f3309783fb..fb5ad1af3d 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -75,3 +75,4 @@ harness = false
[[bench]]
name = "parse_decimal"
harness = false
+
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index a54529c8d1..040c118a1e 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -359,6 +359,46 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("cast binary view to string view", |b| {
b.iter(|| cast_array(&binary_view_array, DataType::Utf8View))
});
+
+ c.bench_function("cast string single run to ree<int32>", |b| {
+ let source_array = StringArray::from(vec!["a"; 8192]);
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Utf8, true)),
+ );
+ b.iter(|| cast(&array_ref, &target_type).unwrap());
+ });
+
+ c.bench_function("cast runs of 10 string to ree<int32>", |b| {
+ let source_array: Int32Array = (0..8192).map(|i| i / 10).collect();
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ b.iter(|| cast(&array_ref, &target_type).unwrap());
+ });
+
+ c.bench_function("cast runs of 1000 int32s to ree<int32>", |b| {
+ let source_array: Int32Array = (0..8192).map(|i| i / 1000).collect();
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ b.iter(|| cast(&array_ref, &target_type).unwrap());
+ });
+
+ c.bench_function("cast no runs of int32s to ree<int32>", |b| {
+ let source_array: Int32Array = (0..8192).collect();
+ let array_ref = Arc::new(source_array) as ArrayRef;
+ let target_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ b.iter(|| cast(&array_ref, &target_type).unwrap());
+ });
}
criterion_group!(benches, add_benchmark);