This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new cfc2b88d1d Add coalesce inline-view filter benchmarks (#10050)
cfc2b88d1d is described below
commit cfc2b88d1d4e95e2de0dc5eeba4c23694058a903
Author: ClSlaid <[email protected]>
AuthorDate: Wed Jun 3 05:09:37 2026 +0800
Add coalesce inline-view filter benchmarks (#10050)
This is a benchmark-only companion patch for
https://github.com/apache/arrow-rs/pull/9755.
It keeps the functional changes out of this PR and only adds benchmark
coverage in `arrow/benches/coalesce_kernels.rs` so the coalesce
inline-view filter work can be tested independently.
Benchmark coverage included:
- filter and take coalesce benchmarks
- primitive schemas
- single-column `Utf8View` and `BinaryView`
- mixed primitive + `Utf8View` and primitive + `BinaryView` schemas
- filter cases for short inline strings with `max_string_len=8`
- filter/take cases for longer view strings, including
`max_string_len=20`, `max_string_len=30`, and `max_string_len=128`,
depending on the scenario
Coverage note:
- The filter benchmarks cover the main short-inline path targeted by
#9755 for both `Utf8View` and `BinaryView`.
- The take benchmarks cover `Utf8View`/`BinaryView` and mixed schemas,
but do not add `max_string_len=8` take variants. This patch keeps the
benchmark changes aligned with the benchmark patch currently carried by
#9755.
Validation:
```text
cargo fmt --package arrow
cargo bench --bench coalesce_kernels -- --list
git diff --check
```
---
arrow/benches/coalesce_kernels.rs | 98 +++++++++++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/arrow/benches/coalesce_kernels.rs b/arrow/benches/coalesce_kernels.rs
index 0816d1a2e8..4affcc346e 100644
--- a/arrow/benches/coalesce_kernels.rs
+++ b/arrow/benches/coalesce_kernels.rs
@@ -51,6 +51,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
true,
)]));
+ // Single BinaryViewArray
+ let single_binaryview_schema = SchemaRef::new(Schema::new(vec![Field::new(
+ "value",
+ DataType::BinaryView,
+ true,
+ )]));
+
// Mixed primitive, StringViewArray
let mixed_utf8view_schema = SchemaRef::new(Schema::new(vec![
Field::new("int32_val", DataType::Int32, true),
@@ -58,6 +65,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
Field::new("utf8view_val", DataType::Utf8View, true),
]));
+ // Mixed primitive, BinaryViewArray
+ let mixed_binaryview_schema = SchemaRef::new(Schema::new(vec![
+ Field::new("int32_val", DataType::Int32, true),
+ Field::new("float_val", DataType::Float64, true),
+ Field::new("binaryview_val", DataType::BinaryView, true),
+ ]));
+
// Mixed primitive, StringArray
let mixed_utf8_schema = SchemaRef::new(Schema::new(vec![
Field::new("int32_val", DataType::Int32, true),
@@ -106,6 +120,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
}
.build();
+ FilterBenchmarkBuilder {
+ c,
+ name: "single_utf8view (max_string_len=8)",
+ batch_size,
+ num_output_batches: 50,
+ null_density,
+ selectivity,
+ max_string_len: 8,
+ schema: &single_schema,
+ }
+ .build();
+
+ FilterBenchmarkBuilder {
+ c,
+ name: "single_binaryview",
+ batch_size,
+ num_output_batches: 50,
+ null_density,
+ selectivity,
+ max_string_len: 30,
+ schema: &single_binaryview_schema,
+ }
+ .build();
+
+ FilterBenchmarkBuilder {
+ c,
+ name: "single_binaryview (max_string_len=8)",
+ batch_size,
+ num_output_batches: 50,
+ null_density,
+ selectivity,
+ max_string_len: 8,
+ schema: &single_binaryview_schema,
+ }
+ .build();
+
// Model mostly short strings, but some longer ones
FilterBenchmarkBuilder {
c,
@@ -119,6 +169,18 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
}
.build();
+ FilterBenchmarkBuilder {
+ c,
+ name: "mixed_utf8view (max_string_len=8)",
+ batch_size,
+ num_output_batches: 20,
+ null_density,
+ selectivity,
+ max_string_len: 8,
+ schema: &mixed_utf8view_schema,
+ }
+ .build();
+
// Model mostly longer strings
FilterBenchmarkBuilder {
c,
@@ -132,6 +194,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
}
.build();
+ FilterBenchmarkBuilder {
+ c,
+ name: "mixed_binaryview (max_string_len=20)",
+ batch_size,
+ num_output_batches: 20,
+ null_density,
+ selectivity,
+ max_string_len: 20,
+ schema: &mixed_binaryview_schema,
+ }
+ .build();
+
+ FilterBenchmarkBuilder {
+ c,
+ name: "mixed_binaryview (max_string_len=8)",
+ batch_size,
+ num_output_batches: 20,
+ null_density,
+ selectivity,
+ max_string_len: 8,
+ schema: &mixed_binaryview_schema,
+ }
+ .build();
+
+ FilterBenchmarkBuilder {
+ c,
+ name: "mixed_binaryview (max_string_len=128)",
+ batch_size,
+ num_output_batches: 20,
+ null_density,
+ selectivity,
+ max_string_len: 128,
+ schema: &mixed_binaryview_schema,
+ }
+ .build();
+
FilterBenchmarkBuilder {
c,
name: "mixed_utf8",