This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new cfc2b88d1d Add coalesce inline-view filter benchmarks (#10050)
cfc2b88d1d is described below

commit cfc2b88d1d4e95e2de0dc5eeba4c23694058a903
Author: ClSlaid <[email protected]>
AuthorDate: Wed Jun 3 05:09:37 2026 +0800

    Add coalesce inline-view filter benchmarks (#10050)
    
    This is a benchmark-only companion patch for
    https://github.com/apache/arrow-rs/pull/9755.
    
    It keeps the functional changes out of this PR and only adds benchmark
    coverage in `arrow/benches/coalesce_kernels.rs` so the coalesce
    inline-view filter work can be tested independently.
    
    Benchmark coverage included:
    
    - filter and take coalesce benchmarks
    - primitive schemas
    - single-column `Utf8View` and `BinaryView`
    - mixed primitive + `Utf8View` and primitive + `BinaryView` schemas
    - filter cases for short inline strings with `max_string_len=8`
    - filter/take cases for longer view strings, including
      `max_string_len=20`, `30`, and `128`, depending on the scenario
    
    Coverage note:
    
    - The filter benchmarks cover the main short-inline path targeted by
      #9755 for both `Utf8View` and `BinaryView`.
    - The take benchmarks cover `Utf8View`/`BinaryView` and mixed schemas,
      but do not add `max_string_len=8` take variants. This patch keeps the
      benchmark changes aligned with the benchmark patch currently carried
      by #9755.
    
    Validation:
    
    ```text
    cargo fmt --package arrow
    cargo bench --bench coalesce_kernels -- --list
    git diff --check
    ```
---
 arrow/benches/coalesce_kernels.rs | 98 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/arrow/benches/coalesce_kernels.rs b/arrow/benches/coalesce_kernels.rs
index 0816d1a2e8..4affcc346e 100644
--- a/arrow/benches/coalesce_kernels.rs
+++ b/arrow/benches/coalesce_kernels.rs
@@ -51,6 +51,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
         true,
     )]));
 
+    // Single BinaryViewArray
+    let single_binaryview_schema = SchemaRef::new(Schema::new(vec![Field::new(
+        "value",
+        DataType::BinaryView,
+        true,
+    )]));
+
     // Mixed primitive, StringViewArray
     let mixed_utf8view_schema = SchemaRef::new(Schema::new(vec![
         Field::new("int32_val", DataType::Int32, true),
@@ -58,6 +65,13 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
         Field::new("utf8view_val", DataType::Utf8View, true),
     ]));
 
+    // Mixed primitive, BinaryViewArray
+    let mixed_binaryview_schema = SchemaRef::new(Schema::new(vec![
+        Field::new("int32_val", DataType::Int32, true),
+        Field::new("float_val", DataType::Float64, true),
+        Field::new("binaryview_val", DataType::BinaryView, true),
+    ]));
+
     // Mixed primitive, StringArray
     let mixed_utf8_schema = SchemaRef::new(Schema::new(vec![
         Field::new("int32_val", DataType::Int32, true),
@@ -106,6 +120,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
             }
             .build();
 
+            FilterBenchmarkBuilder {
+                c,
+                name: "single_utf8view (max_string_len=8)",
+                batch_size,
+                num_output_batches: 50,
+                null_density,
+                selectivity,
+                max_string_len: 8,
+                schema: &single_schema,
+            }
+            .build();
+
+            FilterBenchmarkBuilder {
+                c,
+                name: "single_binaryview",
+                batch_size,
+                num_output_batches: 50,
+                null_density,
+                selectivity,
+                max_string_len: 30,
+                schema: &single_binaryview_schema,
+            }
+            .build();
+
+            FilterBenchmarkBuilder {
+                c,
+                name: "single_binaryview (max_string_len=8)",
+                batch_size,
+                num_output_batches: 50,
+                null_density,
+                selectivity,
+                max_string_len: 8,
+                schema: &single_binaryview_schema,
+            }
+            .build();
+
             // Model mostly short strings, but some longer ones
             FilterBenchmarkBuilder {
                 c,
@@ -119,6 +169,18 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
             }
             .build();
 
+            FilterBenchmarkBuilder {
+                c,
+                name: "mixed_utf8view (max_string_len=8)",
+                batch_size,
+                num_output_batches: 20,
+                null_density,
+                selectivity,
+                max_string_len: 8,
+                schema: &mixed_utf8view_schema,
+            }
+            .build();
+
             // Model mostly longer strings
             FilterBenchmarkBuilder {
                 c,
@@ -132,6 +194,42 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
             }
             .build();
 
+            FilterBenchmarkBuilder {
+                c,
+                name: "mixed_binaryview (max_string_len=20)",
+                batch_size,
+                num_output_batches: 20,
+                null_density,
+                selectivity,
+                max_string_len: 20,
+                schema: &mixed_binaryview_schema,
+            }
+            .build();
+
+            FilterBenchmarkBuilder {
+                c,
+                name: "mixed_binaryview (max_string_len=8)",
+                batch_size,
+                num_output_batches: 20,
+                null_density,
+                selectivity,
+                max_string_len: 8,
+                schema: &mixed_binaryview_schema,
+            }
+            .build();
+
+            FilterBenchmarkBuilder {
+                c,
+                name: "mixed_binaryview (max_string_len=128)",
+                batch_size,
+                num_output_batches: 20,
+                null_density,
+                selectivity,
+                max_string_len: 128,
+                schema: &mixed_binaryview_schema,
+            }
+            .build();
+
             FilterBenchmarkBuilder {
                 c,
                 name: "mixed_utf8",

Reply via email to