This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 56525efbd5 Add Parquet RowSelection benchmark (#6623)
56525efbd5 is described below

commit 56525efbd5f37b89d1b56aa51709cab9f81bc89e
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Fri Oct 25 03:57:47 2024 -0500

    Add Parquet RowSelection benchmark (#6623)
    
    * add benchmark
    
    * add and_then benchmark
    
    * fix ci
    
    * update bench
---
 parquet/Cargo.toml              |  5 +++
 parquet/benches/row_selector.rs | 87 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index 8ab78efe0c..32bc13b62a 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -222,5 +222,10 @@ harness = false
 name = "metadata"
 harness = false
 
+[[bench]]
+name = "row_selector"
+harness = false
+required-features = ["arrow"]
+
 [lib]
 bench = false
diff --git a/parquet/benches/row_selector.rs b/parquet/benches/row_selector.rs
new file mode 100644
index 0000000000..32f0d6a560
--- /dev/null
+++ b/parquet/benches/row_selector.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::BooleanArray;
+use criterion::*;
+use parquet::arrow::arrow_reader::RowSelection;
+use rand::Rng;
+
+/// Generates a random RowSelection with a specified selection ratio.
+///
+/// # Arguments
+///
+/// * `total_rows` - The total number of rows in the selection.
+/// * `selection_ratio` - The ratio of rows to select (e.g., 1/3 for ~33% 
selection).
+///
+/// # Returns
+///
+/// * A `BooleanArray` instance with randomly selected rows based on the 
provided ratio.
+fn generate_random_row_selection(total_rows: usize, selection_ratio: f64) -> 
BooleanArray {
+    let mut rng = rand::thread_rng();
+    let bools: Vec<bool> = (0..total_rows)
+        .map(|_| rng.gen_bool(selection_ratio))
+        .collect();
+    BooleanArray::from(bools)
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let total_rows = 300_000;
+    let selection_ratio = 1.0 / 3.0;
+
+    // Generate two random RowSelections with approximately 1/3 of the rows selected.
+    let row_selection_a =
+        RowSelection::from_filters(&[generate_random_row_selection(total_rows, selection_ratio)]);
+    let row_selection_b =
+        RowSelection::from_filters(&[generate_random_row_selection(total_rows, selection_ratio)]);
+
+    // Benchmark the intersection of the two RowSelections.
+    c.bench_function("intersection", |b| {
+        b.iter(|| {
+            let intersection = row_selection_a.intersection(&row_selection_b);
+            criterion::black_box(intersection);
+        })
+    });
+
+    c.bench_function("union", |b| {
+        b.iter(|| {
+            let union = row_selection_a.union(&row_selection_b);
+            criterion::black_box(union);
+        })
+    });
+
+    c.bench_function("from_filters", |b| {
+        let boolean_array = generate_random_row_selection(total_rows, selection_ratio);
+        b.iter(|| {
+            let array = boolean_array.clone(); // the clone runs inside the timed closure
+            let selection = RowSelection::from_filters(&[array]);
+            criterion::black_box(selection);
+        })
+    });
+
+    c.bench_function("and_then", |b| {
+        let selected = row_selection_a.row_count(); // the sub-selection below is built over this many rows
+        let sub_selection =
+            RowSelection::from_filters(&[generate_random_row_selection(selected, selection_ratio)]);
+        b.iter(|| {
+            let result = row_selection_a.and_then(&sub_selection);
+            criterion::black_box(result);
+        })
+    });
+}
+
+criterion_group!(benches, criterion_benchmark); // group `benches` runs `criterion_benchmark`
+criterion_main!(benches); // entry point for the `benches` group (bench target sets `harness = false`)

Reply via email to