This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 13c9e9083ef Add eq benchmark for StringArray/StringViewArray (#5924)
13c9e9083ef is described below

commit 13c9e9083ef2ef2e24019cbbd04c00025c11e3d5
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Fri Jun 21 16:10:56 2024 -0400

    Add eq benchmark for StringArray/StringViewArray (#5924)
    
    * add neq/eq benchmark for String/ViewArray
    
    * move bench to comparison kernel
    
    * clean unnecessary dep
    
    * make clippy happy
---
 arrow/benches/comparison_kernels.rs | 51 +++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/arrow/benches/comparison_kernels.rs 
b/arrow/benches/comparison_kernels.rs
index f330e1386cc..1e081d141a0 100644
--- a/arrow/benches/comparison_kernels.rs
+++ b/arrow/benches/comparison_kernels.rs
@@ -17,6 +17,7 @@
 
 #[macro_use]
 extern crate criterion;
+use arrow::util::test_util::seedable_rng;
 use criterion::Criterion;
 
 extern crate arrow;
@@ -27,6 +28,8 @@ use arrow::{array::*, datatypes::Float32Type, 
datatypes::Int32Type};
 use arrow_buffer::IntervalMonthDayNano;
 use arrow_string::like::*;
 use arrow_string::regexp::regexp_is_match_utf8_scalar;
+use rand::rngs::StdRng;
+use rand::Rng;
 
 const SIZE: usize = 65536;
 
@@ -55,6 +58,14 @@ fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, 
value_b: &str) {
     .unwrap();
 }
 
+fn make_string_array(size: usize, rng: &mut StdRng) -> impl Iterator<Item = 
Option<String>> + '_ {
+    (0..size).map(|_| {
+        let len = rng.gen_range(0..64);
+        let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect();
+        Some(String::from_utf8(bytes).unwrap())
+    })
+}
+
 fn add_benchmark(c: &mut Criterion) {
     let arr_a = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 42);
     let arr_b = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 43);
@@ -63,6 +74,7 @@ fn add_benchmark(c: &mut Criterion) {
     let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 
0.0, 43);
 
     let arr_string = create_string_array::<i32>(SIZE, 0.0);
+
     let scalar = Float32Array::from(vec![1.0]);
 
     c.bench_function("eq Float32", |b| b.iter(|| eq(&arr_a, &arr_b)));
@@ -138,6 +150,45 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| eq(&arr_month_day_nano_b, &scalar).unwrap())
     });
 
+    let mut rng = seedable_rng();
+    let mut array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
+    let string_left = StringArray::from_iter(array_gen);
+    let string_view_left = StringViewArray::from_iter(string_left.iter());
+
+    // Reuse the same rng so the right-hand arrays get **different** data;
+    // otherwise the left and right arrays would be identical.
+    array_gen = make_string_array(1024 * 1024 * 8, &mut rng);
+    let string_right = StringArray::from_iter(array_gen);
+    let string_view_right = StringViewArray::from_iter(string_right.iter());
+
+    c.bench_function("eq scalar StringArray", |b| {
+        b.iter(|| {
+            eq(
+                &Scalar::new(StringArray::from_iter_values(["xxxx"])),
+                &string_left,
+            )
+            .unwrap()
+        })
+    });
+
+    c.bench_function("eq scalar StringViewArray", |b| {
+        b.iter(|| {
+            eq(
+                &Scalar::new(StringViewArray::from_iter_values(["xxxx"])),
+                &string_view_left,
+            )
+            .unwrap()
+        })
+    });
+
+    c.bench_function("eq StringArray StringArray", |b| {
+        b.iter(|| eq(&string_left, &string_right).unwrap())
+    });
+
+    c.bench_function("eq StringViewArray StringViewArray", |b| {
+        b.iter(|| eq(&string_view_left, &string_view_right).unwrap())
+    });
+
     c.bench_function("like_utf8 scalar equals", |b| {
         b.iter(|| bench_like_utf8_scalar(&arr_string, "xxxx"))
     });

Reply via email to