This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a5519d6ac2 Consolidate sort benchmarks (#4604)
a5519d6ac2 is described below

commit a5519d6ac273e4cc4fdcd85a9e424676130795b7
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Aug 1 08:54:18 2023 +0100

    Consolidate sort benchmarks (#4604)
---
 arrow/Cargo.toml                        |   5 -
 arrow/benches/sort_kernel.rs            | 209 ++++++++++++++++++--------------
 arrow/benches/sort_kernel_primitives.rs |  59 ---------
 3 files changed, 121 insertions(+), 152 deletions(-)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 32f11af541..bcf6a84311 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -185,11 +185,6 @@ name = "sort_kernel"
 harness = false
 required-features = ["test_utils"]
 
-[[bench]]
-name = "sort_kernel_primitives"
-harness = false
-required-features = ["test_utils"]
-
 [[bench]]
 name = "partition_kernels"
 harness = false
diff --git a/arrow/benches/sort_kernel.rs b/arrow/benches/sort_kernel.rs
index 43a9a84d9a..8762d9eb2f 100644
--- a/arrow/benches/sort_kernel.rs
+++ b/arrow/benches/sort_kernel.rs
@@ -23,8 +23,7 @@ use std::sync::Arc;
 
 extern crate arrow;
 
-use arrow::compute::kernels::sort::{lexsort, SortColumn};
-use arrow::compute::{sort_limit, sort_to_indices};
+use arrow::compute::{lexsort, sort, sort_to_indices, SortColumn};
 use arrow::datatypes::{Int16Type, Int32Type};
 use arrow::util::bench_util::*;
 use arrow::{array::*, datatypes::Float32Type};
@@ -42,7 +41,11 @@ fn create_bool_array(size: usize, with_nulls: bool) -> ArrayRef {
     Arc::new(array)
 }
 
-fn bench_sort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
+fn bench_sort(array: &dyn Array) {
+    criterion::black_box(sort(array, None).unwrap());
+}
+
+fn bench_lexsort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
     let columns = vec![
         SortColumn {
             values: array_a.clone(),
@@ -57,115 +60,145 @@ fn bench_sort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
     criterion::black_box(lexsort(&columns, limit).unwrap());
 }
 
-fn bench_sort_to_indices(array: &ArrayRef, limit: Option<usize>) {
+fn bench_sort_to_indices(array: &dyn Array, limit: Option<usize>) {
     criterion::black_box(sort_to_indices(array, None, limit).unwrap());
 }
 
-fn bench_sort_run(array: &ArrayRef, limit: Option<usize>) {
-    criterion::black_box(sort_limit(array, None, limit).unwrap());
-}
-
 fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_f32_array(2u64.pow(10) as usize, false);
-    let arr_b = create_f32_array(2u64.pow(10) as usize, false);
-
-    c.bench_function("sort 2^10", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
+    let arr = create_primitive_array::<Int32Type>(2usize.pow(10), 0.0);
+    c.bench_function("sort i64 2^10", |b| b.iter(|| bench_sort(&arr)));
 
-    let arr_a = create_f32_array(2u64.pow(12) as usize, false);
-    let arr_b = create_f32_array(2u64.pow(12) as usize, false);
+    let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.5);
+    c.bench_function("sort i64 2^12", |b| b.iter(|| bench_sort(&arr)));
 
-    c.bench_function("sort 2^12", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
+    let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.0);
+    c.bench_function("sort i64 nulls 2^10", |b| b.iter(|| bench_sort(&arr)));
 
-    let arr_a = create_f32_array(2u64.pow(10) as usize, true);
-    let arr_b = create_f32_array(2u64.pow(10) as usize, true);
+    let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.5);
+    c.bench_function("sort i64 nulls 2^12", |b| b.iter(|| bench_sort(&arr)));
 
-    c.bench_function("sort nulls 2^10", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
+    let arr = create_f32_array(2_usize.pow(12), false);
+    c.bench_function("sort f32 to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
-    let arr_a = create_f32_array(2u64.pow(12) as usize, true);
-    let arr_b = create_f32_array(2u64.pow(12) as usize, true);
+    let arr = create_f32_array(2usize.pow(12), true);
+    c.bench_function("sort f32 nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
 
-    c.bench_function("sort nulls 2^12", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 10);
+    c.bench_function("sort string[10] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
-    let arr_a = create_bool_array(2u64.pow(12) as usize, false);
-    let arr_b = create_bool_array(2u64.pow(12) as usize, false);
-    c.bench_function("bool sort 2^12", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 10);
+    c.bench_function("sort string[10] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
-    let arr_a = create_bool_array(2u64.pow(12) as usize, true);
-    let arr_b = create_bool_array(2u64.pow(12) as usize, true);
-    c.bench_function("bool sort nulls 2^12", |b| {
-        b.iter(|| bench_sort(&arr_a, &arr_b, None))
+    let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.0, 10);
+    c.bench_function("sort string[10] dict to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
-    let dict_arr = Arc::new(create_string_dict_array::<Int32Type>(
-        2u64.pow(12) as usize,
-        0.0,
-        1,
-    )) as ArrayRef;
-    c.bench_function("dict string 2^12", |b| {
-        b.iter(|| bench_sort_to_indices(&dict_arr, None))
+    let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.5, 10);
+    c.bench_function("sort string[10] dict nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
-    let run_encoded_array = Arc::new(create_primitive_run_array::<Int16Type, Int32Type>(
-        2u64.pow(12) as usize,
-        2u64.pow(10) as usize,
-    )) as ArrayRef;
+    let run_encoded_array = create_primitive_run_array::<Int16Type, Int32Type>(
+        2usize.pow(12),
+        2usize.pow(10),
+    );
+
+    c.bench_function("sort primitive run 2^12", |b| {
+        b.iter(|| bench_sort(&run_encoded_array))
+    });
 
     c.bench_function("sort primitive run to indices 2^12", |b| {
         b.iter(|| bench_sort_to_indices(&run_encoded_array, None))
     });
 
-    c.bench_function("sort primitive run to run 2^12", |b| {
-        b.iter(|| bench_sort_run(&run_encoded_array, None))
-    });
-
-    // with limit
-    {
-        let arr_a = create_f32_array(2u64.pow(12) as usize, false);
-        let arr_b = create_f32_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 10", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
-        });
-
-        let arr_a = create_f32_array(2u64.pow(12) as usize, false);
-        let arr_b = create_f32_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 100", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
-        });
-
-        let arr_a = create_f32_array(2u64.pow(12) as usize, false);
-        let arr_b = create_f32_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 1000", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
-        });
-
-        let arr_a = create_f32_array(2u64.pow(12) as usize, false);
-        let arr_b = create_f32_array(2u64.pow(12) as usize, false);
-        c.bench_function("sort 2^12 limit 2^12", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
-        });
-
-        let arr_a = create_f32_array(2u64.pow(12) as usize, true);
-        let arr_b = create_f32_array(2u64.pow(12) as usize, true);
-
-        c.bench_function("sort nulls 2^12 limit 10", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
-        });
-        c.bench_function("sort nulls 2^12 limit 100", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
-        });
-        c.bench_function("sort nulls 2^12 limit 1000", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
-        });
-        c.bench_function("sort nulls 2^12 limit 2^12", |b| {
-            b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
-        });
-    }
+    let arr_a = create_f32_array(2usize.pow(10), false);
+    let arr_b = create_f32_array(2usize.pow(10), false);
+
+    c.bench_function("lexsort (f32, f32) 2^10", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), false);
+    let arr_b = create_f32_array(2usize.pow(12), false);
+
+    c.bench_function("lexsort (f32, f32) 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(10), true);
+    let arr_b = create_f32_array(2usize.pow(10), true);
+
+    c.bench_function("lexsort (f32, f32) nulls 2^10", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), true);
+    let arr_b = create_f32_array(2usize.pow(12), true);
+
+    c.bench_function("lexsort (f32, f32) nulls 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_bool_array(2usize.pow(12), false);
+    let arr_b = create_bool_array(2usize.pow(12), false);
+    c.bench_function("lexsort (bool, bool) 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_bool_array(2usize.pow(12), true);
+    let arr_b = create_bool_array(2usize.pow(12), true);
+    c.bench_function("lexsort (bool, bool) nulls 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), false);
+    let arr_b = create_f32_array(2usize.pow(12), false);
+    c.bench_function("lexsort (f32, f32) 2^12 limit 10", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(10)))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), false);
+    let arr_b = create_f32_array(2usize.pow(12), false);
+    c.bench_function("lexsort (f32, f32) 2^12 limit 100", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(100)))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), false);
+    let arr_b = create_f32_array(2usize.pow(12), false);
+    c.bench_function("lexsort (f32, f32) 2^12 limit 1000", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(1000)))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), false);
+    let arr_b = create_f32_array(2usize.pow(12), false);
+    c.bench_function("lexsort (f32, f32) 2^12 limit 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(2usize.pow(12))))
+    });
+
+    let arr_a = create_f32_array(2usize.pow(12), true);
+    let arr_b = create_f32_array(2usize.pow(12), true);
+
+    c.bench_function("lexsort (f32, f32) nulls 2^12 limit 10", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(10)))
+    });
+    c.bench_function("lexsort (f32, f32) nulls 2^12 limit 100", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(100)))
+    });
+    c.bench_function("lexsort (f32, f32) nulls 2^12 limit 1000", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(1000)))
+    });
+    c.bench_function("lexsort (f32, f32) nulls 2^12 limit 2^12", |b| {
+        b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(2usize.pow(12))))
+    });
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/benches/sort_kernel_primitives.rs b/arrow/benches/sort_kernel_primitives.rs
deleted file mode 100644
index ca9183580b..0000000000
--- a/arrow/benches/sort_kernel_primitives.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use arrow_ord::sort::sort;
-use criterion::Criterion;
-
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Int64Type};
-
-fn create_i64_array(size: usize, with_nulls: bool) -> ArrayRef {
-    let null_density = if with_nulls { 0.5 } else { 0.0 };
-    let array = create_primitive_array::<Int64Type>(size, null_density);
-    Arc::new(array)
-}
-
-fn bench_sort(array: &ArrayRef) {
-    criterion::black_box(sort(criterion::black_box(array), None).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
-    let arr_a = create_i64_array(2u64.pow(10) as usize, false);
-
-    c.bench_function("sort 2^10", |b| b.iter(|| bench_sort(&arr_a)));
-
-    let arr_a = create_i64_array(2u64.pow(12) as usize, false);
-
-    c.bench_function("sort 2^12", |b| b.iter(|| bench_sort(&arr_a)));
-
-    let arr_a = create_i64_array(2u64.pow(10) as usize, true);
-
-    c.bench_function("sort nulls 2^10", |b| b.iter(|| bench_sort(&arr_a)));
-
-    let arr_a = create_i64_array(2u64.pow(12) as usize, true);
-
-    c.bench_function("sort nulls 2^12", |b| b.iter(|| bench_sort(&arr_a)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);

Reply via email to