This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a5519d6ac2 Consolidate sort benchmarks (#4604)
a5519d6ac2 is described below
commit a5519d6ac273e4cc4fdcd85a9e424676130795b7
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Aug 1 08:54:18 2023 +0100
Consolidate sort benchmarks (#4604)
---
arrow/Cargo.toml | 5 -
arrow/benches/sort_kernel.rs | 209 ++++++++++++++++++--------------
arrow/benches/sort_kernel_primitives.rs | 59 ---------
3 files changed, 121 insertions(+), 152 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 32f11af541..bcf6a84311 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -185,11 +185,6 @@ name = "sort_kernel"
harness = false
required-features = ["test_utils"]
-[[bench]]
-name = "sort_kernel_primitives"
-harness = false
-required-features = ["test_utils"]
-
[[bench]]
name = "partition_kernels"
harness = false
diff --git a/arrow/benches/sort_kernel.rs b/arrow/benches/sort_kernel.rs
index 43a9a84d9a..8762d9eb2f 100644
--- a/arrow/benches/sort_kernel.rs
+++ b/arrow/benches/sort_kernel.rs
@@ -23,8 +23,7 @@ use std::sync::Arc;
extern crate arrow;
-use arrow::compute::kernels::sort::{lexsort, SortColumn};
-use arrow::compute::{sort_limit, sort_to_indices};
+use arrow::compute::{lexsort, sort, sort_to_indices, SortColumn};
use arrow::datatypes::{Int16Type, Int32Type};
use arrow::util::bench_util::*;
use arrow::{array::*, datatypes::Float32Type};
@@ -42,7 +41,11 @@ fn create_bool_array(size: usize, with_nulls: bool) -> ArrayRef {
Arc::new(array)
}
-fn bench_sort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
+fn bench_sort(array: &dyn Array) {
+ criterion::black_box(sort(array, None).unwrap());
+}
+
+fn bench_lexsort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
let columns = vec![
SortColumn {
values: array_a.clone(),
@@ -57,115 +60,145 @@ fn bench_sort(array_a: &ArrayRef, array_b: &ArrayRef, limit: Option<usize>) {
criterion::black_box(lexsort(&columns, limit).unwrap());
}
-fn bench_sort_to_indices(array: &ArrayRef, limit: Option<usize>) {
+fn bench_sort_to_indices(array: &dyn Array, limit: Option<usize>) {
criterion::black_box(sort_to_indices(array, None, limit).unwrap());
}
-fn bench_sort_run(array: &ArrayRef, limit: Option<usize>) {
- criterion::black_box(sort_limit(array, None, limit).unwrap());
-}
-
fn add_benchmark(c: &mut Criterion) {
- let arr_a = create_f32_array(2u64.pow(10) as usize, false);
- let arr_b = create_f32_array(2u64.pow(10) as usize, false);
-
- c.bench_function("sort 2^10", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
+ let arr = create_primitive_array::<Int32Type>(2usize.pow(10), 0.0);
+ c.bench_function("sort i64 2^10", |b| b.iter(|| bench_sort(&arr)));
- let arr_a = create_f32_array(2u64.pow(12) as usize, false);
- let arr_b = create_f32_array(2u64.pow(12) as usize, false);
+ let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.5);
+ c.bench_function("sort i64 2^12", |b| b.iter(|| bench_sort(&arr)));
- c.bench_function("sort 2^12", |b| b.iter(|| bench_sort(&arr_a, &arr_b, None)));
+ let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.0);
+ c.bench_function("sort i64 nulls 2^10", |b| b.iter(|| bench_sort(&arr)));
- let arr_a = create_f32_array(2u64.pow(10) as usize, true);
- let arr_b = create_f32_array(2u64.pow(10) as usize, true);
+ let arr = create_primitive_array::<Int32Type>(2usize.pow(12), 0.5);
+ c.bench_function("sort i64 nulls 2^12", |b| b.iter(|| bench_sort(&arr)));
- c.bench_function("sort nulls 2^10", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, None))
+ let arr = create_f32_array(2_usize.pow(12), false);
+ c.bench_function("sort f32 to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
});
- let arr_a = create_f32_array(2u64.pow(12) as usize, true);
- let arr_b = create_f32_array(2u64.pow(12) as usize, true);
+ let arr = create_f32_array(2usize.pow(12), true);
+ c.bench_function("sort f32 nulls to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
+ });
- c.bench_function("sort nulls 2^12", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, None))
+ let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 10);
+ c.bench_function("sort string[10] to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
});
- let arr_a = create_bool_array(2u64.pow(12) as usize, false);
- let arr_b = create_bool_array(2u64.pow(12) as usize, false);
- c.bench_function("bool sort 2^12", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, None))
+ let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 10);
+ c.bench_function("sort string[10] nulls to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
});
- let arr_a = create_bool_array(2u64.pow(12) as usize, true);
- let arr_b = create_bool_array(2u64.pow(12) as usize, true);
- c.bench_function("bool sort nulls 2^12", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, None))
+ let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.0, 10);
+ c.bench_function("sort string[10] dict to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
});
- let dict_arr = Arc::new(create_string_dict_array::<Int32Type>(
- 2u64.pow(12) as usize,
- 0.0,
- 1,
- )) as ArrayRef;
- c.bench_function("dict string 2^12", |b| {
- b.iter(|| bench_sort_to_indices(&dict_arr, None))
+ let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.5, 10);
+ c.bench_function("sort string[10] dict nulls to indices 2^12", |b| {
+ b.iter(|| bench_sort_to_indices(&arr, None))
});
- let run_encoded_array = Arc::new(create_primitive_run_array::<Int16Type, Int32Type>(
- 2u64.pow(12) as usize,
- 2u64.pow(10) as usize,
- )) as ArrayRef;
+ let run_encoded_array = create_primitive_run_array::<Int16Type, Int32Type>(
+ 2usize.pow(12),
+ 2usize.pow(10),
+ );
+
+ c.bench_function("sort primitive run 2^12", |b| {
+ b.iter(|| bench_sort(&run_encoded_array))
+ });
c.bench_function("sort primitive run to indices 2^12", |b| {
b.iter(|| bench_sort_to_indices(&run_encoded_array, None))
});
- c.bench_function("sort primitive run to run 2^12", |b| {
- b.iter(|| bench_sort_run(&run_encoded_array, None))
- });
-
- // with limit
- {
- let arr_a = create_f32_array(2u64.pow(12) as usize, false);
- let arr_b = create_f32_array(2u64.pow(12) as usize, false);
- c.bench_function("sort 2^12 limit 10", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
- });
-
- let arr_a = create_f32_array(2u64.pow(12) as usize, false);
- let arr_b = create_f32_array(2u64.pow(12) as usize, false);
- c.bench_function("sort 2^12 limit 100", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
- });
-
- let arr_a = create_f32_array(2u64.pow(12) as usize, false);
- let arr_b = create_f32_array(2u64.pow(12) as usize, false);
- c.bench_function("sort 2^12 limit 1000", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
- });
-
- let arr_a = create_f32_array(2u64.pow(12) as usize, false);
- let arr_b = create_f32_array(2u64.pow(12) as usize, false);
- c.bench_function("sort 2^12 limit 2^12", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
- });
-
- let arr_a = create_f32_array(2u64.pow(12) as usize, true);
- let arr_b = create_f32_array(2u64.pow(12) as usize, true);
-
- c.bench_function("sort nulls 2^12 limit 10", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(10)))
- });
- c.bench_function("sort nulls 2^12 limit 100", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(100)))
- });
- c.bench_function("sort nulls 2^12 limit 1000", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(1000)))
- });
- c.bench_function("sort nulls 2^12 limit 2^12", |b| {
- b.iter(|| bench_sort(&arr_a, &arr_b, Some(2u64.pow(12) as usize)))
- });
- }
+ let arr_a = create_f32_array(2usize.pow(10), false);
+ let arr_b = create_f32_array(2usize.pow(10), false);
+
+ c.bench_function("lexsort (f32, f32) 2^10", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), false);
+ let arr_b = create_f32_array(2usize.pow(12), false);
+
+ c.bench_function("lexsort (f32, f32) 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(10), true);
+ let arr_b = create_f32_array(2usize.pow(10), true);
+
+ c.bench_function("lexsort (f32, f32) nulls 2^10", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), true);
+ let arr_b = create_f32_array(2usize.pow(12), true);
+
+ c.bench_function("lexsort (f32, f32) nulls 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_bool_array(2usize.pow(12), false);
+ let arr_b = create_bool_array(2usize.pow(12), false);
+ c.bench_function("lexsort (bool, bool) 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_bool_array(2usize.pow(12), true);
+ let arr_b = create_bool_array(2usize.pow(12), true);
+ c.bench_function("lexsort (bool, bool) nulls 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, None))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), false);
+ let arr_b = create_f32_array(2usize.pow(12), false);
+ c.bench_function("lexsort (f32, f32) 2^12 limit 10", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(10)))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), false);
+ let arr_b = create_f32_array(2usize.pow(12), false);
+ c.bench_function("lexsort (f32, f32) 2^12 limit 100", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(100)))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), false);
+ let arr_b = create_f32_array(2usize.pow(12), false);
+ c.bench_function("lexsort (f32, f32) 2^12 limit 1000", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(1000)))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), false);
+ let arr_b = create_f32_array(2usize.pow(12), false);
+ c.bench_function("lexsort (f32, f32) 2^12 limit 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(2usize.pow(12))))
+ });
+
+ let arr_a = create_f32_array(2usize.pow(12), true);
+ let arr_b = create_f32_array(2usize.pow(12), true);
+
+ c.bench_function("lexsort (f32, f32) nulls 2^12 limit 10", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(10)))
+ });
+ c.bench_function("lexsort (f32, f32) nulls 2^12 limit 100", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(100)))
+ });
+ c.bench_function("lexsort (f32, f32) nulls 2^12 limit 1000", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(1000)))
+ });
+ c.bench_function("lexsort (f32, f32) nulls 2^12 limit 2^12", |b| {
+ b.iter(|| bench_lexsort(&arr_a, &arr_b, Some(2usize.pow(12))))
+ });
}
criterion_group!(benches, add_benchmark);
diff --git a/arrow/benches/sort_kernel_primitives.rs b/arrow/benches/sort_kernel_primitives.rs
deleted file mode 100644
index ca9183580b..0000000000
--- a/arrow/benches/sort_kernel_primitives.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#[macro_use]
-extern crate criterion;
-use arrow_ord::sort::sort;
-use criterion::Criterion;
-
-use std::sync::Arc;
-
-extern crate arrow;
-
-use arrow::util::bench_util::*;
-use arrow::{array::*, datatypes::Int64Type};
-
-fn create_i64_array(size: usize, with_nulls: bool) -> ArrayRef {
- let null_density = if with_nulls { 0.5 } else { 0.0 };
- let array = create_primitive_array::<Int64Type>(size, null_density);
- Arc::new(array)
-}
-
-fn bench_sort(array: &ArrayRef) {
- criterion::black_box(sort(criterion::black_box(array), None).unwrap());
-}
-
-fn add_benchmark(c: &mut Criterion) {
- let arr_a = create_i64_array(2u64.pow(10) as usize, false);
-
- c.bench_function("sort 2^10", |b| b.iter(|| bench_sort(&arr_a)));
-
- let arr_a = create_i64_array(2u64.pow(12) as usize, false);
-
- c.bench_function("sort 2^12", |b| b.iter(|| bench_sort(&arr_a)));
-
- let arr_a = create_i64_array(2u64.pow(10) as usize, true);
-
- c.bench_function("sort nulls 2^10", |b| b.iter(|| bench_sort(&arr_a)));
-
- let arr_a = create_i64_array(2u64.pow(12) as usize, true);
-
- c.bench_function("sort nulls 2^12", |b| b.iter(|| bench_sort(&arr_a)));
-}
-
-criterion_group!(benches, add_benchmark);
-criterion_main!(benches);