This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 510f02f Speed up bound checking in `take` (#281)
510f02f is described below
commit 510f02f449193bea9df3f423d18ce7a9e4112bdf
Author: Daniƫl Heres <[email protected]>
AuthorDate: Tue May 11 07:35:05 2021 +0200
Speed up bound checking in `take` (#281)
* WIP improve take performance
* WIP
* Bound checking speed
* Simplify
* fmt
* Improve formatting
---
arrow/benches/take_kernels.rs | 19 ++++++++++++++++++-
arrow/src/compute/kernels/take.rs | 25 +++++++++++++++++++------
2 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs
index 2853eb5..b1d03d7 100644
--- a/arrow/benches/take_kernels.rs
+++ b/arrow/benches/take_kernels.rs
@@ -23,7 +23,7 @@ use rand::Rng;
extern crate arrow;
-use arrow::compute::take;
+use arrow::compute::{take, TakeOptions};
use arrow::datatypes::*;
use arrow::util::test_util::seedable_rng;
use arrow::{array::*, util::bench_util::*};
@@ -46,6 +46,12 @@ fn bench_take(values: &dyn Array, indices: &UInt32Array) {
criterion::black_box(take(values, &indices, None).unwrap());
}
+fn bench_take_bounds_check(values: &dyn Array, indices: &UInt32Array) {
+ criterion::black_box(
+ take(values, &indices, Some(TakeOptions { check_bounds: true
})).unwrap(),
+ );
+}
+
fn add_benchmark(c: &mut Criterion) {
let values = create_primitive_array::<Int32Type>(512, 0.0);
let indices = create_random_index(512, 0.0);
@@ -56,6 +62,17 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});
+ let values = create_primitive_array::<Int32Type>(512, 0.0);
+ let indices = create_random_index(512, 0.0);
+ c.bench_function("take check bounds i32 512", |b| {
+ b.iter(|| bench_take_bounds_check(&values, &indices))
+ });
+ let values = create_primitive_array::<Int32Type>(1024, 0.0);
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take check bounds i32 1024", |b| {
+ b.iter(|| bench_take_bounds_check(&values, &indices))
+ });
+
let indices = create_random_index(512, 0.5);
c.bench_function("take i32 nulls 512", |b| {
b.iter(|| bench_take(&values, &indices))
diff --git a/arrow/src/compute/kernels/take.rs
b/arrow/src/compute/kernels/take.rs
index 0217573..d325ce4 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -100,17 +100,30 @@ where
let options = options.unwrap_or_default();
if options.check_bounds {
let len = values.len();
- for i in 0..indices.len() {
- if indices.is_valid(i) {
- let ix =
ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
+ if indices.null_count() > 0 {
+ indices.iter().flatten().try_for_each(|index| {
+ let ix = ToPrimitive::to_usize(&index).ok_or_else(|| {
ArrowError::ComputeError("Cast to usize
failed".to_string())
})?;
if ix >= len {
return Err(ArrowError::ComputeError(
- format!("Array index out of bounds, cannot get item at
index {} from {} entries", ix, len))
- );
+ format!("Array index out of bounds, cannot get item at
index {} from {} entries", ix, len))
+ );
}
- }
+ Ok(())
+ })?;
+ } else {
+ indices.values().iter().try_for_each(|index| {
+ let ix = ToPrimitive::to_usize(index).ok_or_else(|| {
+ ArrowError::ComputeError("Cast to usize
failed".to_string())
+ })?;
+ if ix >= len {
+ return Err(ArrowError::ComputeError(
+ format!("Array index out of bounds, cannot get item at
index {} from {} entries", ix, len))
+ );
+ }
+ Ok(())
+ })?
}
}
match values.data_type() {