This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2d6352b Remove explicit simd arithmetic kernels except for
division/modulo (#1221)
2d6352b is described below
commit 2d6352b3b35a17cef2618f6475b7230ce0146d55
Author: Jörn Horstmann <[email protected]>
AuthorDate: Mon Jan 24 20:50:30 2022 +0100
Remove explicit simd arithmetic kernels except for division/modulo (#1221)
* Extend arithmetic benchmarks
* Remove explicit simd arithmetic except for div/mod because
autovectorization generates better code
* Remove unneeded return keywords
---
arrow/benches/arithmetic_kernels.rs | 62 +++++---
arrow/src/buffer/immutable.rs | 1 +
arrow/src/compute/kernels/arithmetic.rs | 242 ++++----------------------------
3 files changed, 68 insertions(+), 237 deletions(-)
diff --git a/arrow/benches/arithmetic_kernels.rs
b/arrow/benches/arithmetic_kernels.rs
index bbe4123..4be4a26 100644
--- a/arrow/benches/arithmetic_kernels.rs
+++ b/arrow/benches/arithmetic_kernels.rs
@@ -24,7 +24,6 @@ use std::sync::Arc;
extern crate arrow;
-use arrow::compute::kernels::limit::*;
use arrow::util::bench_util::*;
use arrow::{array::*, datatypes::Float32Type};
use arrow::{compute::kernels::arithmetic::*, util::test_util::seedable_rng};
@@ -59,44 +58,69 @@ fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) {
criterion::black_box(divide(arr_a, arr_b).unwrap());
}
+fn bench_divide_unchecked(arr_a: &ArrayRef, arr_b: &ArrayRef) {
+ let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
+ let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
+ criterion::black_box(divide_unchecked(arr_a, arr_b).unwrap());
+}
+
fn bench_divide_scalar(array: &ArrayRef, divisor: f32) {
let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
criterion::black_box(divide_scalar(array, divisor).unwrap());
}
-fn bench_limit(arr_a: &ArrayRef, max: usize) {
- criterion::black_box(limit(arr_a, max));
+fn bench_modulo(arr_a: &ArrayRef, arr_b: &ArrayRef) {
+ let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
+ let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
+ criterion::black_box(modulus(arr_a, arr_b).unwrap());
+}
+
+fn bench_modulo_scalar(array: &ArrayRef, divisor: f32) {
+ let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
+ criterion::black_box(modulus_scalar(array, divisor).unwrap());
}
fn add_benchmark(c: &mut Criterion) {
- let arr_a = create_array(512, false);
- let arr_b = create_array(512, false);
+ const BATCH_SIZE: usize = 64 * 1024;
+ let arr_a = create_array(BATCH_SIZE, false);
+ let arr_b = create_array(BATCH_SIZE, false);
let scalar = seedable_rng().gen();
- c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
- c.bench_function("subtract 512", |b| {
- b.iter(|| bench_subtract(&arr_a, &arr_b))
+ c.bench_function("add", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
+ c.bench_function("subtract", |b| b.iter(|| bench_subtract(&arr_a,
&arr_b)));
+ c.bench_function("multiply", |b| b.iter(|| bench_multiply(&arr_a,
&arr_b)));
+ c.bench_function("divide", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
+ c.bench_function("divide_unchecked", |b| {
+ b.iter(|| bench_divide_unchecked(&arr_a, &arr_b))
});
- c.bench_function("multiply 512", |b| {
- b.iter(|| bench_multiply(&arr_a, &arr_b))
- });
- c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a,
&arr_b)));
- c.bench_function("divide_scalar 512", |b| {
+ c.bench_function("divide_scalar", |b| {
b.iter(|| bench_divide_scalar(&arr_a, scalar))
});
- c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a,
512)));
+ c.bench_function("modulo", |b| b.iter(|| bench_modulo(&arr_a, &arr_b)));
+ c.bench_function("modulo_scalar", |b| {
+ b.iter(|| bench_modulo_scalar(&arr_a, scalar))
+ });
- let arr_a_nulls = create_array(512, false);
- let arr_b_nulls = create_array(512, false);
- c.bench_function("add_nulls_512", |b| {
+ let arr_a_nulls = create_array(BATCH_SIZE, true);
+ let arr_b_nulls = create_array(BATCH_SIZE, true);
+ c.bench_function("add_nulls", |b| {
b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
});
- c.bench_function("divide_nulls_512", |b| {
+ c.bench_function("divide_nulls", |b| {
b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
});
- c.bench_function("divide_scalar_nulls_512", |b| {
+ c.bench_function("divide_nulls_unchecked", |b| {
+ b.iter(|| bench_divide_unchecked(&arr_a_nulls, &arr_b_nulls))
+ });
+ c.bench_function("divide_scalar_nulls", |b| {
b.iter(|| bench_divide_scalar(&arr_a_nulls, scalar))
});
+ c.bench_function("modulo_nulls", |b| {
+ b.iter(|| bench_modulo(&arr_a_nulls, &arr_b_nulls))
+ });
+ c.bench_function("modulo_scalar_nulls", |b| {
+ b.iter(|| bench_modulo_scalar(&arr_a_nulls, scalar))
+ });
}
criterion_group!(benches, add_benchmark);
diff --git a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs
index 0823de5..a3de0d4 100644
--- a/arrow/src/buffer/immutable.rs
+++ b/arrow/src/buffer/immutable.rs
@@ -153,6 +153,7 @@ impl Buffer {
///
/// Note that this should be used cautiously, and the returned pointer
should not be
/// stored anywhere, to avoid dangling pointers.
+ #[inline]
pub fn as_ptr(&self) -> *const u8 {
unsafe { self.data.ptr().as_ptr().add(self.offset) }
}
diff --git a/arrow/src/compute/kernels/arithmetic.rs
b/arrow/src/compute/kernels/arithmetic.rs
index 4eb9576..dcdfa95 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -29,7 +29,6 @@ use num::{One, Zero};
use crate::buffer::Buffer;
#[cfg(feature = "simd")]
use crate::buffer::MutableBuffer;
-#[cfg(not(feature = "simd"))]
use crate::compute::kernels::arity::unary;
use crate::compute::util::combine_option_bitmap;
use crate::datatypes;
@@ -42,65 +41,6 @@ use std::borrow::BorrowMut;
#[cfg(feature = "simd")]
use std::slice::{ChunksExact, ChunksExactMut};
-/// Creates a new PrimitiveArray by applying `simd_op` to the input `array`.
-/// If the length of the array is not multiple of the number of vector lanes
-/// then the remainder of the array will be calculated using `scalar_op`.
-/// Any operation on a `NULL` value will result in a `NULL` value in the
output.
-#[cfg(feature = "simd")]
-fn simd_unary_math_op<T, SIMD_OP, SCALAR_OP>(
- array: &PrimitiveArray<T>,
- simd_op: SIMD_OP,
- scalar_op: SCALAR_OP,
-) -> PrimitiveArray<T>
-where
- T: ArrowNumericType,
- SIMD_OP: Fn(T::Simd) -> T::Simd,
- SCALAR_OP: Fn(T::Native) -> T::Native,
-{
- let lanes = T::lanes();
- let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-
- let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size,
false);
-
- // safety: result is newly created above, always written as a T below
- let mut result_chunks = unsafe {
result.typed_data_mut().chunks_exact_mut(lanes) };
- let mut array_chunks = array.values().chunks_exact(lanes);
-
- result_chunks
- .borrow_mut()
- .zip(array_chunks.borrow_mut())
- .for_each(|(result_slice, input_slice)| {
- let simd_input = T::load(input_slice);
- let simd_result = T::unary_op(simd_input, &simd_op);
- T::write(simd_result, result_slice);
- });
-
- let result_remainder = result_chunks.into_remainder();
- let array_remainder = array_chunks.remainder();
-
- result_remainder.into_iter().zip(array_remainder).for_each(
- |(scalar_result, scalar_input)| {
- *scalar_result = scalar_op(*scalar_input);
- },
- );
-
- let data = unsafe {
- ArrayData::new_unchecked(
- T::DATA_TYPE,
- array.len(),
- None,
- array
- .data_ref()
- .null_buffer()
- .map(|b| b.bit_slice(array.offset(), array.len())),
- 0,
- vec![result.into()],
- vec![],
- )
- };
- PrimitiveArray::<T>::from(data)
-}
-
/// Helper function to perform math lambda function on values from two arrays.
If either
/// left or right value is null then the output value is also null, so `1 +
null` is
/// `null`.
@@ -227,79 +167,6 @@ where
Ok(PrimitiveArray::<T>::from(data))
}
-/// Creates a new PrimitiveArray by applying `simd_op` to the `left` and
`right` input arrays.
-/// If the length of the arrays is not multiple of the number of vector lanes
-/// then the remainder of the array will be calculated using `scalar_op`.
-/// Any operation on a `NULL` value will result in a `NULL` value in the
output.
-///
-/// # Errors
-///
-/// This function errors if the arrays have different lengths
-#[cfg(feature = "simd")]
-fn simd_math_op<T, SIMD_OP, SCALAR_OP>(
- left: &PrimitiveArray<T>,
- right: &PrimitiveArray<T>,
- simd_op: SIMD_OP,
- scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
-where
- T: ArrowNumericType,
- SIMD_OP: Fn(T::Simd, T::Simd) -> T::Simd,
- SCALAR_OP: Fn(T::Native, T::Native) -> T::Native,
-{
- if left.len() != right.len() {
- return Err(ArrowError::ComputeError(
- "Cannot perform math operation on arrays of different
length".to_string(),
- ));
- }
-
- let null_bit_buffer =
- combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
- let lanes = T::lanes();
- let buffer_size = left.len() * std::mem::size_of::<T::Native>();
- let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size,
false);
-
- // safety: result is newly created above, always written as a T below
- let mut result_chunks = unsafe {
result.typed_data_mut().chunks_exact_mut(lanes) };
- let mut left_chunks = left.values().chunks_exact(lanes);
- let mut right_chunks = right.values().chunks_exact(lanes);
-
- result_chunks
- .borrow_mut()
- .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
- .for_each(|(result_slice, (left_slice, right_slice))| {
- let simd_left = T::load(left_slice);
- let simd_right = T::load(right_slice);
- let simd_result = T::bin_op(simd_left, simd_right, &simd_op);
- T::write(simd_result, result_slice);
- });
-
- let result_remainder = result_chunks.into_remainder();
- let left_remainder = left_chunks.remainder();
- let right_remainder = right_chunks.remainder();
-
- result_remainder
- .iter_mut()
- .zip(left_remainder.iter().zip(right_remainder.iter()))
- .for_each(|(scalar_result, (scalar_left, scalar_right))| {
- *scalar_result = scalar_op(*scalar_left, *scalar_right);
- });
-
- let data = unsafe {
- ArrayData::new_unchecked(
- T::DATA_TYPE,
- left.len(),
- None,
- null_bit_buffer,
- 0,
- vec![result.into()],
- vec![],
- )
- };
- Ok(PrimitiveArray::<T>::from(data))
-}
-
/// Calculates the modulus operation `left % right` on two SIMD inputs.
/// The lower-most bits of `valid_mask` specify which vector lanes are
considered as valid.
///
@@ -566,10 +433,7 @@ where
T: ArrowNumericType,
T::Native: Add<Output = T::Native>,
{
- #[cfg(feature = "simd")]
- return simd_math_op(&left, &right, |a, b| a + b, |a, b| a + b);
- #[cfg(not(feature = "simd"))]
- return math_op(left, right, |a, b| a + b);
+ math_op(left, right, |a, b| a + b)
}
/// Add every value in an array by a scalar. If any value in the array is null
then the
@@ -582,17 +446,7 @@ where
T: datatypes::ArrowNumericType,
T::Native: Add<Output = T::Native>,
{
- #[cfg(feature = "simd")]
- {
- let scalar_vector = T::init(scalar);
- return Ok(simd_unary_math_op(
- array,
- |x| x + scalar_vector,
- |x| x + scalar,
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |value| value + scalar));
+ Ok(unary(array, |value| value + scalar))
}
/// Perform `left - right` operation on two arrays. If either left or right
value is null
@@ -605,10 +459,7 @@ where
T: datatypes::ArrowNumericType,
T::Native: Sub<Output = T::Native>,
{
- #[cfg(feature = "simd")]
- return simd_math_op(&left, &right, |a, b| a - b, |a, b| a - b);
- #[cfg(not(feature = "simd"))]
- return math_op(left, right, |a, b| a - b);
+ math_op(left, right, |a, b| a - b)
}
/// Subtract every value in an array by a scalar. If any value in the array is
null then the
@@ -625,17 +476,7 @@ where
+ Div<Output = T::Native>
+ Zero,
{
- #[cfg(feature = "simd")]
- {
- let scalar_vector = T::init(scalar);
- return Ok(simd_unary_math_op(
- array,
- |x| x - scalar_vector,
- |x| x - scalar,
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |value| value - scalar));
+ Ok(unary(array, |value| value - scalar))
}
/// Perform `-` operation on an array. If value is null then the result is
also null.
@@ -644,13 +485,7 @@ where
T: datatypes::ArrowNumericType,
T::Native: Neg<Output = T::Native>,
{
- #[cfg(feature = "simd")]
- {
- let zero_vector = T::init(T::default_value());
- return Ok(simd_unary_math_op(array, |x| zero_vector - x, |x| -x));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |x| -x));
+ Ok(unary(array, |x| -x))
}
/// Raise array with floating point values to the power of a scalar.
@@ -662,17 +497,7 @@ where
T: datatypes::ArrowFloatNumericType,
T::Native: Pow<T::Native, Output = T::Native>,
{
- #[cfg(feature = "simd")]
- {
- let raise_vector = T::init(raise);
- return Ok(simd_unary_math_op(
- array,
- |x| T::pow(x, raise_vector),
- |x| x.pow(raise),
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |x| x.pow(raise)));
+ Ok(unary(array, |x| x.pow(raise)))
}
/// Perform `left * right` operation on two arrays. If either left or right
value is null
@@ -685,10 +510,7 @@ where
T: datatypes::ArrowNumericType,
T::Native: Mul<Output = T::Native>,
{
- #[cfg(feature = "simd")]
- return simd_math_op(&left, &right, |a, b| a * b, |a, b| a * b);
- #[cfg(not(feature = "simd"))]
- return math_op(left, right, |a, b| a * b);
+ math_op(left, right, |a, b| a * b)
}
/// Multiply every value in an array by a scalar. If any value in the array is
null then the
@@ -707,17 +529,7 @@ where
+ Zero
+ One,
{
- #[cfg(feature = "simd")]
- {
- let scalar_vector = T::init(scalar);
- return Ok(simd_unary_math_op(
- array,
- |x| x * scalar_vector,
- |x| x * scalar,
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |value| value * scalar));
+ Ok(unary(array, |value| value * scalar))
}
/// Perform `left % right` operation on two arrays. If either left or right
value is null
@@ -756,6 +568,20 @@ where
return math_checked_divide_op(left, right, |a, b| a / b);
}
+/// Perform `left / right` operation on two arrays without checking for
division by zero.
+/// The result of dividing by zero follows normal floating point rules.
+/// If either left or right value is null then the result is also null. If any
right hand value is zero then the result of this
+pub fn divide_unchecked<T>(
+ left: &PrimitiveArray<T>,
+ right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+ T: datatypes::ArrowFloatNumericType,
+ T::Native: Div<Output = T::Native>,
+{
+ math_op(left, right, |a, b| a / b)
+}
+
/// Modulus every value in an array by a scalar. If any value in the array is
null then the
/// result is also null. If the scalar is zero then the result of this
operation will be
/// `Err(ArrowError::DivideByZero)`.
@@ -771,17 +597,7 @@ where
return Err(ArrowError::DivideByZero);
}
- #[cfg(feature = "simd")]
- {
- let modulo_vector = T::init(modulo);
- return Ok(simd_unary_math_op(
- &array,
- |a| a % modulo_vector,
- |a| a % modulo,
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |a| a % modulo));
+ Ok(unary(array, |a| a % modulo))
}
/// Divide every value in an array by a scalar. If any value in the array is
null then the
@@ -798,17 +614,7 @@ where
if divisor.is_zero() {
return Err(ArrowError::DivideByZero);
}
- #[cfg(feature = "simd")]
- {
- let divisor_vector = T::init(divisor);
- return Ok(simd_unary_math_op(
- &array,
- |a| a / divisor_vector,
- |a| a / divisor,
- ));
- }
- #[cfg(not(feature = "simd"))]
- return Ok(unary(array, |a| a / divisor));
+ Ok(unary(array, |a| a / divisor))
}
#[cfg(test)]