[arrow-rs] branch master updated: Simplify and reduce code duplication in arithmetic kernels (#1161)

alamb Thu, 13 Jan 2022 10:28:06 -0800

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/master by this push:
     new 68e5750  Simplify and reduce code duplication in arithmetic kernels 
(#1161)
68e5750 is described below

commit 68e5750b6165933aa2677e7a756e251fea22d622
Author: Jörn Horstmann <[email protected]>
AuthorDate: Thu Jan 13 19:27:52 2022 +0100

    Simplify and reduce code duplication in arithmetic kernels (#1161)
    
    * Simplify and reduce code duplication in arithmetic kernels
    
    * Update comments
---
 arrow/src/compute/kernels/arithmetic.rs | 693 ++++++--------------------------
 arrow/src/datatypes/numeric.rs          |  72 +---
 2 files changed, 123 insertions(+), 642 deletions(-)

diff --git a/arrow/src/compute/kernels/arithmetic.rs 
b/arrow/src/compute/kernels/arithmetic.rs
index 81adb0b..340c113 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -29,6 +29,7 @@ use num::{One, Zero};
 use crate::buffer::Buffer;
 #[cfg(feature = "simd")]
 use crate::buffer::MutableBuffer;
+#[cfg(not(feature = "simd"))]
 use crate::compute::kernels::arity::unary;
 use crate::compute::util::combine_option_bitmap;
 use crate::datatypes;
@@ -41,67 +42,16 @@ use std::borrow::BorrowMut;
 #[cfg(feature = "simd")]
 use std::slice::{ChunksExact, ChunksExactMut};
 
-/// SIMD vectorized version of `unary_math_op` above specialized for signed 
numerical values.
-#[cfg(feature = "simd")]
-fn simd_signed_unary_math_op<T, SIMD_OP, SCALAR_OP>(
-    array: &PrimitiveArray<T>,
-    simd_op: SIMD_OP,
-    scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
-where
-    T: datatypes::ArrowSignedNumericType,
-    SIMD_OP: Fn(T::SignedSimd) -> T::SignedSimd,
-    SCALAR_OP: Fn(T::Native) -> T::Native,
-{
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
-
-    // safety: result is newly created above, always written as a T below
-    let mut result_chunks = unsafe { 
result.typed_data_mut().chunks_exact_mut(lanes) };
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, input_slice)| {
-            let simd_input = T::load_signed(input_slice);
-            let simd_result = T::signed_unary_op(simd_input, &simd_op);
-            T::write_signed(simd_result, result_slice);
-        });
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder.into_iter().zip(array_remainder).for_each(
-        |(scalar_result, scalar_input)| {
-            *scalar_result = scalar_op(*scalar_input);
-        },
-    );
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            array.len(),
-            None,
-            array
-                .data_ref()
-                .null_buffer()
-                .map(|b| b.bit_slice(array.offset(), array.len())),
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
+/// Creates a new PrimitiveArray by applying `simd_op` to the input `array`.
+/// If the length of the array is not multiple of the number of vector lanes
+/// then the remainder of the array will be calculated using `scalar_op`.
+/// Any operation on a `NULL` value will result in a `NULL` value in the 
output.
 #[cfg(feature = "simd")]
 fn simd_unary_math_op<T, SIMD_OP, SCALAR_OP>(
     array: &PrimitiveArray<T>,
     simd_op: SIMD_OP,
     scalar_op: SCALAR_OP,
-) -> Result<PrimitiveArray<T>>
+) -> PrimitiveArray<T>
 where
     T: ArrowNumericType,
     SIMD_OP: Fn(T::Simd) -> T::Simd,
@@ -148,7 +98,7 @@ where
             vec![],
         )
     };
-    Ok(PrimitiveArray::<T>::from(data))
+    PrimitiveArray::<T>::from(data)
 }
 
 /// Helper function to perform math lambda function on values from two arrays. 
If either
@@ -202,20 +152,23 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
-/// Helper function to modulus two arrays.
+/// Helper function for operations where a valid `0` on the right array should
+/// result in an [ArrowError::DivideByZero], namely the division and modulo 
operations
 ///
 /// # Errors
 ///
 /// This function errors if:
 /// * the arrays have different lengths
-/// * a division by zero is found
-fn math_modulus<T>(
+/// * there is an element where both left and right values are valid and the 
right value is `0`
+fn math_checked_divide_op<T, F>(
     left: &PrimitiveArray<T>,
     right: &PrimitiveArray<T>,
+    op: F,
 ) -> Result<PrimitiveArray<T>>
 where
     T: ArrowNumericType,
-    T::Native: Rem<Output = T::Native> + Zero,
+    T::Native: One + Zero,
+    F: Fn(T::Native, T::Native) -> T::Native,
 {
     if left.len() != right.len() {
         return Err(ArrowError::ComputeError(
@@ -234,7 +187,7 @@ where
                     if right.is_zero() {
                         Err(ArrowError::DivideByZero)
                     } else {
-                        Ok(*left % *right)
+                        Ok(op(*left, *right))
                     }
                 } else {
                     Ok(T::default_value())
@@ -253,7 +206,7 @@ where
                 if right.is_zero() {
                     Err(ArrowError::DivideByZero)
                 } else {
-                    Ok(*left % *right)
+                    Ok(op(*left, *right))
                 }
             });
         // Safety: Iterator comes from a PrimitiveArray which reports its size 
correctly
@@ -274,111 +227,14 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
-/// Helper function to divide two arrays.
+/// Creates a new PrimitiveArray by applying `simd_op` to the `left` and 
`right` input arrays.
+/// If the length of the arrays is not multiple of the number of vector lanes
+/// then the remainder of the array will be calculated using `scalar_op`.
+/// Any operation on a `NULL` value will result in a `NULL` value in the 
output.
 ///
 /// # Errors
 ///
-/// This function errors if:
-/// * the arrays have different lengths
-/// * a division by zero is found
-fn math_divide<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Div<Output = T::Native> + Zero,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different 
length".to_string(),
-        ));
-    }
-
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let buffer = if let Some(b) = &null_bit_buffer {
-        let values = left.values().iter().zip(right.values()).enumerate().map(
-            |(i, (left, right))| {
-                let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) };
-                if is_valid {
-                    if right.is_zero() {
-                        Err(ArrowError::DivideByZero)
-                    } else {
-                        Ok(*left / *right)
-                    }
-                } else {
-                    Ok(T::default_value())
-                }
-            },
-        );
-        // Safety: Iterator comes from a PrimitiveArray which reports its size 
correctly
-        unsafe { Buffer::try_from_trusted_len_iter(values) }
-    } else {
-        // no value is null
-        let values = left
-            .values()
-            .iter()
-            .zip(right.values())
-            .map(|(left, right)| {
-                if right.is_zero() {
-                    Err(ArrowError::DivideByZero)
-                } else {
-                    Ok(*left / *right)
-                }
-            });
-        // Safety: Iterator comes from a PrimitiveArray which reports its size 
correctly
-        unsafe { Buffer::try_from_trusted_len_iter(values) }
-    }?;
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![buffer],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// Scalar-modulo version of `math_modulus`.
-fn math_modulus_scalar<T>(
-    array: &PrimitiveArray<T>,
-    modulo: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Rem<Output = T::Native> + Zero,
-{
-    if modulo.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    Ok(unary(array, |value| value % modulo))
-}
-
-/// Scalar-divisor version of `math_divide`.
-fn math_divide_scalar<T>(
-    array: &PrimitiveArray<T>,
-    divisor: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: Div<Output = T::Native> + Zero,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    Ok(unary(array, |value| value / divisor))
-}
-
-/// SIMD vectorized version of `math_op` above.
+/// This function errors if the arrays have different lengths
 #[cfg(feature = "simd")]
 fn simd_math_op<T, SIMD_OP, SCALAR_OP>(
     left: &PrimitiveArray<T>,
@@ -444,9 +300,12 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
-/// SIMD vectorized implementation of `left % right`.
-/// If any of the lanes marked as valid in `valid_mask` are `0` then an 
`ArrowError::DivideByZero`
-/// is returned. The contents of no-valid lanes are undefined.
+/// Calculates the modulus operation `left % right` on two SIMD inputs.
+/// The lower-most bits of `valid_mask` specify which vector lanes are 
considered as valid.
+///
+/// # Errors
+///
+/// This function returns a [`ArrowError::DivideByZero`] if a valid element in 
`right` is `0`
 #[cfg(feature = "simd")]
 #[inline]
 fn simd_checked_modulus<T: ArrowNumericType>(
@@ -478,9 +337,12 @@ where
     }
 }
 
-/// SIMD vectorized implementation of `left / right`.
-/// If any of the lanes marked as valid in `valid_mask` are `0` then an 
`ArrowError::DivideByZero`
-/// is returned. The contents of no-valid lanes are undefined.
+/// Calculates the division operation `left / right` on two SIMD inputs.
+/// The lower-most bits of `valid_mask` specify which vector lanes are 
considered as valid.
+///
+/// # Errors
+///
+/// This function returns a [`ArrowError::DivideByZero`] if a valid element in 
`right` is `0`
 #[cfg(feature = "simd")]
 #[inline]
 fn simd_checked_divide<T: ArrowNumericType>(
@@ -512,52 +374,26 @@ where
     }
 }
 
-/// Scalar implementation of `left % right` for the remainder elements after 
complete chunks have been processed using SIMD.
-/// If any of the values marked as valid in `valid_mask` are `0` then an 
`ArrowError::DivideByZero` is returned.
-#[cfg(feature = "simd")]
-#[inline]
-fn simd_checked_modulus_remainder<T: ArrowNumericType>(
-    valid_mask: Option<u64>,
-    left_chunks: ChunksExact<T::Native>,
-    right_chunks: ChunksExact<T::Native>,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Rem<Output = T::Native>,
-{
-    let result_remainder = result_chunks.into_remainder();
-    let left_remainder = left_chunks.remainder();
-    let right_remainder = right_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(left_remainder.iter().zip(right_remainder.iter()))
-        .enumerate()
-        .try_for_each(|(i, (result_scalar, (left_scalar, right_scalar)))| {
-            if valid_mask.map(|mask| mask & (1 << i) != 0).unwrap_or(true) {
-                if *right_scalar == T::Native::zero() {
-                    return Err(ArrowError::DivideByZero);
-                }
-                *result_scalar = *left_scalar % *right_scalar;
-            }
-            Ok(())
-        })?;
-
-    Ok(())
-}
-
-/// Scalar implementation of `left / right` for the remainder elements after 
complete chunks have been processed using SIMD.
-/// If any of the values marked as valid in `valid_mask` are `0` then an 
`ArrowError::DivideByZero` is returned.
+/// Applies `op` on the remainder elements of two input chunks and writes the 
result into
+/// the remainder elements of `result_chunks`.
+/// The lower-most bits of `valid_mask` specify which elements are considered 
as valid.
+///
+/// # Errors
+///
+/// This function returns a [`ArrowError::DivideByZero`] if a valid element in 
`right` is `0`
 #[cfg(feature = "simd")]
 #[inline]
-fn simd_checked_divide_remainder<T: ArrowNumericType>(
+fn simd_checked_divide_op_remainder<T, F>(
     valid_mask: Option<u64>,
     left_chunks: ChunksExact<T::Native>,
     right_chunks: ChunksExact<T::Native>,
     result_chunks: ChunksExactMut<T::Native>,
+    op: F,
 ) -> Result<()>
 where
-    T::Native: Zero + Div<Output = T::Native>,
+    T: ArrowNumericType,
+    T::Native: Zero,
+    F: Fn(T::Native, T::Native) -> T::Native,
 {
     let result_remainder = result_chunks.into_remainder();
     let left_remainder = left_chunks.remainder();
@@ -572,7 +408,9 @@ where
                 if *right_scalar == T::Native::zero() {
                     return Err(ArrowError::DivideByZero);
                 }
-                *result_scalar = *left_scalar / *right_scalar;
+                *result_scalar = op(*left_scalar, *right_scalar);
+            } else {
+                *result_scalar = T::default_value();
             }
             Ok(())
         })?;
@@ -580,201 +418,28 @@ where
     Ok(())
 }
 
-/// Scalar-modulo version of `simd_checked_modulus_remainder`.
-#[cfg(feature = "simd")]
-#[inline]
-fn simd_checked_modulus_scalar_remainder<T: ArrowNumericType>(
-    array_chunks: ChunksExact<T::Native>,
-    modulo: T::Native,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Rem<Output = T::Native>,
-{
-    if modulo.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(array_remainder.iter())
-        .for_each(|(result_scalar, array_scalar)| {
-            *result_scalar = *array_scalar % modulo;
-        });
-
-    Ok(())
-}
-
-/// Scalar-divisor version of `simd_checked_divide_remainder`.
-#[cfg(feature = "simd")]
-#[inline]
-fn simd_checked_divide_scalar_remainder<T: ArrowNumericType>(
-    array_chunks: ChunksExact<T::Native>,
-    divisor: T::Native,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Div<Output = T::Native>,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(array_remainder.iter())
-        .for_each(|(result_scalar, array_scalar)| {
-            *result_scalar = *array_scalar / divisor;
-        });
-
-    Ok(())
-}
-
-/// SIMD vectorized version of `modulus`.
+/// Creates a new PrimitiveArray by applying `simd_op` to the `left` and 
`right` input array.
+/// If the length of the arrays is not multiple of the number of vector lanes
+/// then the remainder of the array will be calculated using `scalar_op`.
+/// Any operation on a `NULL` value will result in a `NULL` value in the 
output.
 ///
-/// The modulus kernels need their own implementation as there is a need to 
handle situations
-/// where a modulus by `0` occurs.  This is complicated by `NULL` slots and 
padding.
-#[cfg(feature = "simd")]
-fn simd_modulus<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Rem<Output = T::Native>,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different 
length".to_string(),
-        ));
-    }
-
-    // Create the combined `Bitmap`
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let lanes = T::lanes();
-    let buffer_size = left.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
-
-    match &null_bit_buffer {
-        Some(b) => {
-            // combine_option_bitmap returns a slice or new buffer starting at 0
-            let valid_chunks = b.bit_chunks(0, left.len());
-
-            // process data in chunks of 64 elements since we also get 64 bits 
of validity information at a time
-
-            // safety: result is newly created above, always written as a T 
below
-            let mut result_chunks =
-                unsafe { result.typed_data_mut().chunks_exact_mut(64) };
-            let mut left_chunks = left.values().chunks_exact(64);
-            let mut right_chunks = right.values().chunks_exact(64);
-
-            valid_chunks
-                .iter()
-                .zip(
-                    result_chunks
-                        .borrow_mut()
-                        
.zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())),
-                )
-                .try_for_each(
-                    |(mut mask, (result_slice, (left_slice, right_slice)))| {
-                        // split chunks further into slices corresponding to 
the vector length
-                        // the compiler is able to unroll this inner loop and 
remove bounds checks
-                        // since the outer chunk size (64) is always a 
multiple of the number of lanes
-                        result_slice
-                            .chunks_exact_mut(lanes)
-                            
.zip(left_slice.chunks_exact(lanes).zip(right_slice.chunks_exact(lanes)))
-                            .try_for_each(|(result_slice, (left_slice, 
right_slice))| -> Result<()> {
-                                let simd_left = T::load(left_slice);
-                                let simd_right = T::load(right_slice);
-
-                                let simd_result = 
simd_checked_modulus::<T>(Some(mask), simd_left, simd_right)?;
-
-                                T::write(simd_result, result_slice);
-
-                                // skip the shift and avoid overflow for u8 
type, which uses 64 lanes.
-                                mask >>= T::lanes() % 64;
-
-                                Ok(())
-                            })
-                    },
-                )?;
-
-            let valid_remainder = valid_chunks.remainder_bits();
-
-            simd_checked_modulus_remainder::<T>(
-                Some(valid_remainder),
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-        None => {
-            // safety: result is newly created above, always written as a T 
below
-            let mut result_chunks =
-                unsafe { result.typed_data_mut().chunks_exact_mut(lanes) };
-            let mut left_chunks = left.values().chunks_exact(lanes);
-            let mut right_chunks = right.values().chunks_exact(lanes);
-
-            result_chunks
-                .borrow_mut()
-                .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-                .try_for_each(
-                    |(result_slice, (left_slice, right_slice))| -> Result<()> {
-                        let simd_left = T::load(left_slice);
-                        let simd_right = T::load(right_slice);
-
-                        let simd_result =
-                            simd_checked_modulus::<T>(None, simd_left, 
simd_right)?;
-
-                        T::write(simd_result, result_slice);
-
-                        Ok(())
-                    },
-                )?;
-
-            simd_checked_modulus_remainder::<T>(
-                None,
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-    }
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// SIMD vectorized version of `divide`.
+/// # Errors
 ///
-/// The divide kernels need their own implementation as there is a need to 
handle situations
-/// where a divide by `0` occurs.  This is complicated by `NULL` slots and 
padding.
+/// This function errors if:
+/// * the arrays have different lengths
+/// * there is an element where both left and right values are valid and the 
right value is `0`
 #[cfg(feature = "simd")]
-fn simd_divide<T>(
+fn simd_checked_divide_op<T, SIMD_OP, SCALAR_OP>(
     left: &PrimitiveArray<T>,
     right: &PrimitiveArray<T>,
+    simd_op: SIMD_OP,
+    scalar_op: SCALAR_OP,
 ) -> Result<PrimitiveArray<T>>
 where
     T: ArrowNumericType,
-    T::Native: One + Zero + Div<Output = T::Native>,
+    T::Native: One + Zero,
+    SIMD_OP: Fn(Option<u64>, T::Simd, T::Simd) -> Result<T::Simd>,
+    SCALAR_OP: Fn(T::Native, T::Native) -> T::Native,
 {
     if left.len() != right.len() {
         return Err(ArrowError::ComputeError(
@@ -822,7 +487,7 @@ where
                                 let simd_left = T::load(left_slice);
                                 let simd_right = T::load(right_slice);
 
-                                let simd_result = 
simd_checked_divide::<T>(Some(mask), simd_left, simd_right)?;
+                                let simd_result = simd_op(Some(mask), 
simd_left, simd_right)?;
 
                                 T::write(simd_result, result_slice);
 
@@ -836,11 +501,12 @@ where
 
             let valid_remainder = valid_chunks.remainder_bits();
 
-            simd_checked_divide_remainder::<T>(
+            simd_checked_divide_op_remainder::<T, _>(
                 Some(valid_remainder),
                 left_chunks,
                 right_chunks,
                 result_chunks,
+                scalar_op,
             )?;
         }
         None => {
@@ -858,8 +524,7 @@ where
                         let simd_left = T::load(left_slice);
                         let simd_right = T::load(right_slice);
 
-                        let simd_result =
-                            simd_checked_divide::<T>(None, simd_left, 
simd_right)?;
+                        let simd_result = simd_op(None, simd_left, 
simd_right)?;
 
                         T::write(simd_result, result_slice);
 
@@ -867,11 +532,12 @@ where
                     },
                 )?;
 
-            simd_checked_divide_remainder::<T>(
+            simd_checked_divide_op_remainder::<T, _>(
                 None,
                 left_chunks,
                 right_chunks,
                 result_chunks,
+                scalar_op,
             )?;
         }
     }
@@ -890,112 +556,6 @@ where
     Ok(PrimitiveArray::<T>::from(data))
 }
 
-/// SIMD vectorized version of `modulus_scalar`.
-#[cfg(feature = "simd")]
-fn simd_modulus_scalar<T>(
-    array: &PrimitiveArray<T>,
-    modulo: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Rem<Output = T::Native>,
-{
-    if modulo.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
-
-    // safety: result is newly created above, always written as a T below
-    let mut result_chunks = unsafe { 
result.typed_data_mut().chunks_exact_mut(lanes) };
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    let simd_right = T::init(modulo);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, array_slice)| {
-            let simd_left = T::load(array_slice);
-
-            let simd_result = T::bin_op(simd_left, simd_right, |a, b| a % b);
-            T::write(simd_result, result_slice);
-        });
-
-    simd_checked_modulus_scalar_remainder::<T>(array_chunks, modulo, 
result_chunks)?;
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            array.len(),
-            None,
-            array
-                .data_ref()
-                .null_buffer()
-                .map(|b| b.bit_slice(array.offset(), array.len())),
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
-/// SIMD vectorized version of `divide_scalar`.
-#[cfg(feature = "simd")]
-fn simd_divide_scalar<T>(
-    array: &PrimitiveArray<T>,
-    divisor: T::Native,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Div<Output = T::Native>,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let lanes = T::lanes();
-    let buffer_size = array.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
-
-    // safety: result is newly created above, always written as a T below
-    let mut result_chunks = unsafe { 
result.typed_data_mut().chunks_exact_mut(lanes) };
-    let mut array_chunks = array.values().chunks_exact(lanes);
-
-    let simd_right = T::init(divisor);
-
-    result_chunks
-        .borrow_mut()
-        .zip(array_chunks.borrow_mut())
-        .for_each(|(result_slice, array_slice)| {
-            let simd_left = T::load(array_slice);
-
-            let simd_result = T::bin_op(simd_left, simd_right, |a, b| a / b);
-            T::write(simd_result, result_slice);
-        });
-
-    simd_checked_divide_scalar_remainder::<T>(array_chunks, divisor, 
result_chunks)?;
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            array.len(),
-            None,
-            array
-                .data_ref()
-                .null_buffer()
-                .map(|b| b.bit_slice(array.offset(), array.len())),
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
 /// Perform `left + right` operation on two arrays. If either left or right 
value is null
 /// then the result is also null.
 pub fn add<T>(
@@ -1004,11 +564,7 @@ pub fn add<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
+    T::Native: Add<Output = T::Native>,
 {
     #[cfg(feature = "simd")]
     return simd_math_op(&left, &right, |a, b| a + b, |a, b| a + b);
@@ -1024,18 +580,16 @@ pub fn add_scalar<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero
-        + One,
+    T::Native: Add<Output = T::Native>,
 {
     #[cfg(feature = "simd")]
     {
         let scalar_vector = T::init(scalar);
-        return simd_unary_math_op(array, |x| x + scalar_vector, |x| x + 
scalar);
+        return Ok(simd_unary_math_op(
+            array,
+            |x| x + scalar_vector,
+            |x| x + scalar,
+        ));
     }
     #[cfg(not(feature = "simd"))]
     return Ok(unary(array, |value| value + scalar));
@@ -1049,11 +603,7 @@ pub fn subtract<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Zero,
+    T::Native: Sub<Output = T::Native>,
 {
     #[cfg(feature = "simd")]
     return simd_math_op(&left, &right, |a, b| a - b, |a, b| a - b);
@@ -1087,11 +637,14 @@ where
 /// Perform `-` operation on an array. If value is null then the result is 
also null.
 pub fn negate<T>(array: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
 where
-    T: datatypes::ArrowSignedNumericType,
+    T: datatypes::ArrowNumericType,
     T::Native: Neg<Output = T::Native>,
 {
     #[cfg(feature = "simd")]
-    return simd_signed_unary_math_op(array, |x| -x, |x| -x);
+    {
+        let zero_vector = T::init(T::default_value());
+        return Ok(simd_unary_math_op(array, |x| zero_vector - x, |x| -x));
+    }
     #[cfg(not(feature = "simd"))]
     return Ok(unary(array, |x| -x));
 }
@@ -1108,7 +661,11 @@ where
     #[cfg(feature = "simd")]
     {
         let raise_vector = T::init(raise);
-        return simd_unary_math_op(array, |x| T::pow(x, raise_vector), |x| 
x.pow(raise));
+        return Ok(simd_unary_math_op(
+            array,
+            |x| T::pow(x, raise_vector),
+            |x| x.pow(raise),
+        ));
     }
     #[cfg(not(feature = "simd"))]
     return Ok(unary(array, |x| x.pow(raise)));
@@ -1122,12 +679,7 @@ pub fn multiply<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero,
+    T::Native: Mul<Output = T::Native>,
 {
     #[cfg(feature = "simd")]
     return simd_math_op(&left, &right, |a, b| a * b, |a, b| a * b);
@@ -1169,18 +721,14 @@ pub fn modulus<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero
-        + One,
+    T::Native: Rem<Output = T::Native> + Zero + One,
 {
     #[cfg(feature = "simd")]
-    return simd_modulus(&left, &right);
+    return simd_checked_divide_op(&left, &right, simd_checked_modulus::<T>, 
|a, b| {
+        a % b
+    });
     #[cfg(not(feature = "simd"))]
-    return math_modulus(left, right);
+    return math_checked_divide_op(left, right, |a, b| a % b);
 }
 
 /// Perform `left / right` operation on two arrays. If either left or right 
value is null
@@ -1192,18 +740,12 @@ pub fn divide<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero
-        + One,
+    T::Native: Div<Output = T::Native> + Zero + One,
 {
     #[cfg(feature = "simd")]
-    return simd_divide(&left, &right);
+    return simd_checked_divide_op(&left, &right, simd_checked_divide::<T>, |a, 
b| a / b);
     #[cfg(not(feature = "simd"))]
-    return math_divide(left, right);
+    return math_checked_divide_op(left, right, |a, b| a / b);
 }
 
 /// Modulus every value in an array by a scalar. If any value in the array is 
null then the
@@ -1215,18 +757,23 @@ pub fn modulus_scalar<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero
-        + One,
+    T::Native: Rem<Output = T::Native> + Zero,
 {
+    if modulo.is_zero() {
+        return Err(ArrowError::DivideByZero);
+    }
+
     #[cfg(feature = "simd")]
-    return simd_modulus_scalar(&array, modulo);
+    {
+        let modulo_vector = T::init(modulo);
+        return Ok(simd_unary_math_op(
+            &array,
+            |a| a % modulo_vector,
+            |a| a % modulo,
+        ));
+    }
     #[cfg(not(feature = "simd"))]
-    return math_modulus_scalar(array, modulo);
+    return Ok(unary(array, |a| a % modulo));
 }
 
 /// Divide every value in an array by a scalar. If any value in the array is 
null then the
@@ -1238,18 +785,22 @@ pub fn divide_scalar<T>(
 ) -> Result<PrimitiveArray<T>>
 where
     T: datatypes::ArrowNumericType,
-    T::Native: Add<Output = T::Native>
-        + Sub<Output = T::Native>
-        + Mul<Output = T::Native>
-        + Div<Output = T::Native>
-        + Rem<Output = T::Native>
-        + Zero
-        + One,
+    T::Native: Div<Output = T::Native> + Zero,
 {
+    if divisor.is_zero() {
+        return Err(ArrowError::DivideByZero);
+    }
     #[cfg(feature = "simd")]
-    return simd_divide_scalar(&array, divisor);
+    {
+        let divisor_vector = T::init(divisor);
+        return Ok(simd_unary_math_op(
+            &array,
+            |a| a / divisor_vector,
+            |a| a / divisor,
+        ));
+    }
     #[cfg(not(feature = "simd"))]
-    return math_divide_scalar(array, divisor);
+    return Ok(unary(array, |a| a / divisor));
 }
 
 #[cfg(test)]
diff --git a/arrow/src/datatypes/numeric.rs b/arrow/src/datatypes/numeric.rs
index 2ff53c9..b8fa871 100644
--- a/arrow/src/datatypes/numeric.rs
+++ b/arrow/src/datatypes/numeric.rs
@@ -19,9 +19,7 @@ use super::*;
 #[cfg(feature = "simd")]
 use packed_simd::*;
 #[cfg(feature = "simd")]
-use std::ops::{
-    Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Rem, 
Sub,
-};
+use std::ops::{Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Not, 
Rem, Sub};
 
 /// A subtype of primitive type that represents numeric values.
 ///
@@ -368,74 +366,6 @@ make_numeric_type!(DurationMillisecondType, i64, i64x8, 
m64x8);
 make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8);
 make_numeric_type!(DurationNanosecondType, i64, i64x8, m64x8);
 
-/// A subtype of primitive type that represents signed numeric values.
-///
-/// SIMD operations are defined in this trait if available on the target 
system.
-#[cfg(feature = "simd")]
-pub trait ArrowSignedNumericType: ArrowNumericType
-where
-    Self::SignedSimd: Neg<Output = Self::SignedSimd>,
-{
-    /// Defines the SIMD type that should be used for this numeric type
-    type SignedSimd;
-
-    /// Loads a slice of signed numeric type into a SIMD register
-    fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd;
-
-    /// Performs a SIMD unary operation on signed numeric type
-    fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
-        a: Self::SignedSimd,
-        op: F,
-    ) -> Self::SignedSimd;
-
-    /// Writes a signed SIMD result back to a slice
-    fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]);
-}
-
-#[cfg(not(feature = "simd"))]
-pub trait ArrowSignedNumericType: ArrowNumericType
-where
-    Self::Native: std::ops::Neg<Output = Self::Native>,
-{
-}
-
-macro_rules! make_signed_numeric_type {
-    ($impl_ty:ty, $simd_ty:ident) => {
-        #[cfg(feature = "simd")]
-        impl ArrowSignedNumericType for $impl_ty {
-            type SignedSimd = $simd_ty;
-
-            #[inline]
-            fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd {
-                unsafe { 
Self::SignedSimd::from_slice_unaligned_unchecked(slice) }
-            }
-
-            #[inline]
-            fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
-                a: Self::SignedSimd,
-                op: F,
-            ) -> Self::SignedSimd {
-                op(a)
-            }
-
-            #[inline]
-            fn write_signed(simd_result: Self::SignedSimd, slice: &mut 
[Self::Native]) {
-                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) 
};
-            }
-        }
-
-        #[cfg(not(feature = "simd"))]
-        impl ArrowSignedNumericType for $impl_ty {}
-    };
-}
-
-make_signed_numeric_type!(Int8Type, i8x64);
-make_signed_numeric_type!(Int16Type, i16x32);
-make_signed_numeric_type!(Int32Type, i32x16);
-make_signed_numeric_type!(Int64Type, i64x8);
-make_signed_numeric_type!(Float32Type, f32x16);
-make_signed_numeric_type!(Float64Type, f64x8);
-
 #[cfg(feature = "simd")]
 pub trait ArrowFloatNumericType: ArrowNumericType {
     fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd;

[arrow-rs] branch master updated: Simplify and reduce code duplication in arithmetic kernels (#1161)

Reply via email to