[GitHub] [arrow-rs] viirya commented on a change in pull request #1161: Simplify and reduce code duplication in arithmetic kernels

GitBox Tue, 11 Jan 2022 15:53:08 -0800


viirya commented on a change in pull request #1161:
URL: https://github.com/apache/arrow-rs/pull/1161#discussion_r782610147




##########
File path: arrow/src/compute/kernels/arithmetic.rs
##########
@@ -572,209 +386,32 @@ where
                 if *right_scalar == T::Native::zero() {
                     return Err(ArrowError::DivideByZero);
                 }
-                *result_scalar = *left_scalar / *right_scalar;
+                *result_scalar = op(*left_scalar, *right_scalar);
+            } else {
+                *result_scalar = T::default_value();
             }
             Ok(())
         })?;
 
     Ok(())
 }
 
-/// Scalar-modulo version of `simd_checked_modulus_remainder`.
-#[cfg(feature = "simd")]
-#[inline]
-fn simd_checked_modulus_scalar_remainder<T: ArrowNumericType>(
-    array_chunks: ChunksExact<T::Native>,
-    modulo: T::Native,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Rem<Output = T::Native>,
-{
-    if modulo.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(array_remainder.iter())
-        .for_each(|(result_scalar, array_scalar)| {
-            *result_scalar = *array_scalar % modulo;
-        });
-
-    Ok(())
-}
-
-/// Scalar-divisor version of `simd_checked_divide_remainder`.
-#[cfg(feature = "simd")]
-#[inline]
-fn simd_checked_divide_scalar_remainder<T: ArrowNumericType>(
-    array_chunks: ChunksExact<T::Native>,
-    divisor: T::Native,
-    result_chunks: ChunksExactMut<T::Native>,
-) -> Result<()>
-where
-    T::Native: Zero + Div<Output = T::Native>,
-{
-    if divisor.is_zero() {
-        return Err(ArrowError::DivideByZero);
-    }
-
-    let result_remainder = result_chunks.into_remainder();
-    let array_remainder = array_chunks.remainder();
-
-    result_remainder
-        .iter_mut()
-        .zip(array_remainder.iter())
-        .for_each(|(result_scalar, array_scalar)| {
-            *result_scalar = *array_scalar / divisor;
-        });
-
-    Ok(())
-}
-
-/// SIMD vectorized version of `modulus`.
-///
-/// The modulus kernels need their own implementation as there is a need to 
handle situations
-/// where a modulus by `0` occurs.  This is complicated by `NULL` slots and 
padding.
-#[cfg(feature = "simd")]
-fn simd_modulus<T>(
-    left: &PrimitiveArray<T>,
-    right: &PrimitiveArray<T>,
-) -> Result<PrimitiveArray<T>>
-where
-    T: ArrowNumericType,
-    T::Native: One + Zero + Rem<Output = T::Native>,
-{
-    if left.len() != right.len() {
-        return Err(ArrowError::ComputeError(
-            "Cannot perform math operation on arrays of different 
length".to_string(),
-        ));
-    }
-
-    // Create the combined `Bitmap`
-    let null_bit_buffer =
-        combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?;
-
-    let lanes = T::lanes();
-    let buffer_size = left.len() * std::mem::size_of::<T::Native>();
-    let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, 
false);
-
-    match &null_bit_buffer {
-        Some(b) => {
-            // combine_option_bitmap returns a slice or new buffer starting at 0
-            let valid_chunks = b.bit_chunks(0, left.len());
-
-            // process data in chunks of 64 elements since we also get 64 bits 
of validity information at a time
-
-            // safety: result is newly created above, always written as a T 
below
-            let mut result_chunks =
-                unsafe { result.typed_data_mut().chunks_exact_mut(64) };
-            let mut left_chunks = left.values().chunks_exact(64);
-            let mut right_chunks = right.values().chunks_exact(64);
-
-            valid_chunks
-                .iter()
-                .zip(
-                    result_chunks
-                        .borrow_mut()
-                        
.zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())),
-                )
-                .try_for_each(
-                    |(mut mask, (result_slice, (left_slice, right_slice)))| {
-                        // split chunks further into slices corresponding to 
the vector length
-                        // the compiler is able to unroll this inner loop and 
remove bounds checks
-                        // since the outer chunk size (64) is always a 
multiple of the number of lanes
-                        result_slice
-                            .chunks_exact_mut(lanes)
-                            
.zip(left_slice.chunks_exact(lanes).zip(right_slice.chunks_exact(lanes)))
-                            .try_for_each(|(result_slice, (left_slice, 
right_slice))| -> Result<()> {
-                                let simd_left = T::load(left_slice);
-                                let simd_right = T::load(right_slice);
-
-                                let simd_result = 
simd_checked_modulus::<T>(Some(mask), simd_left, simd_right)?;
-
-                                T::write(simd_result, result_slice);
-
-                                // skip the shift and avoid overflow for u8 
type, which uses 64 lanes.
-                                mask >>= T::lanes() % 64;
-
-                                Ok(())
-                            })
-                    },
-                )?;
-
-            let valid_remainder = valid_chunks.remainder_bits();
-
-            simd_checked_modulus_remainder::<T>(
-                Some(valid_remainder),
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-        None => {
-            // safety: result is newly created above, always written as a T 
below
-            let mut result_chunks =
-                unsafe { result.typed_data_mut().chunks_exact_mut(lanes) };
-            let mut left_chunks = left.values().chunks_exact(lanes);
-            let mut right_chunks = right.values().chunks_exact(lanes);
-
-            result_chunks
-                .borrow_mut()
-                .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
-                .try_for_each(
-                    |(result_slice, (left_slice, right_slice))| -> Result<()> {
-                        let simd_left = T::load(left_slice);
-                        let simd_right = T::load(right_slice);
-
-                        let simd_result =
-                            simd_checked_modulus::<T>(None, simd_left, 
simd_right)?;
-
-                        T::write(simd_result, result_slice);
-
-                        Ok(())
-                    },
-                )?;
-
-            simd_checked_modulus_remainder::<T>(
-                None,
-                left_chunks,
-                right_chunks,
-                result_chunks,
-            )?;
-        }
-    }
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            T::DATA_TYPE,
-            left.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![result.into()],
-            vec![],
-        )
-    };
-    Ok(PrimitiveArray::<T>::from(data))
-}
-
 /// SIMD vectorized version of `divide`.
 ///
 /// The divide kernels need their own implementation as there is a need to 
handle situations
 /// where a divide by `0` occurs.  This is complicated by `NULL` slots and 
padding.

Review comment:
       The comment needs to be updated too?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-rs] viirya commented on a change in pull request #1161: Simplify and reduce code duplication in arithmetic kernels

Reply via email to