This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 4f52a2523 Add divide_opt kernel which produce null values on division 
by zero error (#2710)
4f52a2523 is described below

commit 4f52a252374da49d7346aeb2e1b996133f8cf6b2
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Tue Sep 13 14:23:52 2022 -0700

    Add divide_opt kernel which produce null values on division by zero error 
(#2710)
    
    * Add divide_opt kernel
    
    * Add fast-path for non-null arrays
    
    * Add doc
---
 arrow/src/compute/kernels/arithmetic.rs | 51 ++++++++++++++++++++++++++++--
 arrow/src/compute/kernels/arity.rs      | 55 ++++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/arrow/src/compute/kernels/arithmetic.rs 
b/arrow/src/compute/kernels/arithmetic.rs
index 6638ae1e8..a344407e4 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -32,7 +32,7 @@ use crate::buffer::Buffer;
 use crate::buffer::MutableBuffer;
 use crate::compute::kernels::arity::unary;
 use crate::compute::util::combine_option_bitmap;
-use crate::compute::{binary, try_binary, try_unary, unary_dyn};
+use crate::compute::{binary, binary_opt, try_binary, try_unary, unary_dyn};
 use crate::datatypes::{
     native_op::ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, 
Date64Type,
     IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, 
IntervalYearMonthType,
@@ -711,7 +711,7 @@ where
 }
 
 /// Perform `left + right` operation on two arrays. If either left or right 
value is null
-/// then the result is also null. Once
+/// then the result is also null.
 ///
 /// This detects overflow and returns an `Err` for that. For an 
non-overflow-checking variant,
 /// use `add` instead.
@@ -1118,6 +1118,32 @@ where
     return math_checked_divide_op(left, right, |a, b| a.div_checked(b));
 }
 
+/// Perform `left / right` operation on two arrays. If either left or right 
value is null
+/// then the result is also null.
+///
+/// If any right hand value is zero, the operation value will be replaced with 
null in the
+/// result.
+///
+/// Unlike `divide` or `divide_checked`, division by zero yields a null 
value instead
+/// of returning an `Err`. This also doesn't check for overflow; an 
overflowing
+/// result just wraps around.
+pub fn divide_opt<T>(
+    left: &PrimitiveArray<T>,
+    right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+    T: ArrowNumericType,
+    T::Native: ArrowNativeTypeOp + Zero + One,
+{
+    Ok(binary_opt(left, right, |a, b| {
+        if b.is_zero() {
+            None
+        } else {
+            Some(a.div_wrapping(b))
+        }
+    }))
+}
+
 /// Perform `left / right` operation on two arrays. If either left or right 
value is null
 /// then the result is also null. If any right hand value is zero then the 
result of this
 /// operation will be `Err(ArrowError::DivideByZero)`.
@@ -1152,7 +1178,7 @@ pub fn divide<T>(
     right: &PrimitiveArray<T>,
 ) -> Result<PrimitiveArray<T>>
 where
-    T: datatypes::ArrowNumericType,
+    T: ArrowNumericType,
     T::Native: ArrowNativeTypeOp,
 {
     math_op(left, right, |a, b| a.div_wrapping(b))
@@ -2195,4 +2221,23 @@ mod tests {
         let overflow = multiply_scalar_checked(&a, i32::MAX);
         overflow.expect_err("overflow should be detected");
     }
+
+    #[test]
+    fn test_primitive_div_opt_overflow_division_by_zero() {
+        let a = Int32Array::from(vec![i32::MIN]);
+        let b = Int32Array::from(vec![-1]);
+
+        let wrapped = divide(&a, &b);
+        let expected = Int32Array::from(vec![-2147483648]);
+        assert_eq!(expected, wrapped.unwrap());
+
+        let overflow = divide_opt(&a, &b);
+        let expected = Int32Array::from(vec![-2147483648]);
+        assert_eq!(expected, overflow.unwrap());
+
+        let b = Int32Array::from(vec![0]);
+        let overflow = divide_opt(&a, &b);
+        let expected = Int32Array::from(vec![None]);
+        assert_eq!(expected, overflow.unwrap());
+    }
 }
diff --git a/arrow/src/compute/kernels/arity.rs 
b/arrow/src/compute/kernels/arity.rs
index ee3ff5e23..fffa81af8 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -18,7 +18,7 @@
 //! Defines kernels suitable to perform operations to primitive arrays.
 
 use crate::array::{
-    Array, ArrayData, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
+    Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, 
PrimitiveArray,
 };
 use crate::buffer::Buffer;
 use crate::compute::util::combine_option_bitmap;
@@ -257,6 +257,59 @@ where
     Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count, 
null_buffer) })
 }
 
+/// Applies the provided binary operation across `a` and `b`, collecting the 
optional results
+/// into a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the 
corresponding
+/// index in the result will also be null. The binary operation could return 
`None` which
+/// results in a new null in the collected [`PrimitiveArray`].
+///
+/// The function is only evaluated for non-null indices.
+///
+/// # Panics
+///
+/// Panics if the arrays have different lengths.
+pub(crate) fn binary_opt<A, B, F, O>(
+    a: &PrimitiveArray<A>,
+    b: &PrimitiveArray<B>,
+    op: F,
+) -> PrimitiveArray<O>
+where
+    A: ArrowPrimitiveType,
+    B: ArrowPrimitiveType,
+    O: ArrowPrimitiveType,
+    F: Fn(A::Native, B::Native) -> Option<O::Native>,
+{
+    assert_eq!(a.len(), b.len());
+
+    if a.is_empty() {
+        return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
+    }
+
+    if a.null_count() == 0 && b.null_count() == 0 {
+        a.values()
+            .iter()
+            .zip(b.values().iter())
+            .map(|(a, b)| op(*a, *b))
+            .collect()
+    } else {
+        let iter_a = ArrayIter::new(a);
+        let iter_b = ArrayIter::new(b);
+
+        let values =
+            iter_a
+                .into_iter()
+                .zip(iter_b.into_iter())
+                .map(|(item_a, item_b)| {
+                    if let (Some(a), Some(b)) = (item_a, item_b) {
+                        op(a, b)
+                    } else {
+                        None
+                    }
+                });
+
+        values.collect()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

Reply via email to