This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4f52a2523 Add divide_opt kernel which produce null values on division
by zero error (#2710)
4f52a2523 is described below
commit 4f52a252374da49d7346aeb2e1b996133f8cf6b2
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Tue Sep 13 14:23:52 2022 -0700
Add divide_opt kernel which produce null values on division by zero error
(#2710)
* Add divide_opt kernel
* Add fast-path for non-null arrays
* Add doc
---
arrow/src/compute/kernels/arithmetic.rs | 51 ++++++++++++++++++++++++++++--
arrow/src/compute/kernels/arity.rs | 55 ++++++++++++++++++++++++++++++++-
2 files changed, 102 insertions(+), 4 deletions(-)
diff --git a/arrow/src/compute/kernels/arithmetic.rs
b/arrow/src/compute/kernels/arithmetic.rs
index 6638ae1e8..a344407e4 100644
--- a/arrow/src/compute/kernels/arithmetic.rs
+++ b/arrow/src/compute/kernels/arithmetic.rs
@@ -32,7 +32,7 @@ use crate::buffer::Buffer;
use crate::buffer::MutableBuffer;
use crate::compute::kernels::arity::unary;
use crate::compute::util::combine_option_bitmap;
-use crate::compute::{binary, try_binary, try_unary, unary_dyn};
+use crate::compute::{binary, binary_opt, try_binary, try_unary, unary_dyn};
use crate::datatypes::{
native_op::ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type,
Date64Type,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType,
@@ -711,7 +711,7 @@ where
}
/// Perform `left + right` operation on two arrays. If either left or right
value is null
-/// then the result is also null. Once
+/// then the result is also null.
///
/// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
/// use `add` instead.
@@ -1118,6 +1118,32 @@ where
return math_checked_divide_op(left, right, |a, b| a.div_checked(b));
}
+/// Perform `left / right` operation on two arrays. If either left or right value is null
+/// then the result is also null.
+///
+/// If any right hand value is zero, the operation value will be replaced with null in the
+/// result.
+///
+/// Unlike `divide` or `divide_checked`, division by zero yields a null value instead
+/// of returning an `Err`. This kernel also doesn't check for overflow; an overflowing
+/// result just wraps around.
+pub fn divide_opt<T>(
+ left: &PrimitiveArray<T>,
+ right: &PrimitiveArray<T>,
+) -> Result<PrimitiveArray<T>>
+where
+ T: ArrowNumericType,
+ T::Native: ArrowNativeTypeOp + Zero + One,
+{
+ Ok(binary_opt(left, right, |a, b| {
+ if b.is_zero() {
+ None
+ } else {
+ Some(a.div_wrapping(b))
+ }
+ }))
+}
+
/// Perform `left / right` operation on two arrays. If either left or right
value is null
/// then the result is also null. If any right hand value is zero then the
result of this
/// operation will be `Err(ArrowError::DivideByZero)`.
@@ -1152,7 +1178,7 @@ pub fn divide<T>(
right: &PrimitiveArray<T>,
) -> Result<PrimitiveArray<T>>
where
- T: datatypes::ArrowNumericType,
+ T: ArrowNumericType,
T::Native: ArrowNativeTypeOp,
{
math_op(left, right, |a, b| a.div_wrapping(b))
@@ -2195,4 +2221,23 @@ mod tests {
let overflow = multiply_scalar_checked(&a, i32::MAX);
overflow.expect_err("overflow should be detected");
}
+
+ #[test]
+ fn test_primitive_div_opt_overflow_division_by_zero() {
+ let a = Int32Array::from(vec![i32::MIN]);
+ let b = Int32Array::from(vec![-1]);
+
+ let wrapped = divide(&a, &b);
+ let expected = Int32Array::from(vec![-2147483648]);
+ assert_eq!(expected, wrapped.unwrap());
+
+ let overflow = divide_opt(&a, &b);
+ let expected = Int32Array::from(vec![-2147483648]);
+ assert_eq!(expected, overflow.unwrap());
+
+ let b = Int32Array::from(vec![0]);
+ let overflow = divide_opt(&a, &b);
+ let expected = Int32Array::from(vec![None]);
+ assert_eq!(expected, overflow.unwrap());
+ }
}
diff --git a/arrow/src/compute/kernels/arity.rs
b/arrow/src/compute/kernels/arity.rs
index ee3ff5e23..fffa81af8 100644
--- a/arrow/src/compute/kernels/arity.rs
+++ b/arrow/src/compute/kernels/arity.rs
@@ -18,7 +18,7 @@
//! Defines kernels suitable to perform operations to primitive arrays.
use crate::array::{
- Array, ArrayData, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
+ Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray,
PrimitiveArray,
};
use crate::buffer::Buffer;
use crate::compute::util::combine_option_bitmap;
@@ -257,6 +257,59 @@ where
Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count,
null_buffer) })
}
+/// Applies the provided binary operation across `a` and `b`, collecting the optional results
+/// into a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the corresponding
+/// index in the result will also be null. The binary operation could return `None` which
+/// results in a new null in the collected [`PrimitiveArray`].
+///
+/// The function is only evaluated for non-null indices.
+///
+/// # Panic
+///
+/// Panics if the arrays have different lengths.
+pub(crate) fn binary_opt<A, B, F, O>(
+ a: &PrimitiveArray<A>,
+ b: &PrimitiveArray<B>,
+ op: F,
+) -> PrimitiveArray<O>
+where
+ A: ArrowPrimitiveType,
+ B: ArrowPrimitiveType,
+ O: ArrowPrimitiveType,
+ F: Fn(A::Native, B::Native) -> Option<O::Native>,
+{
+ assert_eq!(a.len(), b.len());
+
+ if a.is_empty() {
+ return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
+ }
+
+ if a.null_count() == 0 && b.null_count() == 0 {
+ a.values()
+ .iter()
+ .zip(b.values().iter())
+ .map(|(a, b)| op(*a, *b))
+ .collect()
+ } else {
+ let iter_a = ArrayIter::new(a);
+ let iter_b = ArrayIter::new(b);
+
+ let values =
+ iter_a
+ .into_iter()
+ .zip(iter_b.into_iter())
+ .map(|(item_a, item_b)| {
+ if let (Some(a), Some(b)) = (item_a, item_b) {
+ op(a, b)
+ } else {
+ None
+ }
+ });
+
+ values.collect()
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;