This is an automated email from the ASF dual-hosted git repository. tustvold pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push: new d49cd21f9 Make consistent behavior on zeros equality on floating point types (#3510) d49cd21f9 is described below commit d49cd21f9c5ac27961041f7a2a9dbf4cea9708de Author: Liang-Chi Hsieh <vii...@gmail.com> AuthorDate: Thu Jan 12 23:50:05 2023 -0800 Make consistent behavior on zeros equality on floating point types (#3510) * Treat positive and negative float zeros as equal * Update doc * Add test * Make build_compare consistent with comparison kernels --- arrow-ord/src/comparison.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++ arrow-ord/src/ord.rs | 36 +++++------------------- 2 files changed, 74 insertions(+), 29 deletions(-) diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs index 80c8b6b1c..4754aeb1f 100644 --- a/arrow-ord/src/comparison.rs +++ b/arrow-ord/src/comparison.rs @@ -628,6 +628,8 @@ macro_rules! dyn_compare_utf8_scalar { /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -647,6 +649,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -666,6 +670,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -685,6 +691,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -704,6 +712,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -723,6 +733,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn neq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray, ArrowError> where @@ -2098,6 +2110,8 @@ where /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2141,6 +2155,8 @@ pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2186,6 +2202,8 @@ pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arro /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2231,6 +2249,8 @@ pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2278,6 +2298,8 @@ pub fn lt_eq_dyn( /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2322,6 +2344,8 @@ pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray, Arrow /// /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. /// /// # Example @@ -2366,6 +2390,8 @@ pub fn gt_eq_dyn( /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn eq<T>( left: &PrimitiveArray<T>, @@ -2386,6 +2412,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn eq_scalar<T>( left: &PrimitiveArray<T>, @@ -2418,6 +2446,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn neq<T>( left: &PrimitiveArray<T>, @@ -2438,6 +2468,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn neq_scalar<T>( left: &PrimitiveArray<T>, @@ -2459,6 +2491,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt<T>( left: &PrimitiveArray<T>, @@ -2480,6 +2514,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt_scalar<T>( left: &PrimitiveArray<T>, @@ -2501,6 +2537,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt_eq<T>( left: &PrimitiveArray<T>, @@ -2522,6 +2560,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn lt_eq_scalar<T>( left: &PrimitiveArray<T>, @@ -2543,6 +2583,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt<T>( left: &PrimitiveArray<T>, @@ -2564,6 +2606,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt_scalar<T>( left: &PrimitiveArray<T>, @@ -2585,6 +2629,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt_eq<T>( left: &PrimitiveArray<T>, @@ -2606,6 +2652,8 @@ where /// If `simd` feature flag is not enabled: /// For floating values like f32 and f64, this comparison produces an ordering in accordance to /// the totalOrder predicate as defined in the IEEE 754 (2008 revision) floating point standard. +/// Note that totalOrder treats positive and negative zeros are different. If it is necessary +/// to treat them as equal, please normalize zeros before calling this kernel. /// Please refer to `f32::total_cmp` and `f64::total_cmp`. pub fn gt_eq_scalar<T>( left: &PrimitiveArray<T>, @@ -5828,6 +5876,25 @@ mod tests { assert_eq!(e, r); } + #[test] + #[cfg(not(feature = "simd"))] + fn test_floating_zeros() { + let a = Float32Array::from(vec![0.0_f32, -0.0]); + let b = Float32Array::from(vec![-0.0_f32, 0.0]); + + let result = eq_dyn(&a, &b).unwrap(); + let excepted = BooleanArray::from(vec![false, false]); + assert_eq!(excepted, result); + + let result = eq_dyn_scalar(&a, 0.0).unwrap(); + let excepted = BooleanArray::from(vec![true, false]); + assert_eq!(excepted, result); + + let result = eq_dyn_scalar(&a, -0.0).unwrap(); + let excepted = BooleanArray::from(vec![false, true]); + assert_eq!(excepted, result); + } + #[derive(Debug)] struct ToType {} diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs index b7737c6de..00b6668ad 100644 --- a/arrow-ord/src/ord.rs +++ b/arrow-ord/src/ord.rs @@ -21,32 +21,21 @@ use arrow_array::types::*; use arrow_array::*; use arrow_buffer::ArrowNativeType; use arrow_schema::{ArrowError, DataType}; -use num::Float; use std::cmp::Ordering; /// Compare the values at two arbitrary indices in two arrays. pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>; -/// compares two floats, placing NaNs at last -fn cmp_nans_last<T: Float>(a: &T, b: &T) -> Ordering { - match (a.is_nan(), b.is_nan()) { - (true, true) => Ordering::Equal, - (true, false) => Ordering::Greater, - (false, true) => Ordering::Less, - _ => a.partial_cmp(b).unwrap(), - } -} - fn compare_primitives<T: ArrowPrimitiveType>( left: &dyn Array, right: &dyn Array, ) -> DynComparator where - T::Native: Ord, + T::Native: ArrowNativeTypeOp, { let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone()); let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone()); - Box::new(move |i, j| left.value(i).cmp(&right.value(j))) + Box::new(move |i, j| left.value(i).compare(right.value(j))) } fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator { @@ -56,18 +45,6 @@ fn compare_boolean(left: &dyn Array, right: &dyn Array) -> DynComparator { Box::new(move |i, j| left.value(i).cmp(&right.value(j))) } -fn compare_float<T: ArrowPrimitiveType>( - left: &dyn Array, - right: &dyn Array, -) -> DynComparator -where - T::Native: Float, -{ - let left: PrimitiveArray<T> = PrimitiveArray::from(left.data().clone()); - let right: PrimitiveArray<T> = PrimitiveArray::from(right.data().clone()); - Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j))) -} - fn compare_string<T>(left: &dyn Array, right: &dyn Array) -> DynComparator where T: OffsetSizeTrait, @@ -197,8 +174,8 @@ pub fn build_compare( (Int16, Int16) => compare_primitives::<Int16Type>(left, right), (Int32, Int32) => compare_primitives::<Int32Type>(left, right), (Int64, Int64) => compare_primitives::<Int64Type>(left, right), - (Float32, Float32) => compare_float::<Float32Type>(left, right), - (Float64, Float64) => compare_float::<Float64Type>(left, right), + (Float32, Float32) => compare_primitives::<Float32Type>(left, right), + (Float64, Float64) => compare_primitives::<Float64Type>(left, right), (Decimal128(_, _), Decimal128(_, _)) => { compare_primitives::<Decimal128Type>(left, right) } @@ -372,6 +349,7 @@ pub mod tests { let cmp = build_compare(&array, &array).unwrap(); assert_eq!(Ordering::Less, (cmp)(0, 1)); + assert_eq!(Ordering::Equal, (cmp)(1, 1)); } #[test] @@ -380,8 +358,8 @@ pub mod tests { let cmp = build_compare(&array, &array).unwrap(); - assert_eq!(Ordering::Equal, (cmp)(0, 1)); - assert_eq!(Ordering::Equal, (cmp)(1, 0)); + assert_eq!(Ordering::Less, (cmp)(0, 1)); + assert_eq!(Ordering::Greater, (cmp)(1, 0)); } #[test]