alamb commented on code in PR #8158: URL: https://github.com/apache/arrow-rs/pull/8158#discussion_r2282550601
########## parquet/src/column/writer/mod.rs: ########## @@ -2906,6 +2922,108 @@ mod tests { } } + #[test] + fn test_float_statistics_infinity_with_nan() { + // Test column with Infinity and NaN values + let stats = + statistics_roundtrip::<FloatType>(&[1.0, f32::INFINITY, f32::NAN, 2.0, f32::NAN]); + assert!(stats.is_min_max_backwards_compatible()); + if let Statistics::Float(stats) = &stats { + assert_eq!(stats.min_opt().unwrap(), &1.0); + assert_eq!(stats.max_opt().unwrap(), &f32::INFINITY); + } else { + panic!("expecting Statistics::Float"); + } + assert_eq!(stats.nan_count_opt(), Some(2)); + } + + #[test] + fn test_float_statistics_neg_infinity_with_nan() { + // Test column with -Infinity and NaN values + let stats = statistics_roundtrip::<FloatType>(&[ + f32::NEG_INFINITY, + -1.0, + f32::NAN, + 0.0, + f32::NAN, + 1.0, + ]); + assert!(stats.is_min_max_backwards_compatible()); + if let Statistics::Float(stats) = &stats { + assert_eq!(stats.min_opt().unwrap(), &f32::NEG_INFINITY); + assert_eq!(stats.max_opt().unwrap(), &1.0); + } else { + panic!("expecting Statistics::Float"); + } + assert_eq!(stats.nan_count_opt(), Some(2)); + } + + #[test] + fn test_float_statistics_both_infinities_with_nan() { + // Test column with both +Infinity, -Infinity and NaN values + let stats = statistics_roundtrip::<FloatType>(&[ + f32::NEG_INFINITY, + f32::NAN, + 0.0, + f32::INFINITY, + f32::NAN, + ]); + assert!(stats.is_min_max_backwards_compatible()); + if let Statistics::Float(stats) = &stats { + assert_eq!(stats.min_opt().unwrap(), &f32::NEG_INFINITY); + assert_eq!(stats.max_opt().unwrap(), &f32::INFINITY); + } else { + panic!("expecting Statistics::Float"); + } + assert_eq!(stats.nan_count_opt(), Some(2)); + } + + #[test] + fn test_double_statistics_infinity_with_nan() { + // Test with f64 (double) type + let stats = statistics_roundtrip::<DoubleType>(&[ + 1.0, + f64::INFINITY, + f64::NAN, + f64::NEG_INFINITY, + f64::NAN, + 2.0, + ]); + 
assert!(stats.is_min_max_backwards_compatible()); + if let Statistics::Double(stats) = &stats { + assert_eq!(stats.min_opt().unwrap(), &f64::NEG_INFINITY); + assert_eq!(stats.max_opt().unwrap(), &f64::INFINITY); + } else { + panic!("expecting Statistics::Double"); + } + + assert_eq!(stats.nan_count_opt(), Some(2)); + } + + #[test] + fn test_float16_statistics_infinity_with_nan() { + // Test Float16 with Infinity and NaN + let input = [ + f16::ONE, + f16::INFINITY, + f16::NAN, + f16::NEG_INFINITY, + f16::NAN, + ] + .into_iter() + .map(|s| ByteArray::from(s).into()) + .collect::<Vec<_>>(); + + let stats = float16_statistics_roundtrip(&input); + assert!(stats.is_min_max_backwards_compatible()); + assert_eq!( + stats.min_opt().unwrap(), + &ByteArray::from(f16::NEG_INFINITY) + ); + assert_eq!(stats.max_opt().unwrap(), &ByteArray::from(f16::INFINITY)); + assert_eq!(stats.nan_count_opt(), Some(2)); + } Review Comment: I think the spec also talks about the case where all values are NaN, so that would also be a good case to check -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org