diegoQuinas commented on code in PR #21710:
URL: https://github.com/apache/datafusion/pull/21710#discussion_r3197862307
##########
datafusion/spark/src/function/math/ceil.rs:
##########
@@ -168,137 +336,18 @@ fn spark_ceil_array(input: &Arc<dyn arrow::array::Array>) -> Result<ColumnarValue> {
     Ok(ColumnarValue::Array(result))
 }
-#[cfg(test)]
-mod tests {
- use super::*;
- use arrow::array::{Decimal128Array, Float32Array, Float64Array,
Int64Array};
- use datafusion_common::ScalarValue;
-
- #[test]
- fn test_ceil_float64() {
- let input = Float64Array::from(vec![
- Some(125.2345),
- Some(15.0001),
- Some(0.1),
- Some(-0.9),
- Some(-1.1),
- Some(123.0),
- None,
- ]);
- let args = vec![ColumnarValue::Array(Arc::new(input))];
- let result = spark_ceil(&args).unwrap();
- let result = match result {
- ColumnarValue::Array(arr) => arr,
- _ => panic!("Expected array"),
- };
- let result = result.as_primitive::<Int64Type>();
- assert_eq!(
- result,
- &Int64Array::from(vec![
- Some(126),
- Some(16),
- Some(1),
- Some(0),
- Some(-1),
- Some(123),
- None,
- ])
- );
- }
-
- #[test]
- fn test_ceil_float32() {
- let input = Float32Array::from(vec![
- Some(125.2345f32),
- Some(15.0001f32),
- Some(0.1f32),
- Some(-0.9f32),
- Some(-1.1f32),
- Some(123.0f32),
- None,
- ]);
- let args = vec![ColumnarValue::Array(Arc::new(input))];
- let result = spark_ceil(&args).unwrap();
- let result = match result {
- ColumnarValue::Array(arr) => arr,
- _ => panic!("Expected array"),
- };
- let result = result.as_primitive::<Int64Type>();
- assert_eq!(
- result,
- &Int64Array::from(vec![
- Some(126),
- Some(16),
- Some(1),
- Some(0),
- Some(-1),
- Some(123),
- None,
- ])
- );
- }
-
- #[test]
- fn test_ceil_int64() {
- let input = Int64Array::from(vec![Some(1), Some(-1), None]);
- let args = vec![ColumnarValue::Array(Arc::new(input))];
- let result = spark_ceil(&args).unwrap();
- let result = match result {
- ColumnarValue::Array(arr) => arr,
- _ => panic!("Expected array"),
- };
- let result = result.as_primitive::<Int64Type>();
- assert_eq!(result, &Int64Array::from(vec![Some(1), Some(-1), None]));
- }
-
- #[test]
- fn test_ceil_decimal128() {
- // Decimal128(10, 2): 150 = 1.50, -150 = -1.50, 100 = 1.00
- let return_type = DataType::Decimal128(9, 0);
- let input = Decimal128Array::from(vec![Some(150), Some(-150),
Some(100), None])
- .with_data_type(DataType::Decimal128(10, 2));
- let args = vec![ColumnarValue::Array(Arc::new(input))];
- let result = spark_ceil(&args).unwrap();
- let result = match result {
- ColumnarValue::Array(arr) => arr,
- _ => panic!("Expected array"),
- };
- let result = result.as_primitive::<Decimal128Type>();
- let expected = Decimal128Array::from(vec![Some(2), Some(-1), Some(1),
None])
- .with_data_type(return_type);
- assert_eq!(result, &expected);
- }
-
- #[test]
- fn test_ceil_float64_scalar() {
- let input = ScalarValue::Float64(Some(-1.1));
- let args = vec![ColumnarValue::Scalar(input)];
- let result = match spark_ceil(&args).unwrap() {
- ColumnarValue::Scalar(v) => v,
- _ => panic!("Expected scalar"),
- };
- assert_eq!(result, ScalarValue::Int64(Some(-1)));
- }
-
- #[test]
- fn test_ceil_float32_scalar() {
- let input = ScalarValue::Float32(Some(125.2345f32));
- let args = vec![ColumnarValue::Scalar(input)];
- let result = match spark_ceil(&args).unwrap() {
- ColumnarValue::Scalar(v) => v,
- _ => panic!("Expected scalar"),
- };
- assert_eq!(result, ScalarValue::Int64(Some(126)));
- }
-
- #[test]
- fn test_ceil_int64_scalar() {
- let input = ScalarValue::Int64(Some(48));
- let args = vec![ColumnarValue::Scalar(input)];
- let result = match spark_ceil(&args).unwrap() {
- ColumnarValue::Scalar(v) => v,
- _ => panic!("Expected scalar"),
- };
- assert_eq!(result, ScalarValue::Int64(Some(48)));
+fn ceil_float<T: num_traits::Float>(value: T, scale: i32) -> T {
+ if scale >= 0 {
+ let factor = T::from(10.0f64.powi(scale)).unwrap_or_else(T::infinity);
+ if factor.is_infinite() {
+ return value;
Review Comment:
Yes — for the case this branch covers, it matches Spark.
Spark's `RoundCeil` (`mathExpressions.scala`, `RoundBase.nullSafeEval`) for
`FloatType`/`DoubleType` does:
```scala
if (d.isNaN || d.isInfinite) d
else BigDecimal(d).setScale(_scale, mode).toDouble
```
Because `BigDecimal` is arbitrary precision, `setScale(largeScale,
CEILING).toDouble` round-trips any finite double unchanged. So when our
`factor` overflows (scale ≥ 309 for f64, ≥ 39 for f32), returning `value` is
correct for finite, NaN, and ±∞ inputs.
One adjacent case I noticed while checking this: when `factor` is finite but
`value * factor` overflows (e.g. f64 `ceil(2.5, 308)`), we currently return
`±Inf` while Spark returns `2.5`. Worth a follow-up guard? Happy to add it + a
test in this PR or a separate one — let me know your preference.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]