This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d90faefe64 Fix broken decimal->decimal casting with large scale reduction (#8580)
d90faefe64 is described below

commit d90faefe6418a1c8c9798bbefce2855666c33031
Author: Ryan Johnson <[email protected]>
AuthorDate: Fri Oct 10 04:35:19 2025 -0600

    Fix broken decimal->decimal casting with large scale reduction (#8580)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/8579
    
    # Rationale for this change
    
    Bug fix
    
    # What changes are included in this PR?
    
    Detect and directly handle large scale reductions, instead of failing by
    accident because the computed divisor overflows.
    
    Also, replace the `pow_checked` call with a lookup into the (already
    existing) `MAX_DECIMALXX_FOR_EACH_PRECISION` array. This requires adding
    a new `MAX_FOR_EACH_PRECISION` constant to the `DecimalType` trait, but
    the corresponding arrays were already public so this seems ok?
    
    # Are these changes tested?
    
    New unit tests exercise the scenario (and its boundary case). The tests
    fail without this fix.
    
    # Are there any user-facing changes?
    
    New constant on the public `DecimalType` trait.
    
    A class of decimal conversions that used to fail will now (correctly)
    produce zeros instead.
---
 arrow-array/src/arithmetic.rs  |  2 +-
 arrow-array/src/types.rs       |  6 +++++
 arrow-cast/src/cast/decimal.rs | 16 +++++++++---
 arrow-cast/src/cast/mod.rs     | 57 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index 73fc88e62c..52708da781 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -288,7 +288,7 @@ native_type_op!(u8);
 native_type_op!(u16);
 native_type_op!(u32);
 native_type_op!(u64);
-native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
+native_type_op!(i256, i256::ZERO, i256::ONE);
 
 native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
 native_type_op!(
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 1517a439ae..fda19242ee 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -1323,6 +1323,8 @@ pub trait DecimalType:
     const MAX_PRECISION: u8;
     /// Maximum no of digits after the decimal point (note the scale can be negative)
     const MAX_SCALE: i8;
+    /// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9, 99, ...]
+    const MAX_FOR_EACH_PRECISION: &[Self::Native];
     /// fn to create its [`DataType`]
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
     /// Default values for [`DataType`]
@@ -1393,6 +1395,7 @@ impl DecimalType for Decimal32Type {
     const BYTE_LENGTH: usize = 4;
     const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &[i32] = 
&arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
@@ -1427,6 +1430,7 @@ impl DecimalType for Decimal64Type {
     const BYTE_LENGTH: usize = 8;
     const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &[i64] = 
&arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
@@ -1461,6 +1465,7 @@ impl DecimalType for Decimal128Type {
     const BYTE_LENGTH: usize = 16;
     const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &[i128] = 
&arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1495,6 +1500,7 @@ impl DecimalType for Decimal256Type {
     const BYTE_LENGTH: usize = 32;
     const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &[i256] = 
&arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index f7235d17f3..1fcae9f66a 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -188,11 +188,19 @@ where
     // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
     let is_infallible_cast = (input_precision as i8) - delta_scale < 
(output_precision as i8);
 
-    let div = I::Native::from_decimal(10_i128)
-        .unwrap()
-        .pow_checked(delta_scale as u32)?;
+    // delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
+    // scale change divides out more digits than the input has precision and the result of the cast
+    // is always zero. For example, if we try to apply delta_scale=10 to a decimal32 value, the largest
+    // possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
+    // (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
+    // smaller results, which also round to zero. In that case, just return an array of zeros.
+    let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale as usize) else {
+        let zeros = vec![O::Native::ZERO; array.len()];
+        return Ok(PrimitiveArray::new(zeros.into(), array.nulls().cloned()));
+    };
 
-    let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
+    let div = max.add_wrapping(I::Native::ONE);
+    let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
     let half_neg = half.neg_wrapping();
 
     let f = |x: I::Native| {
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index f89b7eab7f..aa9d4b021f 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -3084,6 +3084,32 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_decimal32_to_decimal32_large_scale_reduction() {
+        let array = vec![Some(-999999999), Some(0), Some(999999999), None];
+        let array = create_decimal32_array(array, 9, 3).unwrap();
+
+        // Divide out all digits of precision -- rounding could still produce +/- 1
+        let output_type = DataType::Decimal32(9, -6);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal32Array,
+            &output_type,
+            vec![Some(-1), Some(0), Some(1), None]
+        );
+
+        // Divide out more digits than we have precision -- all-zero result
+        let output_type = DataType::Decimal32(9, -7);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal32Array,
+            &output_type,
+            vec![Some(0), Some(0), Some(0), None]
+        );
+    }
+
     #[test]
     fn test_cast_decimal64_to_decimal64_overflow() {
         let input_type = DataType::Decimal64(18, 3);
@@ -3106,6 +3132,37 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_decimal64_to_decimal64_large_scale_reduction() {
+        let array = vec![
+            Some(-999999999999999999),
+            Some(0),
+            Some(999999999999999999),
+            None,
+        ];
+        let array = create_decimal64_array(array, 18, 3).unwrap();
+
+        // Divide out all digits of precision -- rounding could still produce +/- 1
+        let output_type = DataType::Decimal64(18, -15);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal64Array,
+            &output_type,
+            vec![Some(-1), Some(0), Some(1), None]
+        );
+
+        // Divide out more digits than we have precision -- all-zero result
+        let output_type = DataType::Decimal64(18, -16);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal64Array,
+            &output_type,
+            vec![Some(0), Some(0), Some(0), None]
+        );
+    }
+
     #[test]
     fn test_cast_floating_to_decimals() {
         for output_type in [

Reply via email to