This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d90faefe64 Fix broken decimal->decimal casting with large scale
reduction (#8580)
d90faefe64 is described below
commit d90faefe6418a1c8c9798bbefce2855666c33031
Author: Ryan Johnson <[email protected]>
AuthorDate: Fri Oct 10 04:35:19 2025 -0600
Fix broken decimal->decimal casting with large scale reduction (#8580)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8579
# Rationale for this change
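Bug fix: decimal->decimal casts with a large scale reduction failed because the computed divisor overflowed.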
# What changes are included in this PR?
Detect and directly handle large scale reductions, instead of failing by
accident because the computed divisor overflows.
Also, replace the `pow_checked` call with a lookup into the (already
existing) `MAX_DECIMALXX_FOR_EACH_PRECISION` array. This requires adding
a new `MAX_FOR_EACH_PRECISION` constant to the `DecimalType` trait, but
the corresponding arrays were already public so this seems ok?
# Are these changes tested?
New unit tests exercise the scenario (and its boundary case). The tests
fail without this fix.
# Are there any user-facing changes?
New constant on the public `DecimalType` trait.
A class of decimal conversions that used to fail will now (correctly)
produce zeros instead.
---
arrow-array/src/arithmetic.rs | 2 +-
arrow-array/src/types.rs | 6 +++++
arrow-cast/src/cast/decimal.rs | 16 +++++++++---
arrow-cast/src/cast/mod.rs | 57 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 76 insertions(+), 5 deletions(-)
diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index 73fc88e62c..52708da781 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -288,7 +288,7 @@ native_type_op!(u8);
native_type_op!(u16);
native_type_op!(u32);
native_type_op!(u64);
-native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
+native_type_op!(i256, i256::ZERO, i256::ONE);
native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
native_type_op!(
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 1517a439ae..fda19242ee 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -1323,6 +1323,8 @@ pub trait DecimalType:
const MAX_PRECISION: u8;
/// Maximum no of digits after the decimal point (note the scale can be
negative)
const MAX_SCALE: i8;
+ /// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9,
99, ...]
+ const MAX_FOR_EACH_PRECISION: &[Self::Native];
/// fn to create its [`DataType`]
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
/// Default values for [`DataType`]
@@ -1393,6 +1395,7 @@ impl DecimalType for Decimal32Type {
const BYTE_LENGTH: usize = 4;
const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
+ const MAX_FOR_EACH_PRECISION: &[i32] =
&arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
const DEFAULT_TYPE: DataType =
DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
@@ -1427,6 +1430,7 @@ impl DecimalType for Decimal64Type {
const BYTE_LENGTH: usize = 8;
const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
+ const MAX_FOR_EACH_PRECISION: &[i64] =
&arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
const DEFAULT_TYPE: DataType =
DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
@@ -1461,6 +1465,7 @@ impl DecimalType for Decimal128Type {
const BYTE_LENGTH: usize = 16;
const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
+ const MAX_FOR_EACH_PRECISION: &[i128] =
&arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
const DEFAULT_TYPE: DataType =
DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1495,6 +1500,7 @@ impl DecimalType for Decimal256Type {
const BYTE_LENGTH: usize = 32;
const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
+ const MAX_FOR_EACH_PRECISION: &[i256] =
&arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
const DEFAULT_TYPE: DataType =
DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index f7235d17f3..1fcae9f66a 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -188,11 +188,19 @@ where
// [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be
possible
let is_infallible_cast = (input_precision as i8) - delta_scale <
(output_precision as i8);
- let div = I::Native::from_decimal(10_i128)
- .unwrap()
- .pow_checked(delta_scale as u32)?;
+ // delta_scale is guaranteed to be > 0, but may also be larger than
I::MAX_PRECISION. If so, the
+ // scale change divides out more digits than the input has precision and
the result of the cast
+ // is always zero. For example, if we try to apply delta_scale=10 to a
decimal32 value, the largest
+ // possible result is 999999999/10000000000 = 0.0999999999, which rounds
to zero. Smaller values
+ // (e.g. 1/10000000000) or larger delta_scale (e.g.
999999999/10000000000000) produce even
+ // smaller results, which also round to zero. In that case, just return an
array of zeros.
+ let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale as usize) else {
+ let zeros = vec![O::Native::ZERO; array.len()];
+ return Ok(PrimitiveArray::new(zeros.into(), array.nulls().cloned()));
+ };
- let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
+ let div = max.add_wrapping(I::Native::ONE);
+ let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
let half_neg = half.neg_wrapping();
let f = |x: I::Native| {
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index f89b7eab7f..aa9d4b021f 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -3084,6 +3084,32 @@ mod tests {
);
}
+ #[test]
+ fn test_cast_decimal32_to_decimal32_large_scale_reduction() {
+ let array = vec![Some(-999999999), Some(0), Some(999999999), None];
+ let array = create_decimal32_array(array, 9, 3).unwrap();
+
+ // Divide out all digits of precision -- rounding could still produce
+/- 1
+ let output_type = DataType::Decimal32(9, -6);
+ assert!(can_cast_types(array.data_type(), &output_type));
+ generate_cast_test_case!(
+ &array,
+ Decimal32Array,
+ &output_type,
+ vec![Some(-1), Some(0), Some(1), None]
+ );
+
+ // Divide out more digits than we have precision -- all-zero result
+ let output_type = DataType::Decimal32(9, -7);
+ assert!(can_cast_types(array.data_type(), &output_type));
+ generate_cast_test_case!(
+ &array,
+ Decimal32Array,
+ &output_type,
+ vec![Some(0), Some(0), Some(0), None]
+ );
+ }
+
#[test]
fn test_cast_decimal64_to_decimal64_overflow() {
let input_type = DataType::Decimal64(18, 3);
@@ -3106,6 +3132,37 @@ mod tests {
);
}
+ #[test]
+ fn test_cast_decimal64_to_decimal64_large_scale_reduction() {
+ let array = vec![
+ Some(-999999999999999999),
+ Some(0),
+ Some(999999999999999999),
+ None,
+ ];
+ let array = create_decimal64_array(array, 18, 3).unwrap();
+
+ // Divide out all digits of precision -- rounding could still produce
+/- 1
+ let output_type = DataType::Decimal64(18, -15);
+ assert!(can_cast_types(array.data_type(), &output_type));
+ generate_cast_test_case!(
+ &array,
+ Decimal64Array,
+ &output_type,
+ vec![Some(-1), Some(0), Some(1), None]
+ );
+
+ // Divide out more digits than we have precision -- all-zero result
+ let output_type = DataType::Decimal64(18, -16);
+ assert!(can_cast_types(array.data_type(), &output_type));
+ generate_cast_test_case!(
+ &array,
+ Decimal64Array,
+ &output_type,
+ vec![Some(0), Some(0), Some(0), None]
+ );
+ }
+
#[test]
fn test_cast_floating_to_decimals() {
for output_type in [