alamb commented on code in PR #8689:
URL: https://github.com/apache/arrow-rs/pull/8689#discussion_r2456322736


##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -166,50 +166,86 @@ where
     }
 }
 
-pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
-    array: &PrimitiveArray<I>,
+/// Construct closures to upscale decimals from `(input_precision, 
input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `None` if the required scale increase `delta_scale = output_scale 
- input_scale`
+/// exceeds the supported precomputed precision table 
`O::MAX_FOR_EACH_PRECISION`.
+/// In that case, the caller should treat this as an overflow for the output 
scale
+/// and handle it accordingly (e.g., return a cast error).
+#[allow(clippy::type_complexity)]
+pub fn make_upscaler<I: DecimalType, O: DecimalType>(
     input_precision: u8,
     input_scale: i8,
     output_precision: u8,
     output_scale: i8,
-    cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
 where
-    I: DecimalType,
-    O: DecimalType,
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, 
output_scale);
-    let delta_scale = input_scale - output_scale;
-    // if the reduction of the input number through scaling (dividing) is 
greater
-    // than a possible precision loss (plus potential increase via rounding)
-    // every input number will fit into the output type
+    let delta_scale = output_scale - input_scale;
+
+    // O::MAX_FOR_EACH_PRECISION[k] stores 10^k - 1 (e.g., 9, 99, 999, ...).
+    // Adding 1 yields exactly 10^k without computing a power at runtime.
+    // Using the precomputed table avoids pow(10, k) and its checked/overflow
+    // handling, which is faster and simpler for scaling by 10^delta_scale.
+    let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
+    let mul = max.add_wrapping(O::Native::ONE);
+    let f = move |x| O::Native::from_decimal(x).and_then(|x| 
x.mul_checked(mul).ok());
+
+    // if the gain in precision (digits) is greater than the multiplication 
due to scaling
+    // every number will fit into the output type
     // Example: If we are starting with any number of precision 5 [xxxxx],
-    // then and decrease the scale by 3 will have the following effect on the 
representation:
-    // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
-    // The rounding may add an additional digit, so the cast to be infallible,
-    // the output type needs to have at least 3 digits of precision.
-    // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
-    // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be 
possible
-    let is_infallible_cast = (input_precision as i8) - delta_scale < 
(output_precision as i8);
+    // then an increase of scale by 3 will have the following effect on the 
representation:
+    // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
+    // needs to provide at least 8 digits precision
+    let is_infallible_cast = (input_precision as i8) + delta_scale <= 
(output_precision as i8);
+    let f_infallible = is_infallible_cast
+        .then_some(move |x| 
O::Native::from_decimal(x).unwrap().mul_wrapping(mul));
+    Some((f, f_infallible))
+}
+
+/// Construct closures to downscale decimals from `(input_precision, 
input_scale)` to

Review Comment:
   same comment above



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -223,24 +259,49 @@ where
         O::Native::from_decimal(adjusted)
     };
 
-    Ok(if is_infallible_cast {
-        // make sure we don't perform calculations that don't make sense w/o 
validation
-        validate_decimal_precision_and_scale::<O>(output_precision, 
output_scale)?;

Review Comment:
   now that this function is `pub` it means it can be called from anywhere 
(including outside this crate/repo) so I think more error handling is actually 
required



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -166,50 +166,86 @@ where
     }
 }
 
-pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
-    array: &PrimitiveArray<I>,
+/// Construct closures to upscale decimals from `(input_precision, 
input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `None` if the required scale increase `delta_scale = output_scale 
- input_scale`
+/// exceeds the supported precomputed precision table 
`O::MAX_FOR_EACH_PRECISION`.
+/// In that case, the caller should treat this as an overflow for the output 
scale
+/// and handle it accordingly (e.g., return a cast error).
+#[allow(clippy::type_complexity)]
+pub fn make_upscaler<I: DecimalType, O: DecimalType>(
     input_precision: u8,
     input_scale: i8,
     output_precision: u8,
     output_scale: i8,
-    cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
 where
-    I: DecimalType,
-    O: DecimalType,
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, 
output_scale);
-    let delta_scale = input_scale - output_scale;
-    // if the reduction of the input number through scaling (dividing) is 
greater
-    // than a possible precision loss (plus potential increase via rounding)
-    // every input number will fit into the output type
+    let delta_scale = output_scale - input_scale;
+
+    // O::MAX_FOR_EACH_PRECISION[k] stores 10^k - 1 (e.g., 9, 99, 999, ...).
+    // Adding 1 yields exactly 10^k without computing a power at runtime.
+    // Using the precomputed table avoids pow(10, k) and its checked/overflow
+    // handling, which is faster and simpler for scaling by 10^delta_scale.
+    let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
+    let mul = max.add_wrapping(O::Native::ONE);
+    let f = move |x| O::Native::from_decimal(x).and_then(|x| 
x.mul_checked(mul).ok());
+
+    // if the gain in precision (digits) is greater than the multiplication 
due to scaling
+    // every number will fit into the output type
     // Example: If we are starting with any number of precision 5 [xxxxx],
-    // then and decrease the scale by 3 will have the following effect on the 
representation:
-    // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
-    // The rounding may add an additional digit, so the cast to be infallible,
-    // the output type needs to have at least 3 digits of precision.
-    // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
-    // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be 
possible
-    let is_infallible_cast = (input_precision as i8) - delta_scale < 
(output_precision as i8);
+    // then an increase of scale by 3 will have the following effect on the 
representation:
+    // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
+    // needs to provide at least 8 digits precision
+    let is_infallible_cast = (input_precision as i8) + delta_scale <= 
(output_precision as i8);

Review Comment:
   I see the old code did this too, but it seems like the cast `as i8` could 
potentially convert a number of 128 or larger to a negative number -- maybe 
that is ok



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -166,50 +166,86 @@ where
     }
 }
 
-pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
-    array: &PrimitiveArray<I>,
+/// Construct closures to upscale decimals from `(input_precision, 
input_scale)` to
+/// `(output_precision, output_scale)`.

Review Comment:
   It was not immediately clear to me what the two closures were (I think one 
is fallible and the other infallible)
   
   So how about making that clearer in the docs, like
   
   ```suggestion
   /// Construct closures to upscale decimals from `(input_precision, 
   /// input_scale)` to `(output_precision, output_scale)`.
   ///
   /// Returns `(fallible_fn, infallible_fn)` where:
   /// * `fallible_fn` will return `None` when the requested cast cannot be 
   ///   performed
   /// * `infallible_fn` will panic when the requested cast cannot be 
   ///   performed
   ```



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -166,50 +166,86 @@ where
     }
 }
 
-pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
-    array: &PrimitiveArray<I>,
+/// Construct closures to upscale decimals from `(input_precision, 
input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `None` if the required scale increase `delta_scale = output_scale 
- input_scale`
+/// exceeds the supported precomputed precision table 
`O::MAX_FOR_EACH_PRECISION`.
+/// In that case, the caller should treat this as an overflow for the output 
scale
+/// and handle it accordingly (e.g., return a cast error).
+#[allow(clippy::type_complexity)]
+pub fn make_upscaler<I: DecimalType, O: DecimalType>(
     input_precision: u8,
     input_scale: i8,
     output_precision: u8,
     output_scale: i8,
-    cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
 where
-    I: DecimalType,
-    O: DecimalType,
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, 
output_scale);
-    let delta_scale = input_scale - output_scale;
-    // if the reduction of the input number through scaling (dividing) is 
greater
-    // than a possible precision loss (plus potential increase via rounding)
-    // every input number will fit into the output type
+    let delta_scale = output_scale - input_scale;
+
+    // O::MAX_FOR_EACH_PRECISION[k] stores 10^k - 1 (e.g., 9, 99, 999, ...).
+    // Adding 1 yields exactly 10^k without computing a power at runtime.
+    // Using the precomputed table avoids pow(10, k) and its checked/overflow
+    // handling, which is faster and simpler for scaling by 10^delta_scale.
+    let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
+    let mul = max.add_wrapping(O::Native::ONE);
+    let f = move |x| O::Native::from_decimal(x).and_then(|x| 
x.mul_checked(mul).ok());
+
+    // if the gain in precision (digits) is greater than the multiplication 
due to scaling
+    // every number will fit into the output type
     // Example: If we are starting with any number of precision 5 [xxxxx],
-    // then and decrease the scale by 3 will have the following effect on the 
representation:
-    // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
-    // The rounding may add an additional digit, so the cast to be infallible,
-    // the output type needs to have at least 3 digits of precision.
-    // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
-    // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be 
possible
-    let is_infallible_cast = (input_precision as i8) - delta_scale < 
(output_precision as i8);
+    // then an increase of scale by 3 will have the following effect on the 
representation:
+    // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
+    // needs to provide at least 8 digits precision
+    let is_infallible_cast = (input_precision as i8) + delta_scale <= 
(output_precision as i8);
+    let f_infallible = is_infallible_cast
+        .then_some(move |x| 
O::Native::from_decimal(x).unwrap().mul_wrapping(mul));
+    Some((f, f_infallible))
+}
+
+/// Construct closures to downscale decimals from `(input_precision, 
input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `None` if the required scale reduction `delta_scale = input_scale 
- output_scale`
+/// exceeds the supported precomputed precision table 
`I::MAX_FOR_EACH_PRECISION`.
+/// In this scenario, any value would round to zero (e.g., dividing by 10^k 
where k exceeds the
+/// available precision). Callers should therefore produce zero values 
(preserving nulls) rather
+/// than returning an error.
+#[allow(clippy::type_complexity)]
+pub fn make_downscaler<I: DecimalType, O: DecimalType>(
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
+where
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let delta_scale = input_scale - output_scale;

Review Comment:
   how do we ensure delta_scale is not negative? Given that this method is now 
`pub` it seems like we maybe need to do error checking on the scales more 
proactively



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to