256` [arrow-rs]

via GitHub Sat, 18 Oct 2025 08:53:57 -0700


scovich commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2432443526



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
     |timestamp| Self::make_value(timestamp.naive_utc())
 );
 
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to 
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to 
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast,
+    O::Native: DecimalCast,
+{
+    let delta_scale = output_scale - input_scale;
+
+    // Determine if the cast is infallible based on precision/scale math
+    let is_infallible_cast =
+        is_infallible_decimal_cast(input_precision, input_scale, 
output_precision, output_scale);
+

Review Comment:
   nit: move this whole block down to where we actually use it -- declare near 
first (only) use



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
     |timestamp| Self::make_value(timestamp.naive_utc())
 );
 
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to 
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to 
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast,
+    O::Native: DecimalCast,
+{
+    let delta_scale = output_scale - input_scale;
+
+    // Determine if the cast is infallible based on precision/scale math
+    let is_infallible_cast =
+        is_infallible_decimal_cast(input_precision, input_scale, 
output_precision, output_scale);
+
+    let scaled = if delta_scale == 0 {
+        O::Native::from_decimal(value)
+    } else if delta_scale > 0 {
+        let mul = O::Native::from_decimal(10_i128)
+            .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;

Review Comment:
   Could use the same performance optimization as the negative scale case below:
   ```suggestion
           let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
           let mul = max.add_wrapping(O::Native::ONE);
   ```
   (it didn't matter much in the columnar decimal cast code, but it probably 
does matter in row-wise variant cast code)



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
     |timestamp| Self::make_value(timestamp.naive_utc())
 );
 
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to 
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to 
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I: DecimalType,
+    O: DecimalType,

Review Comment:
   tiny nit to consider (saves space)
   ```suggestion
   pub(crate) fn rescale_decimal<I: DecimalType, O: DecimalType>(
       value: I::Native,
       input_precision: u8,
       input_scale: i8,
       output_precision: u8,
       output_scale: i8,
   ) -> Option<O::Native>
   where
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
     |timestamp| Self::make_value(timestamp.naive_utc())
 );
 
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to 
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to 
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast,
+    O::Native: DecimalCast,
+{
+    let delta_scale = output_scale - input_scale;
+
+    // Determine if the cast is infallible based on precision/scale math
+    let is_infallible_cast =
+        is_infallible_decimal_cast(input_precision, input_scale, 
output_precision, output_scale);
+
+    let scaled = if delta_scale == 0 {
+        O::Native::from_decimal(value)
+    } else if delta_scale > 0 {
+        let mul = O::Native::from_decimal(10_i128)
+            .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;

Review Comment:
   Also -- we should benchmark, but it might be faster to multiply by one than 
to execute the branch that distinguishes between zero and positive delta scale. 
If so, we would want code like this:
   ```rust
   let (scaled, is_infallible_cast) = if delta_scale < 0 {
       // ... big comment about why ...
       let is_infallible = input_precision + delta_scale < output_precision;
   
       // ... comment about dividing out too many digits ...
       let delta_scale = delta_scale.unsigned_abs() as usize;
       let Some(max) = ... else { ... return zero ... };
         ...
       (O::Native::from_decimal(adjusted)?, is_infallible_cast)
   } else {
       // ... big comment explaining why ...
       let is_infallible_cast = input_precision + delta_scale <= 
output_precision;
   
       let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
       let mul = max.add_wrapping(O::Native::ONE);
       let x = O::Native::from_decimal(value)?;
       (x.mul_checked(mul).ok()?, is_infallible_cast)
   }
   
   (is_infallible_cast || O::is_valid_decimal_precision(scaled, 
output_precision)).then(scaled)
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
     |timestamp| Self::make_value(timestamp.naive_utc())
 );
 
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to 
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to 
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast,
+    O::Native: DecimalCast,
+{
+    let delta_scale = output_scale - input_scale;
+
+    // Determine if the cast is infallible based on precision/scale math
+    let is_infallible_cast =
+        is_infallible_decimal_cast(input_precision, input_scale, 
output_precision, output_scale);
+
+    let scaled = if delta_scale == 0 {
+        O::Native::from_decimal(value)
+    } else if delta_scale > 0 {
+        let mul = O::Native::from_decimal(10_i128)
+            .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
+        O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
+    } else {
+        // delta_scale is guaranteed to be > 0, but may also be larger than 
I::MAX_PRECISION. If so, the
+        // scale change divides out more digits than the input has precision 
and the result of the cast
+        // is always zero. For example, if we try to apply delta_scale=10 a 
decimal32 value, the largest
+        // possible result is 999999999/10000000000 = 0.0999999999, which 
rounds to zero. Smaller values
+        // (e.g. 1/10000000000) or larger delta_scale (e.g. 
999999999/10000000000000) produce even
+        // smaller results, which also round to zero. In that case, just 
return an array of zeros.
+        let delta_scale = delta_scale.unsigned_abs() as usize;
+        let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else {
+            return Some(O::Native::ZERO);
+        };
+        let div = max.add_wrapping(I::Native::ONE);
+        let half = 
div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
+        let half_neg = half.neg_wrapping();
+
+        // div is >= 10 and so this cannot overflow
+        let d = value.div_wrapping(div);
+        let r = value.mod_wrapping(div);
+
+        // Round result
+        let adjusted = match value >= I::Native::ZERO {
+            true if r >= half => d.add_wrapping(I::Native::ONE),
+            false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
+            _ => d,
+        };
+        O::Native::from_decimal(adjusted)
+    };
+
+    scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v, 
output_precision))
+}
+
+/// Returns true if casting from (input_precision, input_scale) to
+/// (output_precision, output_scale) is infallible based on precision/scale 
math.
+fn is_infallible_decimal_cast(
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> bool {
+    let delta_scale = output_scale - input_scale;

Review Comment:
   nit: we could have passed this in, our caller already computed it. but I 
guess this is more regular?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [Variant] Support variant to `Decimal32/64/128/256` [arrow-rs]

Reply via email to