256` [arrow-rs]

via GitHub Fri, 17 Oct 2025 22:35:17 -0700


scovich commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2407040670



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());
+            let ten = $to_int_ty(10);
+
+            let scaled = if input_scale == $output_scale {
+                Some(variant)
+            } else if input_scale < $output_scale {
+                // scale_up means output has more fractional digits than input
+                // multiply integer by 10^(output_scale - input_scale)
+                let delta = ($output_scale - input_scale) as u32;
+                let mul = ten.checked_pow(delta)?;
+                variant.checked_mul(mul)
+            } else {
+                // scale_down means output has fewer fractional digits than 
input
+                // divide by 10^(input_scale - output_scale) with rounding
+                let delta = (input_scale - $output_scale) as u32;
+                let div = ten.checked_pow(delta)?;
+                let v = variant;
+                let d = v.checked_div(div)?;
+                let r = v % div;
+
+                // rounding in the same way as 
convert_to_smaller_scale_decimal in arrow-cast
+                let half = div.checked_div($to_int_ty(2))?;
+                let half_neg = half.checked_neg()?;
+                let adjusted = match v >= $to_int_ty(0) {
+                    true if r >= half => d.checked_add($to_int_ty(1))?,
+                    false if r <= half_neg => d.checked_sub($to_int_ty(1))?,
+                    _ => d,
+                };
+                Some(adjusted)
+            };
+
+            scaled.filter(|v| $validate(*v, $precision))
+        })()

Review Comment:
   Why is `(|| -> Option<_> { ... })()` needed here? The macro body _should_ 
Just Work because the macro is invoked inside a method that already returns 
`Option`?



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());

Review Comment:
   How does this conversion work? I'm guessing it's actually _not_ a conversion 
at all? if it's _not_ a conversion, we can avoid passing the redundant 
`$to_int_ty` by a type inference trick:
   ```rust
   let zero = 0;
   let variant = variant.integer() + zero; // forces `zero` to the native type
   let ten = 10 + zero; // forces `ten` to also be the native type
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());
+            let ten = $to_int_ty(10);
+
+            let scaled = if input_scale == $output_scale {
+                Some(variant)
+            } else if input_scale < $output_scale {
+                // scale_up means output has more fractional digits than input
+                // multiply integer by 10^(output_scale - input_scale)
+                let delta = ($output_scale - input_scale) as u32;
+                let mul = ten.checked_pow(delta)?;
+                variant.checked_mul(mul)
+            } else {
+                // scale_down means output has fewer fractional digits than 
input
+                // divide by 10^(input_scale - output_scale) with rounding
+                let delta = (input_scale - $output_scale) as u32;
+                let div = ten.checked_pow(delta)?;
+                let v = variant;
+                let d = v.checked_div(div)?;
+                let r = v % div;
+
+                // rounding in the same way as 
convert_to_smaller_scale_decimal in arrow-cast

Review Comment:
   Looking at `convert_to_smaller_scale_decimal`, virtually all the logic in 
that function is doing exactly what we want here... and then the last line just 
applies the calculation as an appropriate unary operation on the input `array`. 
Rather than duplicate the logic, is there some way we could factor it out or 
otherwise reuse it? Problem is, it's in a different crate, so the factored out 
logic would have to be `pub`...



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
 
         Arc::new(struct_array)
     }
+
+    macro_rules! max_unscaled_value {
+        (32, $precision:expr) => {
+            (u32::pow(10, $precision as u32) - 1) as i32
+        };
+        (64, $precision:expr) => {
+            (u64::pow(10, $precision as u32) - 1) as i64
+        };
+        (128, $precision:expr) => {
+            (u128::pow(10, $precision as u32) - 1) as i128
+        };
+    }

Review Comment:
   I don't think we need this macro, see other comments below



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());
+            let ten = $to_int_ty(10);
+
+            let scaled = if input_scale == $output_scale {
+                Some(variant)
+            } else if input_scale < $output_scale {
+                // scale_up means output has more fractional digits than input
+                // multiply integer by 10^(output_scale - input_scale)
+                let delta = ($output_scale - input_scale) as u32;
+                let mul = ten.checked_pow(delta)?;
+                variant.checked_mul(mul)
+            } else {
+                // scale_down means output has fewer fractional digits than 
input
+                // divide by 10^(input_scale - output_scale) with rounding

Review Comment:
   I'm a bit nervous about rounding (the whole point of decimal is to be 
lossless, unlike floating point). But I guess in this case the user 
specifically asked for the narrower type, so the usual worries about lossy 
coercion don't apply?



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -342,6 +375,117 @@ impl<'a, T: PrimitiveFromVariant> 
VariantToPrimitiveArrowRowBuilder<'a, T> {
     }
 }
 
+// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct 
parameters
+pub(crate) trait VariantDecimalScaler: datatypes::DecimalType {
+    fn scale_from_variant(
+        value: &Variant<'_, '_>,
+        scale: i8,
+        precision: u8,
+    ) -> Option<<Self as ArrowPrimitiveType>::Native>;
+}
+
+macro_rules! impl_variant_decimal_scaler {
+    ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
+        impl VariantDecimalScaler for $t {
+            fn scale_from_variant(
+                value: &Variant<'_, '_>,
+                scale: i8,
+                precision: u8,
+            ) -> Option<<Self as ArrowPrimitiveType>::Native> {
+                scale_variant_decimal!(
+                    value,
+                    $variant_method,
+                    $to_native,
+                    scale,
+                    precision,
+                    $validate
+                )
+            }
+        }
+    };
+}
+
+impl_variant_decimal_scaler!(
+    Decimal32Type,
+    as_decimal4,
+    |x: i32| x,
+    is_validate_decimal32_precision
+);
+impl_variant_decimal_scaler!(
+    Decimal64Type,
+    as_decimal8,
+    |x: i64| x,
+    is_validate_decimal64_precision
+);
+impl_variant_decimal_scaler!(
+    Decimal128Type,
+    as_decimal16,
+    |x: i128| x,
+    is_validate_decimal_precision
+);
+impl_variant_decimal_scaler!(
+    Decimal256Type,
+    as_decimal16,
+    i256::from_i128,
+    is_validate_decimal256_precision
+);
+
+/// Builder for converting variant values to arrow Decimal values
+pub(crate) struct VariantToDecimalArrowRowBuilder<
+    'a,
+    T: datatypes::DecimalType + VariantDecimalScaler,
+> {

Review Comment:
   ```suggestion
   pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T: 
VariantDecimalScaler> {
   ```
   (`VariantDecimalScaler` already includes `DecimalType`)
   
   (again below on the impl)



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());
+            let ten = $to_int_ty(10);
+
+            let scaled = if input_scale == $output_scale {
+                Some(variant)
+            } else if input_scale < $output_scale {
+                // scale_up means output has more fractional digits than input
+                // multiply integer by 10^(output_scale - input_scale)
+                let delta = ($output_scale - input_scale) as u32;
+                let mul = ten.checked_pow(delta)?;
+                variant.checked_mul(mul)
+            } else {
+                // scale_down means output has fewer fractional digits than 
input
+                // divide by 10^(input_scale - output_scale) with rounding
+                let delta = (input_scale - $output_scale) as u32;
+                let div = ten.checked_pow(delta)?;
+                let v = variant;
+                let d = v.checked_div(div)?;
+                let r = v % div;

Review Comment:
   Why do we need both `v` and `variant`?



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
 
         Arc::new(struct_array)
     }
+
+    macro_rules! max_unscaled_value {
+        (32, $precision:expr) => {
+            (u32::pow(10, $precision as u32) - 1) as i32
+        };
+        (64, $precision:expr) => {
+            (u64::pow(10, $precision as u32) - 1) as i64
+        };
+        (128, $precision:expr) => {
+            (u128::pow(10, $precision as u32) - 1) as i128
+        };
+    }
+
+    #[test]
+    fn get_decimal32_unshredded_var_scales_to_scale2() {
+        // Build unshredded variant values with different scales
+        let mut builder = crate::VariantArrayBuilder::new(4);
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
2).unwrap())); // 12.34
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
3).unwrap())); // 1.234
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
0).unwrap())); // 1234
+        builder.append_null();
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let options = 
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), 1234);
+        assert_eq!(result.value(1), 123);
+        assert_eq!(result.value(2), 123400);
+        assert!(result.is_null(3));
+    }
+
+    #[test]
+    fn get_decimal32_unshredded_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 
3).unwrap()));
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let options = 
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.value(0), 124);
+        assert_eq!(result.value(1), 125);
+        assert_eq!(result.value(2), -124);
+        assert_eq!(result.value(3), -125);
+    }
+
+    #[test]
+    fn get_decimal32_precision_overflow_safe() {
+        // Exceed Decimal32 max precision (9) after scaling
+        let mut builder = crate::VariantArrayBuilder::new(1);
+        builder.append_variant(Variant::from(
+            VariantDecimal4::try_new(max_unscaled_value!(32, 
DECIMAL32_MAX_PRECISION), 0).unwrap(),

Review Comment:
   (and, further, it looks like these are redundant 
[MAX_DECIMAL32_FOR_EACH_PRECISION](https://docs.rs/arrow/latest/arrow/datatypes/constant.MAX_DECIMAL32_FOR_EACH_PRECISION.html),
 e.g. 
   ```rust
   MAX_DECIMAL32_FOR_EACH_PRECISION[DECIMAL32_MAX_PRECISION]
   ```
   (tho those constants are provided by `arrow-data` crate, which perhaps we 
don't want the `parquet-variant` crate to take a dependency on)



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn 
make_primitive_variant_to_arrow_row_builder<'a>(
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        _ if data_type.is_primitive() => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Primitive data_type {data_type:?} not yet implemented"
-            )));
-        }
-        _ => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Not a primitive type: {data_type:?}"
-            )));
-        }
-    };
+    let builder =
+        match data_type {
+            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Decimal32(precision, scale) => Decimal32(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,

Review Comment:
   Looking at all possible combinations:
   * We are converting unscaled value `v1` of type `Variant::DecimalXX(p1, s1)` 
to `datatypes::DecimalYY(p2, s2)`
   * The variant decimals have implied precision, so p1 is always one of {9, 
19, 38} based on decimal type
   * let `n1 = p1-s1` and `n2 = p2-s2` (the max number of integer digits before 
and after conversion)
   * if n2 < n1, there is an inherent risk of overflow regardless of what 
scales are involved
     * before even looking at scale and scale changes, we should first verify 
that `v1` fits in `n2+s1` digits. If not, flag overflow immediately. Otherwise, 
set n1=n2 and proceed to the next case.
     * NOTE: This check does _NOT_ require changing the type of `v1`, because 
total precision decreased.
   * else if n2 = n1 and s2 < s1, there is an overflow risk when e.g. 0.999 
rounds to 1.00
     * Rescale, and then verify that the rounded result fits in `p2` digits. 
     * NOTE: This check does _NOT_ require changing the type of `v1`, because 
total precision decreased.
   * else, there is no risk of overflow
     * Convert `v1` to the new native type first
     * Then rescale and round as needed
     * NOTE: Both operations are infallible
   
   That would correspond to something like the following code:
   
   <details>
   
   ```rust
   fn variant_to_unscaled_decimal32(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i32> {
       match variant {
           Variant::Decimal4(d) => {
               let s1 = d.scale();
               let mut n1 = VariantDecimal4::MAX_PRECISION - s1;
               let n2 = precision - scale;
               let v1 = d.integer();
               if n2 < n1 {
                   // integer digits pose an overflow risk
                   if v1.unsigned_abs() > MAX_DECIMAL32_FOR_EACH_PRECISION[n2 + 
s1] {
                       return Err(... overflow ...);
                   }
                   // else the value fits in n2 digits and we can pretend n1=n2
                   n1 = n2;
               }
   
               if n2 == n1 {
                   let v2 = ... rescale v1 and round up ...;
                   if v2.unsigned_abs() > 
MAX_DECIMAL32_FOR_EACH_PRECISION[precision] {
                       return Err(... overflow ...);
                   }
                   // else the value can safely convert to the target type
                   return Ok(v2 as _);
               }
   
               // no overflow possible, but still have to rescale and round
               let v1 = v1 as _;
               let v2 = ... rescale v1 and round up ...;
               Ok(v2)
           }
           Variant::Decimal8(d) => {
               ... almost the same code as for Decimal4 case ...
               ... except we use VariantDecimal8::MAX_PRECISION ...
               ... and we index into MAX_DECIMAL64_FOR_EACH_PRECISION ...
           }
           Variant::Decimal16(d) => {
               ... almost the same code as for Decimal4 case ...
               ... except we use VariantDecimal16::MAX_PRECISION ...
               ... and we index into MAX_DECIMAL128_FOR_EACH_PRECISION ...
           }
       }
   }
   
   fn variant_to_unscaled_decimal64(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i64> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i64 
...
   }
   
   fn variant_to_unscaled_decimal128(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i128> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i128 
...
   }
   
   fn variant_to_unscaled_decimal256(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i256> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i256 
...
   }
   ```
   
   So, we'd want two macros:
   * Outer macro that produces the body of `variant_to_unscaled_decimalXX` 
functions
   * Inner macro that produces the body of `Variant::DecimalXX` match arms
   
   We need macros because integer types lack any helpful trait hierarchy that 
generics could take advantage of.
   
   </details>



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -342,6 +375,117 @@ impl<'a, T: PrimitiveFromVariant> 
VariantToPrimitiveArrowRowBuilder<'a, T> {
     }
 }
 
+// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct 
parameters
+pub(crate) trait VariantDecimalScaler: datatypes::DecimalType {
+    fn scale_from_variant(
+        value: &Variant<'_, '_>,
+        scale: i8,
+        precision: u8,
+    ) -> Option<<Self as ArrowPrimitiveType>::Native>;
+}
+
+macro_rules! impl_variant_decimal_scaler {
+    ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
+        impl VariantDecimalScaler for $t {
+            fn scale_from_variant(

Review Comment:
   nit: Maybe this naming would be easier to understand?
   ```suggestion
   macro_rules! impl_rescale_variant_decimal {
       ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
           impl RescaleVariantDecimal for $t {
               fn rescale_variant_decimal(
   ```
   



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());
+            let ten = $to_int_ty(10);

Review Comment:
   Could also "cheat" and do:
   ```rust
   let zero: $to_int_ty = 0;
   ```
   ... and then e.g. `1 + zero` or `10 + zero` other places. maybe that's too 
"clever" tho.



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
 impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
 impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
 
+macro_rules! scale_variant_decimal {
+    ($variant:expr, $variant_method:ident, $to_int_ty:expr, 
$output_scale:expr, $precision:expr, $validate:path) => {{
+        (|| -> Option<_> {
+            let variant = $variant.$variant_method()?;
+            let input_scale = variant.scale() as i8;
+            let variant = $to_int_ty(variant.integer());

Review Comment:
   ah, looking at the call sites, this is indeed a conversion function we 
invoke...



##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
 
         Arc::new(struct_array)
     }
+
+    macro_rules! max_unscaled_value {
+        (32, $precision:expr) => {
+            (u32::pow(10, $precision as u32) - 1) as i32
+        };
+        (64, $precision:expr) => {
+            (u64::pow(10, $precision as u32) - 1) as i64
+        };
+        (128, $precision:expr) => {
+            (u128::pow(10, $precision as u32) - 1) as i128
+        };
+    }
+
+    #[test]
+    fn get_decimal32_unshredded_var_scales_to_scale2() {
+        // Build unshredded variant values with different scales
+        let mut builder = crate::VariantArrayBuilder::new(4);
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
2).unwrap())); // 12.34
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
3).unwrap())); // 1.234
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1234, 
0).unwrap())); // 1234
+        builder.append_null();
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let options = 
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), 1234);
+        assert_eq!(result.value(1), 123);
+        assert_eq!(result.value(2), 123400);
+        assert!(result.is_null(3));
+    }
+
+    #[test]
+    fn get_decimal32_unshredded_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1235, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(1245, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235, 
3).unwrap()));
+        builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245, 
3).unwrap()));
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let options = 
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.value(0), 124);
+        assert_eq!(result.value(1), 125);
+        assert_eq!(result.value(2), -124);
+        assert_eq!(result.value(3), -125);
+    }
+
+    #[test]
+    fn get_decimal32_precision_overflow_safe() {
+        // Exceed Decimal32 max precision (9) after scaling
+        let mut builder = crate::VariantArrayBuilder::new(1);
+        builder.append_variant(Variant::from(
+            VariantDecimal4::try_new(max_unscaled_value!(32, 
DECIMAL32_MAX_PRECISION), 0).unwrap(),

Review Comment:
   No need for the macro, these constants already exist:
   ```suggestion
               VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as 
i32, 0).unwrap(),
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,8 +151,10 @@ macro_rules! decimal_to_variant_decimal {
         let (v, scale) = if *$scale < 0 {
             // For negative scale, we need to multiply the value by 10^|scale|
             // For example: 123 with scale -2 becomes 12300 with scale 0
-            let multiplier = <$value_type>::pow(10, (-*$scale) as u32);
-            (<$value_type>::checked_mul($v, multiplier), 0u8)
+            let v = (10 as $value_type)
+                .checked_pow((-*$scale) as u32)
+                .and_then(|m| m.checked_mul($v));

Review Comment:
   Nice catch. Panicky math before, yipes! 
   
   Why the cast tho?
   ```suggestion
               let v = <$value_type>::checked_pow(10, (-*$scale) as u32)
                   .and_then(|m| m.checked_mul($v));
   ```



##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn 
make_primitive_variant_to_arrow_row_builder<'a>(
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        _ if data_type.is_primitive() => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Primitive data_type {data_type:?} not yet implemented"
-            )));
-        }
-        _ => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Not a primitive type: {data_type:?}"
-            )));
-        }
-    };
+    let builder =
+        match data_type {
+            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Decimal32(precision, scale) => Decimal32(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,

Review Comment:
   To make sure I'm understanding correctly --
   * Here, the user has requested e.g. Decimal32, so we create a decimal32 row 
builder
   * The row builder invokes the `VariantDecimalScaler` trait, which eventually 
calls`Variant::as_decimal4`
   * If the actual variant value was a wider decimal type, the conversion will 
produce `None` unless the unscaled value fits in the narrower type _and_ the 
scale is small enough to fit as well (without rounding)?
   
   But in this case, the user specifically requested rounding, so it seems odd 
to fail some of the time and not fail other times? In particular, going from 
`Decimal32(9, 4)` to `Decimal32(9, 2)` would succeed with rounding, but going 
from `Decimal64(18, 4)` to `Decimal32(9, 2)` would fail for a value like 
`1234567.8901`, even tho the rescaled result `1234567.89` is a valid 
`Decimal32(9, 2)`?
   
   In order to correctly handle all valid narrowing conversions, we need to 
rescale+round first, using the original variant type, and _then_ try to narrow 
the result to the requested type.
   
   The converse hazard exists for widening, where we need to widen first, and 
_then_ rescale+round:
   * Converting the `Decimal32(9, 9)` value `0.999999999` to `Decimal64(*, 0)` 
produces an intermediate value ten decimal digits.
   * Converting the `Decimal(9, 0)` value `999999999` to `Decimal64(18, 9)` 
produces an intermediate (and final) value with 18 digits.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [Variant] Support variant to `Decimal32/64/128/256` [arrow-rs]

Reply via email to