scovich commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2407870910


##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn 
make_primitive_variant_to_arrow_row_builder<'a>(
 ) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
     use PrimitiveVariantToArrowRowBuilder::*;
 
-    let builder = match data_type {
-        DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
-            cast_options,
-            capacity,
-        )),
-        _ if data_type.is_primitive() => {
-            return Err(ArrowError::NotYetImplemented(format!(
-                "Primitive data_type {data_type:?} not yet implemented"
-            )));
-        }
-        _ => {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Not a primitive type: {data_type:?}"
-            )));
-        }
-    };
+    let builder =
+        match data_type {
+            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float16 => 
Float16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float32 => 
Float32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float64 => 
Float64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Decimal32(precision, scale) => Decimal32(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, 
*precision, *scale)?,

Review Comment:
   Looking at all possible combinations:
   * We are converting unscaled value `v1` of type `Variant::DecimalXX(p1, s1)` 
to `datatypes::DecimalYY(p2, s2)`
   * The variant decimals have implied precision, so p1 is always one of {9, 
19, 38} based on decimal type
   * let `n1 = p1-s1` and `n2 = p2-s2` (the max number of integer digits before 
and after conversion)
   * if n2 < n1, there is an inherent risk of overflow regardless of what 
scales are involved
     * before even looking at scale and scale changes, we should first verify 
that `v1` fits in `n2+s1` digits. If not, flag overflow immediately. Otherwise, 
set n1=n2 and proceed to the next case.
     * NOTE: This check does _NOT_ require changing the type of `v1`, because 
total precision decreased.
   * else if n2 = n1 and s2 < s1, there is an overflow risk when e.g. 0.999 
rounds to 1.00
     * Rescale, and then verify that the rounded result fits in `p2` digits. 
     * NOTE: This check does _NOT_ require changing the type of `v1`, because 
total precision decreased.
   * else, there is no risk of overflow
     * Convert `v1` to the new native type first
     * Then rescale and round as needed
     * NOTE: Both operations are infallible
   
   That would correspond to something like the following code:
   
   <details>
   
   ```rust
   fn variant_to_unscaled_decimal32(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i32> {
       match variant {
           Variant::Decimal4(d) => {
               let s1 = d.scale();
               let mut n1 = VariantDecimal4::MAX_PRECISION - s1;
               let n2 = precision - scale;
               let v1 = d.integer();
               if n2 < n1 {
                   // integer digits pose an overflow risk, and n2+s1 could 
even be out of precision range
                   let max_value = MAX_DECIMAL32_FOR_EACH_PRECISION.get(n2 + 
s1);
                   if max_value.is_none_or(|n| v1.unsigned_abs() > n) {
                       return Err(... overflow ...);
                   }
                   // else the value fits in n2 digits and we can pretend n1=n2
                   n1 = n2;
               }
   
               if n2 == n1 {
                   let v2 = ... rescale v1 and round up ...;
                   if v2.unsigned_abs() > 
MAX_DECIMAL32_FOR_EACH_PRECISION[precision] {
                       return Err(... overflow ...);
                   }
                   // else the value can safely convert to the target type
                   return Ok(v2 as _);
               }
   
               // no overflow possible, but still have to rescale and round
               let v1 = v1 as _;
               let v2 = ... rescale v1 and round up ...;
               Ok(v2)
           }
           Variant::Decimal8(d) => {
               ... almost the same code as for Decimal4 case ...
               ... except we use VariantDecimal8::MAX_PRECISION ...
               ... and we index into MAX_DECIMAL64_FOR_EACH_PRECISION ...
           }
           Variant::Decimal16(d) => {
               ... almost the same code as for Decimal4 case ...
               ... except we use VariantDecimal16::MAX_PRECISION ...
               ... and we index into MAX_DECIMAL128_FOR_EACH_PRECISION ...
           }
           Variant::Int8(i) => { ... treat it like `Variant::Decimal4(i, 0)` 
... }
           Variant::Int16(i) => { ... treat it like `Variant::Decimal4(i, 0)` 
... }
           Variant::Int32(i) => { ... treat it like `Variant::Decimal8(i, 0)` 
... }
           Variant::Int64(i) => { ... treat it like `Variant::Decimal16(i, 0)` 
... }
           _ => return Err(... not exact numeric data ...),
       }
   }
   
   fn variant_to_unscaled_decimal64(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i64> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i64 
...
   }
   
   fn variant_to_unscaled_decimal128(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i128> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i128 
...
   }
   
   fn variant_to_unscaled_decimal256(
       variant: Variant<'_, '_>, 
       precision: u8, 
       scale: u8,
   ) -> Result<i256> {
       ... exactly the same code as for decimal32 case ...
       ... but the changed return type means the `as _` casts now produce i256 
...
   }
   ```
   
   So, we'd want two macros:
   * Outer macro that produces the body of `variant_to_unscaled_decimalXX` 
functions
   * Inner macro that produces the body of `Variant::DecimalXX` match arms
   
   We need macros because integer types lack any helpful trait hierarchy that 
generics could take advantage of.
   
   </details>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to