Re: [PR] [Variant] Align cast logic for from/to_decimal for variant [arrow-rs]

via GitHub Mon, 13 Apr 2026 05:44:22 -0700


klion26 commented on code in PR #9689:
URL: https://github.com/apache/arrow-rs/pull/9689#discussion_r3072705088



##########
arrow-cast/src/cast/mod.rs:
##########
@@ -72,9 +72,26 @@ use arrow_schema::*;
 use arrow_select::take::take;
 use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};
 
-pub use decimal::{DecimalCast, rescale_decimal};
+pub use decimal::{
+    DecimalCast, cast_single_decimal_to_integer, 
parse_string_to_decimal_native, rescale_decimal,
+    single_float_to_decimal,
+};
 pub use string::cast_single_string_to_boolean_default;
 
+/// Lossy conversion from decimal to float.
+///
+/// Conversion is lossy and follows standard floating point semantics. Values
+/// that exceed the representable range become `INFINITY` or `-INFINITY` 
without
+/// returning an error.
+#[inline]
+pub fn single_decimal_to_float_lossy<D, F>(f: &F, x: D::Native, scale: i32) -> 
f64
+where
+    D: DecimalType,
+    F: Fn(D::Native) -> f64,
+{
+    f(x) / 10_f64.powi(scale)

Review Comment:
   Fixed, thanks for the performcne numbers.



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -826,84 +837,63 @@ where
 
     let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len());
 
-    if scale < 0 {
-        match cast_options.safe {
-            true => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array
-                            .value(i)
-                            .mul_checked(div)
-                            .ok()
-                            .and_then(<T::Native as 
NumCast>::from::<D::Native>);
-                        value_builder.append_option(v);
-                    }
-                }
-            }
-            false => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array.value(i).mul_checked(div)?;
-
-                        let value =
-                            <T::Native as 
NumCast>::from::<D::Native>(v).ok_or_else(|| {
-                                ArrowError::CastError(format!(
-                                    "value of {:?} is out of range {}",
-                                    v,
-                                    T::DATA_TYPE
-                                ))
-                            })?;
-
-                        value_builder.append_value(value);
-                    }
-                }
-            }
-        }
-    } else {
-        match cast_options.safe {
-            true => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array
-                            .value(i)
-                            .div_checked(div)
-                            .ok()
-                            .and_then(<T::Native as 
NumCast>::from::<D::Native>);
-                        value_builder.append_option(v);
-                    }
+    for i in 0..array.len() {
+        if array.is_null(i) {
+            value_builder.append_null();
+        } else {
+            match cast_options.safe {
+                true => {
+                    let v = cast_single_decimal_to_integer::<D, T::Native>(

Review Comment:
   Thanks for the detailed explain. fixed.



##########
arrow-cast/src/cast/mod.rs:
##########
@@ -2314,10 +2331,10 @@ where
         Int32 => cast_decimal_to_integer::<D, Int32Type>(array, base, *scale, 
cast_options),
         Int64 => cast_decimal_to_integer::<D, Int64Type>(array, base, *scale, 
cast_options),
         Float32 => cast_decimal_to_float::<D, Float32Type, _>(array, |x| {
-            (as_float(x) / 10_f64.powi(*scale as i32)) as f32
+            single_decimal_to_float_lossy::<D, F>(&as_float, x, *scale as _) 
as f32
         }),
         Float64 => cast_decimal_to_float::<D, Float64Type, _>(array, |x| {
-            as_float(x) / 10_f64.powi(*scale as i32)
+            single_decimal_to_float_lossy::<D, F>(&as_float, x, *scale as _)

Review Comment:
   Yes, `i32::from(*scale)` is better, fixed.



##########
parquet-variant/src/variant.rs:
##########
@@ -1106,6 +1181,25 @@ impl<'m, 'v> Variant<'m, 'v> {
         self.as_num()
     }
 
+    fn convert_string_to_decimal<VD, D>(input: &str) -> Option<VD>
+    where
+        D: DecimalType,
+        VD: VariantDecimalType<Native = D::Native>,
+        D::Native: NumCast + DecimalCast,
+    {
+        // find the last '.'
+        let scale_usize = input
+            .rsplit_once('.')
+            .map(|(_, frac)| frac.len())
+            .unwrap_or(0);

Review Comment:
   Fixed, use `map_or_else` here as `map_or_default` is a nightly-only 
experimental API for now



##########
parquet-variant/src/variant.rs:
##########
@@ -815,9 +878,21 @@ impl<'m, 'v> Variant<'m, 'v> {
             Variant::Int64(i) => num_cast(i),
             Variant::Float(f) => num_cast(f),
             Variant::Double(d) => num_cast(d),
-            Variant::Decimal4(d) if d.scale() == 0 => num_cast(d.integer()),
-            Variant::Decimal8(d) if d.scale() == 0 => num_cast(d.integer()),
-            Variant::Decimal16(d) if d.scale() == 0 => num_cast(d.integer()),
+            Variant::Decimal4(d) => Self::cast_decimal_to_num::<Decimal32Type, 
T, _>(
+                d.integer(),
+                d.scale(),
+                |x: i32| x as f64,
+            ),
+            Variant::Decimal8(d) => Self::cast_decimal_to_num::<Decimal64Type, 
T, _>(
+                d.integer(),
+                d.scale(),
+                |x: i64| x as f64,
+            ),
+            Variant::Decimal16(d) => 
Self::cast_decimal_to_num::<Decimal128Type, T, _>(
+                d.integer(),
+                d.scale(),
+                |x: i128| x as f64,

Review Comment:
   Compiler didn't need this, fixed.



##########
parquet-variant/src/variant.rs:
##########
@@ -1106,6 +1181,25 @@ impl<'m, 'v> Variant<'m, 'v> {
         self.as_num()
     }
 
+    fn convert_string_to_decimal<VD, D>(input: &str) -> Option<VD>
+    where
+        D: DecimalType,
+        VD: VariantDecimalType<Native = D::Native>,
+        D::Native: NumCast + DecimalCast,
+    {
+        // find the last '.'
+        let scale_usize = input
+            .rsplit_once('.')
+            .map(|(_, frac)| frac.len())
+            .unwrap_or(0);
+
+        let scale = u8::try_from(scale_usize).ok()?;
+
+        parse_string_to_decimal_native::<D>(input, scale_usize)
+            .ok()
+            .and_then(|raw| VD::try_new(raw, scale).ok())

Review Comment:
   Fixed, more readable now



##########
parquet-variant/src/variant.rs:
##########
@@ -1106,6 +1181,25 @@ impl<'m, 'v> Variant<'m, 'v> {
         self.as_num()
     }
 
+    fn convert_string_to_decimal<VD, D>(input: &str) -> Option<VD>

Review Comment:
   Fixed



##########
parquet-variant/src/variant.rs:
##########
@@ -1199,14 +1317,27 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
     /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 
2).ok());
     ///
+    /// // or from a string variant if it can be parsed as decimal
+    /// let v2 = Variant::from("123.45");
+    /// assert_eq!(v2.as_decimal16(), VariantDecimal16::try_new(12345, 
2).ok());
+    ///
     /// // but not if the variant is not a decimal
-    /// let v2 = Variant::from("hello!");
-    /// assert_eq!(v2.as_decimal16(), None);
+    /// let v3 = Variant::from("hello!");
+    /// assert_eq!(v3.as_decimal16(), None);
     /// ```
     pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
         match *self {
-            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | 
Variant::Int64(_) => {
-                self.as_num::<i128>().and_then(|x| x.try_into().ok())
+            Variant::Int8(_) | Variant::Int16(_) | Variant::Int32(_) | 
Variant::Int64(_) => self
+                .as_num::<i64>()
+                .map(|x| (x as i128).try_into().ok())

Review Comment:
   There is no `UInt128` arrow type. `as_num` needs the type to implement 
`DecimalCastTarget`, which needs a corresponding arrow type `UInt128`.
   
   Changed to `i128::from(..)`
   



##########
arrow-cast/src/cast/decimal.rs:
##########
@@ -826,84 +837,63 @@ where
 
     let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len());
 
-    if scale < 0 {
-        match cast_options.safe {
-            true => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array
-                            .value(i)
-                            .mul_checked(div)
-                            .ok()
-                            .and_then(<T::Native as 
NumCast>::from::<D::Native>);
-                        value_builder.append_option(v);
-                    }
-                }
-            }
-            false => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array.value(i).mul_checked(div)?;
-
-                        let value =
-                            <T::Native as 
NumCast>::from::<D::Native>(v).ok_or_else(|| {
-                                ArrowError::CastError(format!(
-                                    "value of {:?} is out of range {}",
-                                    v,
-                                    T::DATA_TYPE
-                                ))
-                            })?;
-
-                        value_builder.append_value(value);
-                    }
-                }
-            }
-        }
-    } else {
-        match cast_options.safe {
-            true => {
-                for i in 0..array.len() {
-                    if array.is_null(i) {
-                        value_builder.append_null();
-                    } else {
-                        let v = array
-                            .value(i)
-                            .div_checked(div)
-                            .ok()
-                            .and_then(<T::Native as 
NumCast>::from::<D::Native>);
-                        value_builder.append_option(v);
-                    }
+    for i in 0..array.len() {
+        if array.is_null(i) {
+            value_builder.append_null();
+        } else {
+            match cast_options.safe {
+                true => {
+                    let v = cast_single_decimal_to_integer::<D, T::Native>(
+                        array.value(i),
+                        div,
+                        scale as _,

Review Comment:
   To avoid the overlfow `cast_single_decimal_to_integer` receives `i16`, the 
scale of arrow decimal is `i8` and `VariantDecimal` is `u8`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [Variant] Align cast logic for from/to_decimal for variant [arrow-rs]

Reply via email to