This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push: new d6f40ce62b [Variant] Allow lossless casting from integer to floating point (#8357) d6f40ce62b is described below commit d6f40ce62b824af467acedc4da57fd6d22864a86 Author: Ryan Johnson <scov...@users.noreply.github.com> AuthorDate: Wed Sep 17 09:24:52 2025 -0600 [Variant] Allow lossless casting from integer to floating point (#8357) # Which issue does this PR close? We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Closes #NNN. # Rationale for this change Historically, `Variant::as_fXX` methods don't even try to cast int values to floating point, which is counter-intuitive. # What changes are included in this PR? Allow lossless casting of variant integer values to variant floating point values, by a naive determination of precision: * Every floating point number has some number of bits of precision * 53 (double) * 24 (single) * 11 (half) * Any integer that fits entirely inside the target floating point type's precision can be converted losslessly * This produces an intuitive result: "too big" numbers fail to convert, while "small enough" numbers do convert. * This is a sufficient but _not_ necessary condition. * Technically, wider integers can be represented losslessly as well, as long as they have enough trailing zeros * It's unclear whether allowing those wider values to cast is actually helpful in practice, because only 1 in 2**k values can cast (where k is the number of bits of excess precision); it would certainly make input testing more expensive. # Are these changes tested? New unit tests and doc tests. # Are there any user-facing changes? Yes. Values that failed to cast before now succeed. 
--- parquet-variant/src/utils.rs | 17 ++++++++++++++ parquet-variant/src/variant.rs | 50 +++++++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index 872e90ad51..d28b8685ba 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -144,3 +144,20 @@ pub(crate) const fn expect_size_of<T>(expected: usize) { let _ = [""; 0][size]; } } + +pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool { + n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_fits_precision() { + assert!(fits_precision::<10>(1023)); + assert!(!fits_precision::<10>(1024)); + assert!(fits_precision::<10>(-1023)); + assert!(!fits_precision::<10>(-1024)); + } +} diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index faaab94bc3..38ef5ba30a 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -28,7 +28,7 @@ use crate::decoder::{ self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, }; use crate::path::{VariantPath, VariantPathElement}; -use crate::utils::{first_byte_from_slice, slice_from_slice}; +use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice}; use std::ops::Deref; use arrow_schema::ArrowError; @@ -1082,8 +1082,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Converts this variant to an `f16` if possible. /// - /// Returns `Some(f16)` for float and double variants, - /// `None` for non-floating-point variants. + /// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of + /// precision. `None` otherwise. 
/// /// # Example /// @@ -1099,21 +1099,29 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI))); /// + /// // and from integers with no more than 11 bits of precision + /// let v3 = Variant::from(2047); + /// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0))); + /// /// // but not from other variants - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_f16(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f16(), None); pub fn as_f16(&self) -> Option<f16> { match *self { Variant::Float(i) => Some(f16::from_f32(i)), Variant::Double(i) => Some(f16::from_f64(i)), + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), + Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), + Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)), _ => None, } } /// Converts this variant to an `f32` if possible. /// - /// Returns `Some(f32)` for float and double variants, - /// `None` for non-floating-point variants. + /// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of + /// precision. `None` otherwise. 
/// /// # Examples /// @@ -1128,23 +1136,31 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI)); /// + /// // and from integers with no more than 24 bits of precision + /// let v3 = Variant::from(16777215i64); + /// assert_eq!(v3.as_f32(), Some(16777215.0)); + /// /// // but not from other variants - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_f32(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f32(), None); /// ``` #[allow(clippy::cast_possible_truncation)] pub fn as_f32(&self) -> Option<f32> { match *self { Variant::Float(i) => Some(i), Variant::Double(i) => Some(i as f32), + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) => Some(i.into()), + Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _), + Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _), _ => None, } } /// Converts this variant to an `f64` if possible. /// - /// Returns `Some(f64)` for float and double variants, - /// `None` for non-floating-point variants. + /// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of + /// precision. `None` otherwise. 
/// /// # Examples /// @@ -1159,14 +1175,22 @@ impl<'m, 'v> Variant<'m, 'v> { /// let v2 = Variant::from(std::f64::consts::PI); /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI)); /// + /// // and from integers with no more than 53 bits of precision + /// let v3 = Variant::from(9007199254740991i64); + /// assert_eq!(v3.as_f64(), Some(9007199254740991.0)); + /// /// // but not from other variants - /// let v3 = Variant::from("hello!"); - /// assert_eq!(v3.as_f64(), None); + /// let v4 = Variant::from("hello!"); + /// assert_eq!(v4.as_f64(), None); /// ``` pub fn as_f64(&self) -> Option<f64> { match *self { Variant::Float(i) => Some(i.into()), Variant::Double(i) => Some(i), + Variant::Int8(i) => Some(i.into()), + Variant::Int16(i) => Some(i.into()), + Variant::Int32(i) => Some(i.into()), + Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _), _ => None, } }