scovich commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2432443526
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+ variant: &Variant<'_, '_>,
+ precision: u8,
+ scale: i8,
+) -> Option<O::Native>
+where
+ O: DecimalType,
+ O::Native: DecimalCast,
+{
+ match variant {
+ Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+ *i,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+ *i,
+ VariantDecimal8::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+ d.integer(),
+ VariantDecimal4::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+ d.integer(),
+ VariantDecimal8::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+ d.integer(),
+ VariantDecimal16::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ _ => None,
+ }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+ value: I::Native,
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> Option<O::Native>
+where
+ I: DecimalType,
+ O: DecimalType,
+ I::Native: DecimalCast,
+ O::Native: DecimalCast,
+{
+ let delta_scale = output_scale - input_scale;
+
+ // Determine if the cast is infallible based on precision/scale math
+ let is_infallible_cast =
+ is_infallible_decimal_cast(input_precision, input_scale,
output_precision, output_scale);
+
Review Comment:
nit: move this whole block down to where we actually use it -- declare near
first (only) use
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+ variant: &Variant<'_, '_>,
+ precision: u8,
+ scale: i8,
+) -> Option<O::Native>
+where
+ O: DecimalType,
+ O::Native: DecimalCast,
+{
+ match variant {
+ Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+ *i,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+ *i,
+ VariantDecimal8::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+ d.integer(),
+ VariantDecimal4::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+ d.integer(),
+ VariantDecimal8::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+ d.integer(),
+ VariantDecimal16::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ _ => None,
+ }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+ value: I::Native,
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> Option<O::Native>
+where
+ I: DecimalType,
+ O: DecimalType,
+ I::Native: DecimalCast,
+ O::Native: DecimalCast,
+{
+ let delta_scale = output_scale - input_scale;
+
+ // Determine if the cast is infallible based on precision/scale math
+ let is_infallible_cast =
+ is_infallible_decimal_cast(input_precision, input_scale,
output_precision, output_scale);
+
+ let scaled = if delta_scale == 0 {
+ O::Native::from_decimal(value)
+ } else if delta_scale > 0 {
+ let mul = O::Native::from_decimal(10_i128)
+ .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
Review Comment:
Could use the same performance optimization as the negative scale case below:
```suggestion
let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
let mul = max.add_wrapping(O::Native::ONE);
```
(it didn't matter much in the columnar decimal cast code, but it probably
does matter in row-wise variant cast code)
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+ variant: &Variant<'_, '_>,
+ precision: u8,
+ scale: i8,
+) -> Option<O::Native>
+where
+ O: DecimalType,
+ O::Native: DecimalCast,
+{
+ match variant {
+ Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+ *i,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+ *i,
+ VariantDecimal8::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+ d.integer(),
+ VariantDecimal4::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+ d.integer(),
+ VariantDecimal8::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+ d.integer(),
+ VariantDecimal16::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ _ => None,
+ }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+ value: I::Native,
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> Option<O::Native>
+where
+ I: DecimalType,
+ O: DecimalType,
Review Comment:
tiny nit to consider (saves space)
```suggestion
pub(crate) fn rescale_decimal<I: DecimalType, O: DecimalType>(
value: I::Native,
input_precision: u8,
input_scale: i8,
output_precision: u8,
output_scale: i8,
) -> Option<O::Native>
where
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+ variant: &Variant<'_, '_>,
+ precision: u8,
+ scale: i8,
+) -> Option<O::Native>
+where
+ O: DecimalType,
+ O::Native: DecimalCast,
+{
+ match variant {
+ Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+ *i,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+ *i,
+ VariantDecimal8::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+ d.integer(),
+ VariantDecimal4::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+ d.integer(),
+ VariantDecimal8::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+ d.integer(),
+ VariantDecimal16::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ _ => None,
+ }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+ value: I::Native,
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> Option<O::Native>
+where
+ I: DecimalType,
+ O: DecimalType,
+ I::Native: DecimalCast,
+ O::Native: DecimalCast,
+{
+ let delta_scale = output_scale - input_scale;
+
+ // Determine if the cast is infallible based on precision/scale math
+ let is_infallible_cast =
+ is_infallible_decimal_cast(input_precision, input_scale,
output_precision, output_scale);
+
+ let scaled = if delta_scale == 0 {
+ O::Native::from_decimal(value)
+ } else if delta_scale > 0 {
+ let mul = O::Native::from_decimal(10_i128)
+ .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
Review Comment:
Also -- we should benchmark, but it might be faster to multiply by one than
to execute the branch that distinguishes between zero and positive delta scale.
If so, we would want code like this:
```rust
let (scaled, is_infallible_cast) = if delta_scale < 0 {
// ... big comment about why ...
let is_infallible = input_precision + delta_scale < output_precision;
// ... comment about dividing out too many digits ...
let delta_scale = delta_scale.unsigned_abs() as usize;
let Some(max) = ... else { ... return zero ... };
...
(O::Native::from_decimal(adjusted)?, is_infallible_cast)
} else {
// ... big comment explaining why ...
let is_infallible_cast = input_precision + delta_scale <=
output_precision;
let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale)?;
let mul = max.add_wrapping(O::Native::ONE);
let x = O::Native::from_decimal(value)?;
(x.mul_checked(mul).ok()?, is_infallible_cast)
}
(is_infallible_cast || O::is_valid_decimal_precision(scaled,
output_precision)).then(scaled)
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,6 +114,171 @@ impl_timestamp_from_variant!(
|timestamp| Self::make_value(timestamp.naive_utc())
);
+/// Returns the unscaled integer representation for Arrow decimal type `O`
+/// from a `Variant`.
+///
+/// - `precision` and `scale` specify the target Arrow decimal parameters
+/// - Integer variants (`Int8/16/32/64`) are treated as decimals with scale 0
+/// - Decimal variants (`Decimal4/8/16`) use their embedded precision and scale
+///
+/// The value is rescaled to (`precision`, `scale`) using `rescale_decimal` and
+/// returns `None` if it cannot fit the requested precision.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+ variant: &Variant<'_, '_>,
+ precision: u8,
+ scale: i8,
+) -> Option<O::Native>
+where
+ O: DecimalType,
+ O::Native: DecimalCast,
+{
+ match variant {
+ Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+ *i as i32,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+ *i,
+ VariantDecimal4::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+ *i,
+ VariantDecimal8::MAX_PRECISION,
+ 0,
+ precision,
+ scale,
+ ),
+ Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+ d.integer(),
+ VariantDecimal4::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+ d.integer(),
+ VariantDecimal8::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+ d.integer(),
+ VariantDecimal16::MAX_PRECISION,
+ d.scale() as i8,
+ precision,
+ scale,
+ ),
+ _ => None,
+ }
+}
+
+/// Rescale a decimal from (input_precision, input_scale) to
(output_precision, output_scale)
+/// and return the scaled value if it fits the output precision. Similar to
the implementation in
+/// decimal.rs in arrow-cast.
+pub(crate) fn rescale_decimal<I, O>(
+ value: I::Native,
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> Option<O::Native>
+where
+ I: DecimalType,
+ O: DecimalType,
+ I::Native: DecimalCast,
+ O::Native: DecimalCast,
+{
+ let delta_scale = output_scale - input_scale;
+
+ // Determine if the cast is infallible based on precision/scale math
+ let is_infallible_cast =
+ is_infallible_decimal_cast(input_precision, input_scale,
output_precision, output_scale);
+
+ let scaled = if delta_scale == 0 {
+ O::Native::from_decimal(value)
+ } else if delta_scale > 0 {
+ let mul = O::Native::from_decimal(10_i128)
+ .and_then(|t| t.pow_checked(delta_scale as u32).ok())?;
+ O::Native::from_decimal(value).and_then(|x| x.mul_checked(mul).ok())
+ } else {
+ // delta_scale is guaranteed to be > 0, but may also be larger than
I::MAX_PRECISION. If so, the
+ // scale change divides out more digits than the input has precision
and the result of the cast
+ // is always zero. For example, if we try to apply delta_scale=10 a
decimal32 value, the largest
+ // possible result is 999999999/10000000000 = 0.0999999999, which
rounds to zero. Smaller values
+ // (e.g. 1/10000000000) or larger delta_scale (e.g.
999999999/10000000000000) produce even
+ // smaller results, which also round to zero. In that case, just
return an array of zeros.
+ let delta_scale = delta_scale.unsigned_abs() as usize;
+ let Some(max) = I::MAX_FOR_EACH_PRECISION.get(delta_scale) else {
+ return Some(O::Native::ZERO);
+ };
+ let div = max.add_wrapping(I::Native::ONE);
+ let half =
div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
+ let half_neg = half.neg_wrapping();
+
+ // div is >= 10 and so this cannot overflow
+ let d = value.div_wrapping(div);
+ let r = value.mod_wrapping(div);
+
+ // Round result
+ let adjusted = match value >= I::Native::ZERO {
+ true if r >= half => d.add_wrapping(I::Native::ONE),
+ false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
+ _ => d,
+ };
+ O::Native::from_decimal(adjusted)
+ };
+
+ scaled.filter(|v| is_infallible_cast || O::is_valid_decimal_precision(*v,
output_precision))
+}
+
+/// Returns true if casting from (input_precision, input_scale) to
+/// (output_precision, output_scale) is infallible based on precision/scale
math.
+fn is_infallible_decimal_cast(
+ input_precision: u8,
+ input_scale: i8,
+ output_precision: u8,
+ output_scale: i8,
+) -> bool {
+ let delta_scale = output_scale - input_scale;
Review Comment:
nit: we could have passed this in, our caller already computed it. but I
guess this is more regular?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]