scovich commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2407040670
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
+
+ // rounding in the same way as
convert_to_smaller_scale_decimal in arrow-cast
+ let half = div.checked_div($to_int_ty(2))?;
+ let half_neg = half.checked_neg()?;
+ let adjusted = match v >= $to_int_ty(0) {
+ true if r >= half => d.checked_add($to_int_ty(1))?,
+ false if r <= half_neg => d.checked_sub($to_int_ty(1))?,
+ _ => d,
+ };
+ Some(adjusted)
+ };
+
+ scaled.filter(|v| $validate(*v, $precision))
+ })()
Review Comment:
Why is `(|| -> Option<_> { ... })()` needed here? The macro body _should_
Just Work because the macro is invoked inside a method that already returns
`Option`?
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
Review Comment:
How does this conversion work? I'm guessing it's actually _not_ a conversion
at all? if it's _not_ a conversion, we can avoid passing the redundant
`$to_int_ty` by a type inference trick:
```rust
let zero = 0;
let variant = variant.integer() + zero; // forces `zero` to the native type
let ten = 10 + zero; // forces `ten` to also be the native type
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
+
+ // rounding in the same way as
convert_to_smaller_scale_decimal in arrow-cast
Review Comment:
Looking at `convert_to_smaller_scale_decimal`, virtually all the logic in
that function is doing exactly what we want here... and then the last line just
applies the calculation as an appropriate unary operation on the input `array`.
Rather than duplicate the logic, is there some way we could factor it out or
otherwise reuse it? Problem is, it's in a different crate, so the factored out
logic would have to be `pub`...
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
Arc::new(struct_array)
}
+
+ macro_rules! max_unscaled_value {
+ (32, $precision:expr) => {
+ (u32::pow(10, $precision as u32) - 1) as i32
+ };
+ (64, $precision:expr) => {
+ (u64::pow(10, $precision as u32) - 1) as i64
+ };
+ (128, $precision:expr) => {
+ (u128::pow(10, $precision as u32) - 1) as i128
+ };
+ }
Review Comment:
I don't think we need this macro, see other comments below
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
Review Comment:
I'm a bit nervous about rounding (the whole point of decimal is to be
lossless, unlike floating point). But I guess in this case the user
specifically asked for the narrower type, so the usual worries about lossy
coercion don't apply?
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -342,6 +375,117 @@ impl<'a, T: PrimitiveFromVariant>
VariantToPrimitiveArrowRowBuilder<'a, T> {
}
}
+// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct
parameters
+pub(crate) trait VariantDecimalScaler: datatypes::DecimalType {
+ fn scale_from_variant(
+ value: &Variant<'_, '_>,
+ scale: i8,
+ precision: u8,
+ ) -> Option<<Self as ArrowPrimitiveType>::Native>;
+}
+
+macro_rules! impl_variant_decimal_scaler {
+ ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
+ impl VariantDecimalScaler for $t {
+ fn scale_from_variant(
+ value: &Variant<'_, '_>,
+ scale: i8,
+ precision: u8,
+ ) -> Option<<Self as ArrowPrimitiveType>::Native> {
+ scale_variant_decimal!(
+ value,
+ $variant_method,
+ $to_native,
+ scale,
+ precision,
+ $validate
+ )
+ }
+ }
+ };
+}
+
+impl_variant_decimal_scaler!(
+ Decimal32Type,
+ as_decimal4,
+ |x: i32| x,
+ is_validate_decimal32_precision
+);
+impl_variant_decimal_scaler!(
+ Decimal64Type,
+ as_decimal8,
+ |x: i64| x,
+ is_validate_decimal64_precision
+);
+impl_variant_decimal_scaler!(
+ Decimal128Type,
+ as_decimal16,
+ |x: i128| x,
+ is_validate_decimal_precision
+);
+impl_variant_decimal_scaler!(
+ Decimal256Type,
+ as_decimal16,
+ i256::from_i128,
+ is_validate_decimal256_precision
+);
+
+/// Builder for converting variant values to arrow Decimal values
+pub(crate) struct VariantToDecimalArrowRowBuilder<
+ 'a,
+ T: datatypes::DecimalType + VariantDecimalScaler,
+> {
Review Comment:
```suggestion
pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T:
VariantDecimalScaler> {
```
(`VariantDecimalScaler` already includes `DecimalType`)
(again below on the impl)
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
Review Comment:
Why do we need both `v` and `variant`?
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
Arc::new(struct_array)
}
+
+ macro_rules! max_unscaled_value {
+ (32, $precision:expr) => {
+ (u32::pow(10, $precision as u32) - 1) as i32
+ };
+ (64, $precision:expr) => {
+ (u64::pow(10, $precision as u32) - 1) as i64
+ };
+ (128, $precision:expr) => {
+ (u128::pow(10, $precision as u32) - 1) as i128
+ };
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_var_scales_to_scale2() {
+ // Build unshredded variant values with different scales
+ let mut builder = crate::VariantArrayBuilder::new(4);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
2).unwrap())); // 12.34
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
3).unwrap())); // 1.234
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
0).unwrap())); // 1234
+ builder.append_null();
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.precision(), 9);
+ assert_eq!(result.scale(), 2);
+ assert_eq!(result.value(0), 1234);
+ assert_eq!(result.value(1), 123);
+ assert_eq!(result.value(2), 123400);
+ assert!(result.is_null(3));
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_scale_down_rounding() {
+ let mut builder = crate::VariantArrayBuilder::new(2);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1245,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245,
3).unwrap()));
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.value(0), 124);
+ assert_eq!(result.value(1), 125);
+ assert_eq!(result.value(2), -124);
+ assert_eq!(result.value(3), -125);
+ }
+
+ #[test]
+ fn get_decimal32_precision_overflow_safe() {
+ // Exceed Decimal32 max precision (9) after scaling
+ let mut builder = crate::VariantArrayBuilder::new(1);
+ builder.append_variant(Variant::from(
+ VariantDecimal4::try_new(max_unscaled_value!(32,
DECIMAL32_MAX_PRECISION), 0).unwrap(),
Review Comment:
(and, further, it looks like these are redundant
[MAX_DECIMAL32_FOR_EACH_PRECISION](https://docs.rs/arrow/latest/arrow/datatypes/constant.MAX_DECIMAL32_FOR_EACH_PRECISION.html),
e.g.
```rust
MAX_DECIMAL32_FOR_EACH_PRECISION[DECIMAL32_MAX_PRECISION]
```
(tho those constants are provided by `arrow-data` crate, which perhaps we
don't want the `parquet-variant` crate to take a dependency on)
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn
make_primitive_variant_to_arrow_row_builder<'a>(
) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
use PrimitiveVariantToArrowRowBuilder::*;
- let builder = match data_type {
- DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- _ if data_type.is_primitive() => {
- return Err(ArrowError::NotYetImplemented(format!(
- "Primitive data_type {data_type:?} not yet implemented"
- )));
- }
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Not a primitive type: {data_type:?}"
- )));
- }
- };
+ let builder =
+ match data_type {
+ DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float16 =>
Float16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float32 =>
Float32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float64 =>
Float64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Decimal32(precision, scale) => Decimal32(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
Review Comment:
Looking at all possible combinations:
* We are converting unscaled value `v1` of type `Variant::DecimalXX(p1, s1)`
to `datatypes::DecimalYY(p2, s2)`
* The variant decimals have implied precision, so p1 is always one of {9,
19, 38} based on decimal type
* let `n1 = p1-s1` and `n2 = p2-s2` (the max number of integer digits before
and after conversion)
* if n2 < n1, there is an inherent risk of overflow regardless of what
scales are involved
* before even looking at scale and scale changes, we should first verify
that `v1` fits in `n2+s1` digits. If not, flag overflow immediately. Otherwise,
set n1=n2 and proceed to the next case.
* NOTE: This check does _NOT_ require changing the type of `v1`, because
total precision decreased.
* else if n2 = n1 and s2 < s1, there is an overflow risk when e.g. 0.999
rounds to 1.00
* Rescale, and then verify that the rounded result fits in `p2` digits.
* NOTE: This check does _NOT_ require changing the type of `v1`, because
total precision decreased.
* else, there is no risk of overflow
* Convert `v1` to the new native type first
* Then rescale and round as needed
* NOTE: Both operations are infallible
That would correspond to something like the following code:
<details>
```rust
fn variant_to_unscaled_decimal32(
variant: Variant<'_, '_>,
precision: u8,
scale: u8,
) -> Result<i32> {
match variant {
Variant::Decimal4(d) => {
let s1 = d.scale();
let mut n1 = VariantDecimal4::MAX_PRECISION - s1;
let n2 = precision - scale;
let v1 = d.integer();
if n2 < n1 {
// integer digits pose an overflow risk
if v1.unsigned_abs() > MAX_DECIMAL32_FOR_EACH_PRECISION[n2 +
s1] {
return Err(... overflow ...);
}
// else the value fits in n2 digits and we can pretend n1=n2
n1 = n2;
}
if n2 == n1 {
let v2 = ... rescale v1 and round up ...;
if v2.unsigned_abs() >
MAX_DECIMAL32_FOR_EACH_PRECISION[precision] {
return Err(... overflow ...);
}
// else the value can safely convert to the target type
return Ok(v2 as _);
}
// no overflow possible, but still have to rescale and round
let v1 = v1 as _;
let v2 = ... rescale v1 and round up ...;
Ok(v2)
}
Variant::Decimal8(d) => {
... almost the same code as for Decimal4 case ...
... except we use VariantDecimal8::MAX_PRECISION ...
... and we index into MAX_DECIMAL64_FOR_EACH_PRECISION ...
}
Variant::Decimal16(d) => {
... almost the same code as for Decimal4 case ...
... except we use VariantDecimal16::MAX_PRECISION ...
... and we index into MAX_DECIMAL128_FOR_EACH_PRECISION ...
}
}
}
fn variant_to_unscaled_decimal64(
variant: Variant<'_, '_>,
precision: u8,
scale: u8,
) -> Result<i64> {
... exactly the same code as for decimal32 case ...
... but the changed return type means the `as _` casts now produce i64
...
}
fn variant_to_unscaled_decimal128(
variant: Variant<'_, '_>,
precision: u8,
scale: u8,
) -> Result<i128> {
... exactly the same code as for decimal32 case ...
... but the changed return type means the `as _` casts now produce i128
...
}
fn variant_to_unscaled_decimal256(
variant: Variant<'_, '_>,
precision: u8,
scale: u8,
) -> Result<i256> {
... exactly the same code as for decimal32 case ...
... but the changed return type means the `as _` casts now produce i256
...
}
```
So, we'd want two macros:
* Outer macro that produces the body of `variant_to_unscaled_decimalXX`
functions
* Inner macro that produces the body of `Variant::DecimalXX` match arms
We need macros because integer types lack any helpful trait hierarchy that
generics could take advantage of.
</details>
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -342,6 +375,117 @@ impl<'a, T: PrimitiveFromVariant>
VariantToPrimitiveArrowRowBuilder<'a, T> {
}
}
+// Minimal per-decimal hook: just wraps scale_variant_decimal! with correct
parameters
+pub(crate) trait VariantDecimalScaler: datatypes::DecimalType {
+ fn scale_from_variant(
+ value: &Variant<'_, '_>,
+ scale: i8,
+ precision: u8,
+ ) -> Option<<Self as ArrowPrimitiveType>::Native>;
+}
+
+macro_rules! impl_variant_decimal_scaler {
+ ($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
+ impl VariantDecimalScaler for $t {
+ fn scale_from_variant(
Review Comment:
nit: Maybe this naming would be easier to understand?
```suggestion
macro_rules! impl_rescale_variant_decimal {
($t:ty, $variant_method:ident, $to_native:expr, $validate:path) => {
impl RescaleVariantDecimal for $t {
fn rescale_variant_decimal(
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
Review Comment:
Could also "cheat" and do:
```rust
let zero: $to_int_ty = 0;
```
... and then e.g. `1 + zero` or `10 + zero` other places. maybe that's too
"clever" tho.
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
Review Comment:
ah, looking at the call sites, this is indeed a conversion function we
invoke...
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
Arc::new(struct_array)
}
+
+ macro_rules! max_unscaled_value {
+ (32, $precision:expr) => {
+ (u32::pow(10, $precision as u32) - 1) as i32
+ };
+ (64, $precision:expr) => {
+ (u64::pow(10, $precision as u32) - 1) as i64
+ };
+ (128, $precision:expr) => {
+ (u128::pow(10, $precision as u32) - 1) as i128
+ };
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_var_scales_to_scale2() {
+ // Build unshredded variant values with different scales
+ let mut builder = crate::VariantArrayBuilder::new(4);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
2).unwrap())); // 12.34
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
3).unwrap())); // 1.234
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
0).unwrap())); // 1234
+ builder.append_null();
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.precision(), 9);
+ assert_eq!(result.scale(), 2);
+ assert_eq!(result.value(0), 1234);
+ assert_eq!(result.value(1), 123);
+ assert_eq!(result.value(2), 123400);
+ assert!(result.is_null(3));
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_scale_down_rounding() {
+ let mut builder = crate::VariantArrayBuilder::new(2);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1245,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245,
3).unwrap()));
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.value(0), 124);
+ assert_eq!(result.value(1), 125);
+ assert_eq!(result.value(2), -124);
+ assert_eq!(result.value(3), -125);
+ }
+
+ #[test]
+ fn get_decimal32_precision_overflow_safe() {
+ // Exceed Decimal32 max precision (9) after scaling
+ let mut builder = crate::VariantArrayBuilder::new(1);
+ builder.append_variant(Variant::from(
+ VariantDecimal4::try_new(max_unscaled_value!(32,
DECIMAL32_MAX_PRECISION), 0).unwrap(),
Review Comment:
No need for the macro, these constants already exist:
```suggestion
VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE as
i32, 0).unwrap(),
```
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -109,8 +151,10 @@ macro_rules! decimal_to_variant_decimal {
let (v, scale) = if *$scale < 0 {
// For negative scale, we need to multiply the value by 10^|scale|
// For example: 123 with scale -2 becomes 12300 with scale 0
- let multiplier = <$value_type>::pow(10, (-*$scale) as u32);
- (<$value_type>::checked_mul($v, multiplier), 0u8)
+ let v = (10 as $value_type)
+ .checked_pow((-*$scale) as u32)
+ .and_then(|m| m.checked_mul($v));
Review Comment:
Nice catch. Panicky math before, yipes!
Why the cast tho?
```suggestion
let v = <$value_type>::checked_pow(10, (-*$scale) as u32)
.and_then(|m| m.checked_mul($v));
```
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn
make_primitive_variant_to_arrow_row_builder<'a>(
) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
use PrimitiveVariantToArrowRowBuilder::*;
- let builder = match data_type {
- DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- _ if data_type.is_primitive() => {
- return Err(ArrowError::NotYetImplemented(format!(
- "Primitive data_type {data_type:?} not yet implemented"
- )));
- }
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Not a primitive type: {data_type:?}"
- )));
- }
- };
+ let builder =
+ match data_type {
+ DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float16 =>
Float16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float32 =>
Float32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float64 =>
Float64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Decimal32(precision, scale) => Decimal32(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
Review Comment:
To make sure I'm understanding correctly --
* Here, the user has requested e.g. Decimal32, so we create a decimal32 row
builder
* The row builder invokes the `VariantDecimalScaler` trait, which eventually
calls`Variant::as_decimal4`
* If the actual variant value was a wider decimal type, the conversion will
produce `None` unless the unscaled value fits in the narrower type _and_ the
scale is small enough to fit as well (without rounding)?
But in this case, the user specifically requested rounding, so it seems odd
to fail some of the time and not fail other times? In particular, going from
`Decimal32(9, 4)` to `Decimal32(9, 2)` would succeed with rounding, but going
from `Decimal64(18, 4)` to `Decimal32(9, 2)` would fail for a value like
`1234567.8901`, even tho the rescaled result `1234567.89` is a valid
`Decimal32(9, 2)`?
In order to correctly handle all valid narrowing conversions, we need to
rescale+round first, using the original variant type, and _then_ try to narrow
the result to the requested type.
The converse hazard exists for widening, where we need to widen first, and
_then_ rescale+round:
* Converting the `Decimal32(9, 9)` value `0.999999999` to `Decimal64(*, 0)`
produces an intermediate value ten decimal digits.
* Converting the `Decimal(9, 0)` value `999999999` to `Decimal64(18, 9)`
produces an intermediate (and final) value with 18 digits.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]