liamzwbao commented on code in PR #8552:
URL: https://github.com/apache/arrow-rs/pull/8552#discussion_r2408966328
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
+
+ // rounding in the same way as
convert_to_smaller_scale_decimal in arrow-cast
+ let half = div.checked_div($to_int_ty(2))?;
+ let half_neg = half.checked_neg()?;
+ let adjusted = match v >= $to_int_ty(0) {
+ true if r >= half => d.checked_add($to_int_ty(1))?,
+ false if r <= half_neg => d.checked_sub($to_int_ty(1))?,
+ _ => d,
+ };
+ Some(adjusted)
+ };
+
+ scaled.filter(|v| $validate(*v, $precision))
+ })()
Review Comment:
yeah, it's redundant, removed
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
Review Comment:
yeah, there is an exception for i256
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
Review Comment:
same here, the i256 type works different from others
##########
parquet-variant-compute/src/variant_to_arrow.rs:
##########
@@ -145,62 +165,75 @@ pub(crate) fn
make_primitive_variant_to_arrow_row_builder<'a>(
) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
use PrimitiveVariantToArrowRowBuilder::*;
- let builder = match data_type {
- DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
- cast_options,
- capacity,
- )),
- _ if data_type.is_primitive() => {
- return Err(ArrowError::NotYetImplemented(format!(
- "Primitive data_type {data_type:?} not yet implemented"
- )));
- }
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Not a primitive type: {data_type:?}"
- )));
- }
- };
+ let builder =
+ match data_type {
+ DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float16 =>
Float16(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float32 =>
Float32(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Float64 =>
Float64(VariantToPrimitiveArrowRowBuilder::new(
+ cast_options,
+ capacity,
+ )),
+ DataType::Decimal32(precision, scale) => Decimal32(
+ VariantToDecimalArrowRowBuilder::new(cast_options, capacity,
*precision, *scale)?,
Review Comment:
Thanks for catching this! Let me dig a bit deeper and improve this
conversion. Indeed it's possible to get a null for a valid decimal.
For negative scale, I think it's covered in this method, I will add more
tests for it. Also, the validate function in the macro `scale_variant_decimal`
will check and make sure it fit into a decimal with specific precision
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
Review Comment:
I think rounding makes sense here as arrow variant conversion could also
cause precision loss due to rescaling. But we could also introduce a new option
to fail on precision loss if needed
##########
parquet-variant-compute/src/variant_get.rs:
##########
@@ -2679,4 +2687,371 @@ mod test {
Arc::new(struct_array)
}
+
+ macro_rules! max_unscaled_value {
+ (32, $precision:expr) => {
+ (u32::pow(10, $precision as u32) - 1) as i32
+ };
+ (64, $precision:expr) => {
+ (u64::pow(10, $precision as u32) - 1) as i64
+ };
+ (128, $precision:expr) => {
+ (u128::pow(10, $precision as u32) - 1) as i128
+ };
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_var_scales_to_scale2() {
+ // Build unshredded variant values with different scales
+ let mut builder = crate::VariantArrayBuilder::new(4);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
2).unwrap())); // 12.34
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
3).unwrap())); // 1.234
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1234,
0).unwrap())); // 1234
+ builder.append_null();
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.precision(), 9);
+ assert_eq!(result.scale(), 2);
+ assert_eq!(result.value(0), 1234);
+ assert_eq!(result.value(1), 123);
+ assert_eq!(result.value(2), 123400);
+ assert!(result.is_null(3));
+ }
+
+ #[test]
+ fn get_decimal32_unshredded_scale_down_rounding() {
+ let mut builder = crate::VariantArrayBuilder::new(2);
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(1245,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1235,
3).unwrap()));
+ builder.append_variant(Variant::from(VariantDecimal4::try_new(-1245,
3).unwrap()));
+ let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+ let field = Field::new("result", DataType::Decimal32(9, 2), true);
+ let options =
GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+ let result = variant_get(&variant_array, options).unwrap();
+ let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+ assert_eq!(result.value(0), 124);
+ assert_eq!(result.value(1), 125);
+ assert_eq!(result.value(2), -124);
+ assert_eq!(result.value(3), -125);
+ }
+
+ #[test]
+ fn get_decimal32_precision_overflow_safe() {
+ // Exceed Decimal32 max precision (9) after scaling
+ let mut builder = crate::VariantArrayBuilder::new(1);
+ builder.append_variant(Variant::from(
+ VariantDecimal4::try_new(max_unscaled_value!(32,
DECIMAL32_MAX_PRECISION), 0).unwrap(),
Review Comment:
for this one I make the `MAX_UNSCALED_VALUE` public, so we don't need to
worry about adding new dependency
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
+
+ // rounding in the same way as
convert_to_smaller_scale_decimal in arrow-cast
Review Comment:
those are internal helper functions, could refactor the logic, but not sure
if it's good to expose that. WDYT @alamb ?
##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -61,6 +61,48 @@ impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+macro_rules! scale_variant_decimal {
+ ($variant:expr, $variant_method:ident, $to_int_ty:expr,
$output_scale:expr, $precision:expr, $validate:path) => {{
+ (|| -> Option<_> {
+ let variant = $variant.$variant_method()?;
+ let input_scale = variant.scale() as i8;
+ let variant = $to_int_ty(variant.integer());
+ let ten = $to_int_ty(10);
+
+ let scaled = if input_scale == $output_scale {
+ Some(variant)
+ } else if input_scale < $output_scale {
+ // scale_up means output has more fractional digits than input
+ // multiply integer by 10^(output_scale - input_scale)
+ let delta = ($output_scale - input_scale) as u32;
+ let mul = ten.checked_pow(delta)?;
+ variant.checked_mul(mul)
+ } else {
+ // scale_down means output has fewer fractional digits than
input
+ // divide by 10^(input_scale - output_scale) with rounding
+ let delta = (input_scale - $output_scale) as u32;
+ let div = ten.checked_pow(delta)?;
+ let v = variant;
+ let d = v.checked_div(div)?;
+ let r = v % div;
Review Comment:
no, it's just v looks more concise during calculation, but let me remove it
and just use variant
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]