Jefffrey commented on code in PR #17808:
URL: https://github.com/apache/datafusion/pull/17808#discussion_r2386609740
##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -955,28 +963,106 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<Data
match (lhs_type, rhs_type) {
// Prefer decimal data type over floating point for comparison
operation
+ (Decimal32(_, _), Decimal32(_, _)) => get_wider_decimal_type(lhs_type,
rhs_type),
+ (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_,
_)) => {
+ get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+ }
+ (Decimal32(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+ (Decimal64(_, _), Decimal64(_, _)) => get_wider_decimal_type(lhs_type,
rhs_type),
+ (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_,
_)) => {
+ get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+ }
+ (Decimal64(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
(Decimal128(_, _), Decimal128(_, _)) => {
get_wider_decimal_type(lhs_type, rhs_type)
}
+ (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_,
_)) => {
+ get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+ }
(Decimal128(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
- (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
(Decimal256(_, _), Decimal256(_, _)) => {
get_wider_decimal_type(lhs_type, rhs_type)
}
+ (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_,
_)) => {
+ get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+ }
(Decimal256(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+ (_, Decimal32(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+ (_, Decimal64(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+ (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
(_, Decimal256(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
(_, _) => None,
}
}
+/// Handle cross-variant decimal widening by choosing the larger variant
+fn get_wider_decimal_type_cross_variant(
+ lhs_type: &DataType,
+ rhs_type: &DataType,
+) -> Option<DataType> {
+ use arrow::datatypes::DataType::*;
+
+ let (p1, s1) = match lhs_type {
+ Decimal32(p, s) => (*p, *s),
+ Decimal64(p, s) => (*p, *s),
+ Decimal128(p, s) => (*p, *s),
+ Decimal256(p, s) => (*p, *s),
+ _ => return None,
+ };
+
+ let (p2, s2) = match rhs_type {
+ Decimal32(p, s) => (*p, *s),
+ Decimal64(p, s) => (*p, *s),
+ Decimal128(p, s) => (*p, *s),
+ Decimal256(p, s) => (*p, *s),
+ _ => return None,
+ };
+
+ // max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2)
+ let s = s1.max(s2);
+ let range = (p1 as i8 - s1).max(p2 as i8 - s2);
+ let required_precision = (range + s) as u8;
Review Comment:
What happens if we have
Decimal256 with precision 76 (max) and scale 0, and Decimal128 with
precision 38 (max) and scale 1?
Then `s = 1`, `range = 76`, and `required_precision = 76 + 1 = 77`, which
exceeds the maximum precision of 76 -> overflow?
Is this a valid case?
##########
datafusion/spark/src/function/math/width_bucket.rs:
##########
@@ -96,7 +96,14 @@ impl ScalarUDFImpl for SparkWidthBucket {
let is_num = |t: &DataType| {
matches!(
t,
- Int8 | Int16 | Int32 | Int64 | Float32 | Float64 |
Decimal128(_, _)
+ Int8 | Int16
+ | Int32
+ | Int64
+ | Float32
+ | Float64
+ | Decimal32(_, _)
+ | Decimal64(_, _)
+ | Decimal128(_, _)
Review Comment:
We could potentially use
[`is_signed_numeric()`](https://github.com/apache/datafusion/blob/2d947b31d0355710dc179d1d72ca5366b7721b2a/datafusion/expr/src/type_coercion/mod.rs#L43-L59)
here (though that would also bring in `Float16`).
##########
datafusion/physical-plan/src/joins/sort_merge_join/stream.rs:
##########
@@ -1994,6 +1996,8 @@ fn is_join_arrays_equal(
DataType::BinaryView => compare_value!(BinaryViewArray),
DataType::FixedSizeBinary(_) =>
compare_value!(FixedSizeBinaryArray),
DataType::LargeBinary => compare_value!(LargeBinaryArray),
+ DataType::Decimal32(..) => compare_value!(Decimal32Array),
+ DataType::Decimal64(..) => compare_value!(Decimal64Array),
Review Comment:
Yeah, I am curious why `Decimal256` is omitted. Perhaps we can add it, as
long as doing so doesn't cause a compiler error?
##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -357,6 +357,14 @@ fn math_decimal_coercion(
| (Decimal256(_, _), Decimal256(_, _)) => {
Some((lhs_type.clone(), rhs_type.clone()))
}
+ // Cross-variant decimal coercion - choose larger variant with
appropriate precision/scale
+ (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_,
_))
+ | (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_,
_))
+ | (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_,
_))
+ | (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_,
_)) => {
+ let coerced_type = get_wider_decimal_type_cross_variant(lhs_type,
rhs_type)?;
+ Some((coerced_type.clone(), coerced_type))
+ }
Review Comment:
```suggestion
// Cross-variant decimal coercion - choose larger variant with
appropriate precision/scale
(lhs, rhs) if is_decimal(lhs) && is_decimal(rhs) &&
std::mem::discriminant(lhs) != std::mem::discriminant(rhs) => {
let coerced_type =
get_wider_decimal_type_cross_variant(lhs_type, rhs_type)?;
Some((coerced_type.clone(), coerced_type))
}
```
This uses
[`is_decimal`](https://github.com/apache/datafusion/blob/2d947b31d0355710dc179d1d72ca5366b7721b2a/datafusion/expr/src/type_coercion/mod.rs#L92-L101)
and
[`std::mem::discriminant`](https://doc.rust-lang.org/std/mem/fn.discriminant.html).
I don't know whether it's better to list out each combination (more explicit,
but it is easier to overlook a missing combination) or to do it this way,
which might not be as clear but could be more robust 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]