alamb commented on code in PR #19460:
URL: https://github.com/apache/datafusion/pull/19460#discussion_r2645594640
##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -290,17 +291,214 @@ impl<'a> TypeCoercionRewriter<'a> {
right: Expr,
right_schema: &DFSchema,
) -> Result<(Expr, Expr)> {
- let (left_type, right_type) = BinaryTypeCoercer::new(
- &left.get_type(left_schema)?,
+ let left_data_type = left.get_type(left_schema)?;
+ let right_data_type = right.get_type(right_schema)?;
+ let (left_type, right_type) =
+ BinaryTypeCoercer::new(&left_data_type, &op, &right_data_type)
+ .get_input_types()?;
+ let left_cast_ok = can_cast_types(&left_data_type, &left_type);
+ let right_cast_ok = can_cast_types(&right_data_type, &right_type);
+
+ // handle special cases for
+ // * Date +/- int => Date
+ // * Date + time => Timestamp
+ let left_expr = if !left_cast_ok {
+ Self::coerce_date_time_math_op(
+ left,
+ &op,
+ &left_data_type,
+ &left_type,
+ &right_type,
+ )?
+ } else {
+ left.cast_to(&left_type, left_schema)?
+ };
+
+ let right_expr = if !right_cast_ok {
+ Self::coerce_date_time_math_op(
+ right,
+ &op,
+ &right_data_type,
+ &right_type,
+ &left_type,
+ )?
+ } else {
+ right.cast_to(&right_type, right_schema)?
+ };
+
+ Ok((left_expr, right_expr))
+ }
+
+ fn coerce_date_time_math_op(
+ expr: Expr,
+ op: &Operator,
+ left_current_type: &DataType,
+ left_target_type: &DataType,
+ right_target_type: &DataType,
+ ) -> Result<Expr, DataFusionError> {
+ use DataType::*;
+
+ let e = match (
&op,
- &right.get_type(right_schema)?,
- )
- .get_input_types()?;
+ &left_current_type,
+ &left_target_type,
+ &right_target_type,
+ ) {
+ // int +/- date => date
+ (
+ Operator::Plus | Operator::Minus,
+ Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 |
UInt64,
+ Interval(IntervalUnit::MonthDayNano),
+ Date32 | Date64,
+ ) => {
+ // cast to i64 first
+ let expr = if *left_current_type == Int64 {
+ expr
+ } else {
+ Expr::Cast(Cast::new(Box::new(expr), Int64))
+ };
+ // next, multiply by 86400 to get seconds
+ let expr = Expr::BinaryExpr(BinaryExpr::new(
+ Box::new(expr),
+ Operator::Multiply,
+ Box::new(Expr::Literal(
+ ScalarValue::Int64(Some(SECONDS_IN_DAY)),
+ None,
+ )),
+ ));
+ // cast to duration
+ let expr =
Review Comment:
I see that `cast_to` requires a schema, so you can't call it directly here,
but you could add a small helper function like
```rust
fn cast(expr: Expr, target_type: DataType) -> Expr {
    Expr::Cast(Cast::new(Box::new(expr), target_type))
}
```
and then write this as
```rust
let expr = cast(expr, target_type);
```
which would save a lot of boilerplate.
##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -290,17 +291,214 @@ impl<'a> TypeCoercionRewriter<'a> {
right: Expr,
right_schema: &DFSchema,
) -> Result<(Expr, Expr)> {
- let (left_type, right_type) = BinaryTypeCoercer::new(
- &left.get_type(left_schema)?,
+ let left_data_type = left.get_type(left_schema)?;
+ let right_data_type = right.get_type(right_schema)?;
+ let (left_type, right_type) =
+ BinaryTypeCoercer::new(&left_data_type, &op, &right_data_type)
+ .get_input_types()?;
+ let left_cast_ok = can_cast_types(&left_data_type, &left_type);
+ let right_cast_ok = can_cast_types(&right_data_type, &right_type);
+
+ // handle special cases for
+ // * Date +/- int => Date
+ // * Date + time => Timestamp
+ let left_expr = if !left_cast_ok {
+ Self::coerce_date_time_math_op(
+ left,
+ &op,
+ &left_data_type,
+ &left_type,
+ &right_type,
+ )?
+ } else {
+ left.cast_to(&left_type, left_schema)?
+ };
+
+ let right_expr = if !right_cast_ok {
+ Self::coerce_date_time_math_op(
+ right,
+ &op,
+ &right_data_type,
+ &right_type,
+ &left_type,
+ )?
+ } else {
+ right.cast_to(&right_type, right_schema)?
+ };
+
+ Ok((left_expr, right_expr))
+ }
+
+ fn coerce_date_time_math_op(
+ expr: Expr,
+ op: &Operator,
+ left_current_type: &DataType,
+ left_target_type: &DataType,
+ right_target_type: &DataType,
+ ) -> Result<Expr, DataFusionError> {
+ use DataType::*;
+
+ let e = match (
&op,
- &right.get_type(right_schema)?,
- )
- .get_input_types()?;
+ &left_current_type,
+ &left_target_type,
+ &right_target_type,
+ ) {
+ // int +/- date => date
+ (
+ Operator::Plus | Operator::Minus,
+ Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 |
UInt64,
+ Interval(IntervalUnit::MonthDayNano),
+ Date32 | Date64,
+ ) => {
+ // cast to i64 first
+ let expr = if *left_current_type == Int64 {
+ expr
+ } else {
+ Expr::Cast(Cast::new(Box::new(expr), Int64))
+ };
+ // next, multiply by 86400 to get seconds
+ let expr = Expr::BinaryExpr(BinaryExpr::new(
+ Box::new(expr),
+ Operator::Multiply,
+ Box::new(Expr::Literal(
+ ScalarValue::Int64(Some(SECONDS_IN_DAY)),
+ None,
+ )),
+ ));
+ // cast to duration
+ let expr =
+ Expr::Cast(Cast::new(Box::new(expr),
Duration(TimeUnit::Second)));
+
+ // finally cast to interval
+ Expr::Cast(Cast::new(
+ Box::new(expr),
+ Interval(IntervalUnit::MonthDayNano),
+ ))
+ }
+ // These might seem to be a bit convoluted, however for arrow to
do date + time arithmetic
Review Comment:
Are there additional date/time kernels we should add to arrow-rs that would
make this code easier? (As a follow-on set of PRs, of course.)
##########
datafusion/sqllogictest/test_files/datetime/arith_date_date.slt:
##########
@@ -0,0 +1,14 @@
+# date - date → integer
+# Subtract dates, producing the number of days elapsed
+# date '2001-10-01' - date '2001-09-28' → 3
+
+# note that datafusion returns Duration whereas postgres returns an int
Review Comment:
If we file a follow-on ticket for this, I recommend we also leave a link in
the code back to the issue.
##########
datafusion/optimizer/src/analyzer/type_coercion.rs:
##########
@@ -290,17 +291,214 @@ impl<'a> TypeCoercionRewriter<'a> {
right: Expr,
right_schema: &DFSchema,
) -> Result<(Expr, Expr)> {
- let (left_type, right_type) = BinaryTypeCoercer::new(
- &left.get_type(left_schema)?,
+ let left_data_type = left.get_type(left_schema)?;
+ let right_data_type = right.get_type(right_schema)?;
+ let (left_type, right_type) =
+ BinaryTypeCoercer::new(&left_data_type, &op, &right_data_type)
+ .get_input_types()?;
+ let left_cast_ok = can_cast_types(&left_data_type, &left_type);
+ let right_cast_ok = can_cast_types(&right_data_type, &right_type);
+
+ // handle special cases for
+ // * Date +/- int => Date
+ // * Date + time => Timestamp
+ let left_expr = if !left_cast_ok {
+ Self::coerce_date_time_math_op(
+ left,
+ &op,
+ &left_data_type,
+ &left_type,
+ &right_type,
+ )?
+ } else {
+ left.cast_to(&left_type, left_schema)?
+ };
+
+ let right_expr = if !right_cast_ok {
+ Self::coerce_date_time_math_op(
+ right,
+ &op,
+ &right_data_type,
+ &right_type,
+ &left_type,
+ )?
+ } else {
+ right.cast_to(&right_type, right_schema)?
+ };
+
+ Ok((left_expr, right_expr))
+ }
+
+ fn coerce_date_time_math_op(
+ expr: Expr,
+ op: &Operator,
+ left_current_type: &DataType,
+ left_target_type: &DataType,
+ right_target_type: &DataType,
+ ) -> Result<Expr, DataFusionError> {
+ use DataType::*;
+
+ let e = match (
&op,
- &right.get_type(right_schema)?,
- )
- .get_input_types()?;
+ &left_current_type,
+ &left_target_type,
+ &right_target_type,
+ ) {
+ // int +/- date => date
+ (
+ Operator::Plus | Operator::Minus,
+ Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 |
UInt64,
+ Interval(IntervalUnit::MonthDayNano),
+ Date32 | Date64,
+ ) => {
+ // cast to i64 first
+ let expr = if *left_current_type == Int64 {
+ expr
+ } else {
+ Expr::Cast(Cast::new(Box::new(expr), Int64))
+ };
+ // next, multiply by 86400 to get seconds
+ let expr = Expr::BinaryExpr(BinaryExpr::new(
Review Comment:
I think the boilerplate code here for creating expressions somewhat obscures
what is going on. The comments make it understandable, but I think you could
write this much more succinctly using the fluent API, which would also
help readability.
For example:
```rust
let expr = expr * lit(ScalarValue::from(SECONDS_IN_DAY));
```
The same comment applies to the rest of this file too
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]