This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new cf233c855f refactor: separate get_common_type / get_result_type for 
temporal type (#6250)
cf233c855f is described below

commit cf233c855f46fad8355342c581ab55f610758b6c
Author: jakevin <[email protected]>
AuthorDate: Fri May 5 23:27:47 2023 +0800

    refactor: separate get_common_type / get_result_type for temporal type 
(#6250)
---
 .../core/tests/sqllogictests/test_files/dates.slt  |   4 +-
 .../tests/sqllogictests/test_files/interval.slt    |  12 +-
 .../tests/sqllogictests/test_files/timestamps.slt  |   4 +-
 datafusion/expr/src/type_coercion/binary.rs        | 180 +++++++++------------
 4 files changed, 90 insertions(+), 110 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt 
b/datafusion/core/tests/sqllogictests/test_files/dates.slt
index 0cb96d8bc7..bfeb92fd59 100644
--- a/datafusion/core/tests/sqllogictests/test_files/dates.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt
@@ -94,13 +94,13 @@ query error DataFusion error: Error during planning: 
Unsupported argument types\
 SELECT DATE '2023-04-09' - DATE '2023-04-02';
 
 # DATE minus Timestamp
-query P
+query ?
 SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
 ----
 0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs
 
 # Timestamp minus DATE
-query P
+query ?
 SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
 ----
 0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
diff --git a/datafusion/core/tests/sqllogictests/test_files/interval.slt 
b/datafusion/core/tests/sqllogictests/test_files/interval.slt
index 15b18f51dd..82131f5d2a 100644
--- a/datafusion/core/tests/sqllogictests/test_files/interval.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/interval.slt
@@ -327,10 +327,10 @@ select '1 month'::interval + 
'1980-01-01T12:00:00'::timestamp;
 
 # Exected error: interval (scalar) - date / timestamp (scalar)
 
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a 
common type to coerce the types to
 select '1 month'::interval - '1980-01-01'::date;
 
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated 
because there isn't a common type to coerce the types to
 select '1 month'::interval - '1980-01-01T12:00:00'::timestamp;
 
 # interval (array) + date / timestamp (array)
@@ -349,10 +349,10 @@ select i + ts from t;
 2000-02-01T00:01:00
 
 # expected error interval (array) - date / timestamp (array)
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a 
common type to coerce the types to
 select i - d from t;
 
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated 
because there isn't a common type to coerce the types to
 select i - ts from t;
 
 
@@ -372,10 +372,10 @@ select '1 month'::interval + ts from t;
 2000-03-01T00:00:00
 
 # expected error interval (scalar) - date / timestamp (array)
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a 
common type to coerce the types to
 select '1 month'::interval - d from t;
 
-query error DataFusion error: type_coercion\ncaused by\nError during planning: 
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning: 
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated 
because there isn't a common type to coerce the types to
 select '1 month'::interval - ts from t;
 
 statement ok
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt 
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index e1102d626c..42a5f8e3a4 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo;
 2003-07-12T01:31:15.000123463
 
 # Timestamp + Timestamp => error
-statement error DataFusion error: Error during planning: Unsupported argument 
types\. Can not evaluate Timestamp\(Nanosecond, None\) \+ 
Timestamp\(Nanosecond, None\)
+query error DataFusion error: type_coercion\ncaused by\nInternal error: 
Unsupported operation Plus between Timestamp\(Nanosecond, None\) and 
Timestamp\(Nanosecond, None\)\. This was likely caused by a bug in DataFusion's 
code and we would welcome that you file an bug report in our issue tracker
 SELECT ts1 + ts2
 FROM foo;
 
@@ -1031,7 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp - 
'2010-01-01T00:00:00'::timestamp;
 0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs
 
 # Interval - Timestamp => error
-statement error DataFusion error: type_coercion\ncaused by\nError during 
planning: interval can't subtract timestamp/date
+statement error DataFusion error: type_coercion\ncaused by\nError during 
planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be 
evaluated because there isn't a common type to coerce the types to
 SELECT i - ts1 from FOO;
 
 statement ok
diff --git a/datafusion/expr/src/type_coercion/binary.rs 
b/datafusion/expr/src/type_coercion/binary.rs
index 006b56c7fe..f01b06e0db 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -19,17 +19,70 @@
 
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{
-    DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, 
DECIMAL128_MAX_SCALE,
+    DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
 };
 
 use datafusion_common::DataFusionError;
 use datafusion_common::Result;
 
-use crate::type_coercion::{
-    is_datetime, is_decimal, is_interval, is_numeric, is_timestamp,
-};
+use crate::type_coercion::{is_datetime, is_decimal, is_interval, is_numeric};
 use crate::Operator;
 
+/// Returns the result type of applying mathematics operations such as
+/// `+` to arguments of `lhs_type` and `rhs_type`.
+fn mathematics_temporal_result_type(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+    use arrow::datatypes::IntervalUnit::*;
+    use arrow::datatypes::TimeUnit::*;
+
+    if !is_interval(lhs_type)
+        && !is_interval(rhs_type)
+        && !is_datetime(lhs_type)
+        && !is_datetime(rhs_type)
+    {
+        return None;
+    };
+
+    match (lhs_type, rhs_type) {
+        // datetime +/- interval
+        (Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()),
+        (Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()),
+        (Interval(_), Date32) => Some(rhs_type.clone()),
+        (Date32, Interval(_)) => Some(lhs_type.clone()),
+        (Interval(_), Date64) => Some(rhs_type.clone()),
+        (Date64, Interval(_)) => Some(lhs_type.clone()),
+        // interval +/-
+        (Interval(YearMonth), Interval(YearMonth)) => 
Some(Interval(YearMonth)),
+        (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
+        (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
+        // timestamp - timestamp
+        (Timestamp(Second, _), Timestamp(Second, _))
+        | (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => {
+            Some(Interval(DayTime))
+        }
+        (Timestamp(Microsecond, _), Timestamp(Microsecond, _))
+        | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => {
+            Some(Interval(MonthDayNano))
+        }
+        (Timestamp(_, _), Timestamp(_, _)) => None,
+        // TODO: date minus date
+        // date - timestamp, timestamp - date
+        (Date32, Timestamp(_, _))
+        | (Timestamp(_, _), Date32)
+        | (Date64, Timestamp(_, _))
+        | (Timestamp(_, _), Date64) => {
+            // TODO: make get_result_type must after coerce type.
+            // if type isn't coerced, we need get common type, and then get 
result type.
+            let common_type = temporal_coercion(lhs_type, rhs_type);
+            common_type.and_then(|t| mathematics_temporal_result_type(&t, &t))
+        }
+        _ => None,
+    }
+}
+
 /// returns the resulting type of a binary expression evaluating the `op` with 
the left and right hand types
 pub fn get_result_type(
     lhs_type: &DataType,
@@ -67,12 +120,12 @@ pub fn get_result_type(
         | Operator::IsDistinctFrom
         | Operator::IsNotDistinctFrom => Some(DataType::Boolean),
         Operator::Plus | Operator::Minus
-            if is_datetime(lhs_type)
-                || is_datetime(rhs_type)
-                || is_interval(lhs_type)
-                || is_interval(rhs_type) =>
+            if is_datetime(lhs_type) && is_datetime(rhs_type)
+                || (is_interval(lhs_type) && is_interval(rhs_type))
+                || (is_datetime(lhs_type) && is_interval(rhs_type))
+                || (is_interval(lhs_type) && is_datetime(rhs_type)) =>
         {
-            temporal_add_sub_coercion(lhs_type, rhs_type, op)
+            mathematics_temporal_result_type(lhs_type, rhs_type)
         }
         // following same with `coerce_types`
         Operator::BitwiseAnd
@@ -126,19 +179,13 @@ pub fn coerce_types(
         | Operator::LtEq
         | Operator::IsDistinctFrom
         | Operator::IsNotDistinctFrom => comparison_coercion(lhs_type, 
rhs_type),
-        // interval - timestamp is an erroneous case, cannot coerce a type
         Operator::Plus | Operator::Minus
-            if is_datetime(lhs_type)
-                || is_datetime(rhs_type)
-                || is_interval(lhs_type)
-                || is_interval(rhs_type) =>
+            if is_interval(lhs_type) && is_interval(rhs_type) =>
         {
-            if is_interval(lhs_type) && is_datetime(rhs_type) && *op == 
Operator::Minus {
-                return Err(DataFusionError::Plan(
-                    "interval can't subtract timestamp/date".to_string(),
-                ));
-            }
-            temporal_add_sub_coercion(lhs_type, rhs_type, op)
+            temporal_coercion(lhs_type, rhs_type)
+        }
+        Operator::Minus if is_datetime(lhs_type) && is_datetime(rhs_type) => {
+            temporal_coercion(lhs_type, rhs_type)
         }
         // for math expressions, the final value of the coercion is also the 
return type
         // because coercion favours higher information types
@@ -208,7 +255,10 @@ pub fn math_decimal_coercion(
 
 /// Returns the output type of applying bitwise operations such as
 /// `&`, `|`, or `xor`to arguments of `lhs_type` and `rhs_type`.
-fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> 
Option<DataType> {
+pub(crate) fn bitwise_coercion(
+    left_type: &DataType,
+    right_type: &DataType,
+) -> Option<DataType> {
     use arrow::datatypes::DataType::*;
 
     if !both_numeric_or_null_and_numeric(left_type, right_type) {
@@ -260,80 +310,6 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<D
         .or_else(|| string_numeric_coercion(lhs_type, rhs_type))
 }
 
-// This function performs temporal coercion between the two input data types 
and the provided operator.
-// It returns None (it will convert a Err outside) if the operands are an 
unsupported/wrong operation.
-// If the coercion is possible, it returns a new data type as Some(DataType).
-pub fn temporal_add_sub_coercion(
-    lhs_type: &DataType,
-    rhs_type: &DataType,
-    op: &Operator,
-) -> Option<DataType> {
-    match (lhs_type, rhs_type, op) {
-        // if an interval is being added/subtracted from a date/timestamp, 
return the date/timestamp data type
-        (lhs, rhs, _) if is_interval(lhs) && is_datetime(rhs) => 
Some(rhs.clone()),
-        (lhs, rhs, _) if is_interval(rhs) && is_datetime(lhs) => 
Some(lhs.clone()),
-        // if two timestamps are being subtracted, check their time units and 
return the corresponding interval data type
-        (lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs) 
=> {
-            handle_timestamp_minus(lhs, rhs)
-        }
-        // if two intervals are being added/subtracted, check their interval 
units and return the corresponding interval data type
-        (lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
-            handle_interval_addition(lhs, rhs)
-        }
-        (lhs, rhs, Operator::Minus) if is_datetime(lhs) && is_datetime(rhs) => 
{
-            temporal_coercion(lhs, rhs)
-        }
-        // return None if no coercion is possible
-        _ => None,
-    }
-}
-
-// This function checks if two interval data types have the same interval unit 
and returns an interval data type
-// representing the sum of them. If the two interval data types have different 
units, it returns an interval data type
-// with "IntervalUnit::MonthDayNano". If the two interval data types are 
already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime",
-// it returns an interval data type with the same unit as the operands.
-fn handle_interval_addition(lhs: &DataType, rhs: &DataType) -> 
Option<DataType> {
-    match (lhs, rhs) {
-        // operation with the same types
-        (
-            DataType::Interval(IntervalUnit::YearMonth),
-            DataType::Interval(IntervalUnit::YearMonth),
-        ) => Some(DataType::Interval(IntervalUnit::YearMonth)),
-        (
-            DataType::Interval(IntervalUnit::DayTime),
-            DataType::Interval(IntervalUnit::DayTime),
-        ) => Some(DataType::Interval(IntervalUnit::DayTime)),
-        // operation with MonthDayNano's or different types
-        (_, _) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
-    }
-}
-
-// This function checks if two timestamp data types have the same time unit 
and returns an interval data type
-// representing the difference between them, either "IntervalUnit::DayTime" if 
the time unit is second or millisecond,
-// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or 
nanosecond. If the two timestamp data types have
-// different time units, it returns an error indicating that "The timestamps 
have different types".
-fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
-    match (lhs, rhs) {
-        (
-            DataType::Timestamp(TimeUnit::Second, _),
-            DataType::Timestamp(TimeUnit::Second, _),
-        )
-        | (
-            DataType::Timestamp(TimeUnit::Millisecond, _),
-            DataType::Timestamp(TimeUnit::Millisecond, _),
-        ) => Some(DataType::Interval(IntervalUnit::DayTime)),
-        (
-            DataType::Timestamp(TimeUnit::Microsecond, _),
-            DataType::Timestamp(TimeUnit::Microsecond, _),
-        )
-        | (
-            DataType::Timestamp(TimeUnit::Nanosecond, _),
-            DataType::Timestamp(TimeUnit::Nanosecond, _),
-        ) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
-        (_, _) => None,
-    }
-}
-
 /// Returns the output type of applying numeric operations such as `=`
 /// to arguments `lhs_type` and `rhs_type` if one is numeric and one
 /// is `Utf8`/`LargeUtf8`.
@@ -777,9 +753,15 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {
 
 /// Coercion rules for Temporal columns: the type that both lhs and rhs can be
 /// casted to for the purpose of a date computation
+/// For interval arithmetic, it doesn't handle datetime type +/- interval
 fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
     use arrow::datatypes::DataType::*;
+    use arrow::datatypes::IntervalUnit::*;
     match (lhs_type, rhs_type) {
+        // interval +/-
+        (Interval(YearMonth), Interval(YearMonth)) => 
Some(Interval(YearMonth)),
+        (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
+        (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
         (Date64, Date32) | (Date32, Date64) => Some(Date64),
         (Utf8, Date32) | (Date32, Utf8) => Some(Date32),
         (Utf8, Date64) | (Date64, Utf8) => Some(Date64),
@@ -1055,14 +1037,12 @@ mod tests {
 
     #[test]
     fn test_date_timestamp_arithmetic_error() -> Result<()> {
-        let err = coerce_types(
+        let common_type = coerce_types(
             &DataType::Timestamp(TimeUnit::Nanosecond, None),
             &Operator::Minus,
             &DataType::Timestamp(TimeUnit::Millisecond, None),
-        )
-        .unwrap_err()
-        .to_string();
-        assert_contains!(&err, "Timestamp(Nanosecond, None) - 
Timestamp(Millisecond, None) can't be evaluated because there isn't a common 
type to coerce the types to");
+        )?;
+        assert_eq!(common_type.to_string(), "Timestamp(Millisecond, None)");
 
         let err = coerce_types(&DataType::Date32, &Operator::Plus, 
&DataType::Date64)
             .unwrap_err()

Reply via email to