This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new da373e591f fix: support data/timestamp minus (#5960)
da373e591f is described below

commit da373e591fb471afdb236d2a0991944b9c4a8660
Author: jakevin <[email protected]>
AuthorDate: Wed Apr 12 10:38:59 2023 +0800

    fix: support data/timestamp minus (#5960)
---
 .../core/tests/sqllogictests/test_files/dates.slt  | 16 ++++++
 .../tests/sqllogictests/test_files/timestamps.slt  | 12 +++++
 datafusion/expr/src/type_coercion/aggregates.rs    |  3 +-
 datafusion/expr/src/type_coercion/binary.rs        | 60 +++-------------------
 datafusion/physical-expr/src/type_coercion.rs      |  2 +-
 5 files changed, 36 insertions(+), 57 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt 
b/datafusion/core/tests/sqllogictests/test_files/dates.slt
index d75fba6f51..4102460704 100644
--- a/datafusion/core/tests/sqllogictests/test_files/dates.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt
@@ -88,3 +88,19 @@ h
 statement error DataFusion error: Error during planning: 
Timestamp\(Nanosecond, Some\("\+00:00"\)\) \+ Utf8 can't be evaluated because 
there isn't a common type to coerce the types to
 select i_item_desc from test
 where d3_date > now() + '5 days';
+
+# DATE minus DATE
+query error DataFusion error: Error during planning: Date32 \- Date32 can't be 
evaluated because there isn't a common type to coerce the types to
+SELECT DATE '2023-04-09' - DATE '2023-04-02';
+
+# DATE minus Timestamp
+query P
+SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs
+
+# Timestamp minus DATE
+query P
+SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
+----
+0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt 
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index 3887fc84ad..f51a272a6b 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -660,6 +660,18 @@ statement error DataFusion error: Error during planning: 
Timestamp\(Nanosecond,
 SELECT ts1 + ts2
 FROM foo;
 
+# Timestamp - Timestamp
+query ?
+SELECT '2000-01-01T00:00:00'::timestamp - '2000-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+
+# large timestamp - small timestamp
+query ?
+SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs
+
 # Interval - Timestamp => error
 statement error DataFusion error: Error during planning: 
Interval\(MonthDayNano\) - Timestamp\(Nanosecond, None\) can't be evaluated 
because there isn't a common type to coerce the types to
 SELECT i - ts1 from FOO;
diff --git a/datafusion/expr/src/type_coercion/aggregates.rs 
b/datafusion/expr/src/type_coercion/aggregates.rs
index efcd503cf4..cc57c72b02 100644
--- a/datafusion/expr/src/type_coercion/aggregates.rs
+++ b/datafusion/expr/src/type_coercion/aggregates.rs
@@ -266,8 +266,7 @@ fn check_arg_count(
         TypeSignature::VariadicAny => {
             if input_types.is_empty() {
                 return Err(DataFusionError::Plan(format!(
-                    "The function {:?} expects at least one argument",
-                    agg_fun
+                    "The function {agg_fun:?} expects at least one argument"
                 )));
             }
         }
diff --git a/datafusion/expr/src/type_coercion/binary.rs 
b/datafusion/expr/src/type_coercion/binary.rs
index 33ec5e4365..7f767d9368 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -112,7 +112,9 @@ pub fn coerce_types(
         | Operator::Lt
         | Operator::Gt
         | Operator::GtEq
-        | Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
+        | Operator::LtEq
+        | Operator::IsDistinctFrom
+        | Operator::IsNotDistinctFrom => comparison_coercion(lhs_type, 
rhs_type),
         // interval - timestamp is an erroneous case, cannot coerce a type
         Operator::Plus | Operator::Minus
             if (is_date(lhs_type)
@@ -140,9 +142,6 @@ pub fn coerce_types(
         | Operator::RegexNotIMatch => regex_coercion(lhs_type, rhs_type),
         // "||" operator has its own rules, and always return a string type
         Operator::StringConcat => string_concat_coercion(lhs_type, rhs_type),
-        Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => {
-            eq_coercion(lhs_type, rhs_type)
-        }
     };
 
     // re-write the error message of failed coercions to include the 
operator's information
@@ -211,8 +210,8 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<D
 }
 
 // This function performs temporal coercion between the two input data types 
and the provided operator.
-// It returns an error if the operands are date or timestamp data types with 
an unsupported operation,
-// or None if the coercion is not possible. If the coercion is possible, it 
returns a new data type as Some(DataType).
+// It returns None (it will convert a Err outside) if the operands are an 
unsupported/wrong operation.
+// If the coercion is possible, it returns a new data type as Some(DataType).
 pub fn temporal_add_sub_coercion(
     lhs_type: &DataType,
     rhs_type: &DataType,
@@ -234,13 +233,11 @@ pub fn temporal_add_sub_coercion(
         (lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
             handle_interval_addition(lhs, rhs)
         }
-        // if two date/timestamp are being added/subtracted, return an error 
indicating that the operation is not supported
         (lhs, rhs, Operator::Minus)
             if (is_date(lhs) || is_timestamp(lhs))
                 && (is_date(rhs) || is_timestamp(rhs)) =>
         {
-            // TODO: support Minus
-            None
+            temporal_coercion(lhs, rhs)
         }
         // return None if no coercion is possible
         _ => None,
@@ -754,51 +751,6 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<DataTyp
     }
 }
 
-/// Coercion rule for numerical types: The type that both lhs and rhs
-/// can be casted to for numerical calculation, while maintaining
-/// maximum precision
-fn numerical_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
-    use arrow::datatypes::DataType::*;
-
-    // error on any non-numeric type
-    if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
-        return None;
-    };
-
-    if lhs_type == rhs_type {
-        // same type => all good
-        return Some(lhs_type.clone());
-    }
-
-    // these are ordered from most informative to least informative so
-    // that the coercion removes the least amount of information
-    match (lhs_type, rhs_type) {
-        (Float64, _) | (_, Float64) => Some(Float64),
-        (_, Float32) | (Float32, _) => Some(Float32),
-        (Int64, _) | (_, Int64) => Some(Int64),
-        (Int32, _) | (_, Int32) => Some(Int32),
-        (Int16, _) | (_, Int16) => Some(Int16),
-        (Int8, _) | (_, Int8) => Some(Int8),
-        (UInt64, _) | (_, UInt64) => Some(UInt64),
-        (UInt32, _) | (_, UInt32) => Some(UInt32),
-        (UInt16, _) | (_, UInt16) => Some(UInt16),
-        (UInt8, _) | (_, UInt8) => Some(UInt8),
-        _ => None,
-    }
-}
-
-/// coercion rules for equality operations. This is a superset of all 
numerical coercion rules.
-fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
-    if lhs_type == rhs_type {
-        // same type => equality is possible
-        return Some(lhs_type.clone());
-    }
-    numerical_coercion(lhs_type, rhs_type)
-        .or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
-        .or_else(|| temporal_coercion(lhs_type, rhs_type))
-        .or_else(|| null_coercion(lhs_type, rhs_type))
-}
-
 /// coercion rules from NULL type. Since NULL can be casted to most of types 
in arrow,
 /// either lhs or rhs is NULL, if NULL can be casted to type of the other 
side, the coercion is valid.
 fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> 
{
diff --git a/datafusion/physical-expr/src/type_coercion.rs 
b/datafusion/physical-expr/src/type_coercion.rs
index 7fe002a859..399dcc0899 100644
--- a/datafusion/physical-expr/src/type_coercion.rs
+++ b/datafusion/physical-expr/src/type_coercion.rs
@@ -18,7 +18,7 @@
 //! Type coercion rules for functions with multiple valid signatures
 //!
 //! Coercion is performed automatically by DataFusion when the types
-//! of arguments passed to a function do not exacty match the types
+//! of arguments passed to a function do not exactly match the types
 //! required by that function. In this case, DataFusion will attempt to
 //! *coerce* the arguments to types accepted by the function by
 //! inserting CAST operations.

Reply via email to