This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new da373e591f fix: support data/timestamp minus (#5960)
da373e591f is described below
commit da373e591fb471afdb236d2a0991944b9c4a8660
Author: jakevin <[email protected]>
AuthorDate: Wed Apr 12 10:38:59 2023 +0800
fix: support data/timestamp minus (#5960)
---
.../core/tests/sqllogictests/test_files/dates.slt | 16 ++++++
.../tests/sqllogictests/test_files/timestamps.slt | 12 +++++
datafusion/expr/src/type_coercion/aggregates.rs | 3 +-
datafusion/expr/src/type_coercion/binary.rs | 60 +++-------------------
datafusion/physical-expr/src/type_coercion.rs | 2 +-
5 files changed, 36 insertions(+), 57 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt
b/datafusion/core/tests/sqllogictests/test_files/dates.slt
index d75fba6f51..4102460704 100644
--- a/datafusion/core/tests/sqllogictests/test_files/dates.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt
@@ -88,3 +88,19 @@ h
statement error DataFusion error: Error during planning:
Timestamp\(Nanosecond, Some\("\+00:00"\)\) \+ Utf8 can't be evaluated because
there isn't a common type to coerce the types to
select i_item_desc from test
where d3_date > now() + '5 days';
+
+# DATE minus DATE
+query error DataFusion error: Error during planning: Date32 \- Date32 can't be
evaluated because there isn't a common type to coerce the types to
+SELECT DATE '2023-04-09' - DATE '2023-04-02';
+
+# DATE minus Timestamp
+query P
+SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs
+
+# Timestamp minus DATE
+query P
+SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
+----
+0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index 3887fc84ad..f51a272a6b 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -660,6 +660,18 @@ statement error DataFusion error: Error during planning:
Timestamp\(Nanosecond,
SELECT ts1 + ts2
FROM foo;
+# Timestamp - Timestamp
+query ?
+SELECT '2000-01-01T00:00:00'::timestamp - '2000-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs
+
+# large timestamp - small timestamp
+query ?
+SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp;
+----
+0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs
+
# Interval - Timestamp => error
statement error DataFusion error: Error during planning:
Interval\(MonthDayNano\) - Timestamp\(Nanosecond, None\) can't be evaluated
because there isn't a common type to coerce the types to
SELECT i - ts1 from FOO;
diff --git a/datafusion/expr/src/type_coercion/aggregates.rs
b/datafusion/expr/src/type_coercion/aggregates.rs
index efcd503cf4..cc57c72b02 100644
--- a/datafusion/expr/src/type_coercion/aggregates.rs
+++ b/datafusion/expr/src/type_coercion/aggregates.rs
@@ -266,8 +266,7 @@ fn check_arg_count(
TypeSignature::VariadicAny => {
if input_types.is_empty() {
return Err(DataFusionError::Plan(format!(
- "The function {:?} expects at least one argument",
- agg_fun
+ "The function {agg_fun:?} expects at least one argument"
)));
}
}
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 33ec5e4365..7f767d9368 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -112,7 +112,9 @@ pub fn coerce_types(
| Operator::Lt
| Operator::Gt
| Operator::GtEq
- | Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
+ | Operator::LtEq
+ | Operator::IsDistinctFrom
+ | Operator::IsNotDistinctFrom => comparison_coercion(lhs_type,
rhs_type),
// interval - timestamp is an erroneous case, cannot coerce a type
Operator::Plus | Operator::Minus
if (is_date(lhs_type)
@@ -140,9 +142,6 @@ pub fn coerce_types(
| Operator::RegexNotIMatch => regex_coercion(lhs_type, rhs_type),
// "||" operator has its own rules, and always return a string type
Operator::StringConcat => string_concat_coercion(lhs_type, rhs_type),
- Operator::IsDistinctFrom | Operator::IsNotDistinctFrom => {
- eq_coercion(lhs_type, rhs_type)
- }
};
// re-write the error message of failed coercions to include the
operator's information
@@ -211,8 +210,8 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<D
}
// This function performs temporal coercion between the two input data types
and the provided operator.
-// It returns an error if the operands are date or timestamp data types with
an unsupported operation,
-// or None if the coercion is not possible. If the coercion is possible, it
returns a new data type as Some(DataType).
+// It returns None (it will convert a Err outside) if the operands are an
unsupported/wrong operation.
+// If the coercion is possible, it returns a new data type as Some(DataType).
pub fn temporal_add_sub_coercion(
lhs_type: &DataType,
rhs_type: &DataType,
@@ -234,13 +233,11 @@ pub fn temporal_add_sub_coercion(
(lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
handle_interval_addition(lhs, rhs)
}
- // if two date/timestamp are being added/subtracted, return an error
indicating that the operation is not supported
(lhs, rhs, Operator::Minus)
if (is_date(lhs) || is_timestamp(lhs))
&& (is_date(rhs) || is_timestamp(rhs)) =>
{
- // TODO: support Minus
- None
+ temporal_coercion(lhs, rhs)
}
// return None if no coercion is possible
_ => None,
@@ -754,51 +751,6 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataTyp
}
}
-/// Coercion rule for numerical types: The type that both lhs and rhs
-/// can be casted to for numerical calculation, while maintaining
-/// maximum precision
-fn numerical_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
- use arrow::datatypes::DataType::*;
-
- // error on any non-numeric type
- if !is_numeric(lhs_type) || !is_numeric(rhs_type) {
- return None;
- };
-
- if lhs_type == rhs_type {
- // same type => all good
- return Some(lhs_type.clone());
- }
-
- // these are ordered from most informative to least informative so
- // that the coercion removes the least amount of information
- match (lhs_type, rhs_type) {
- (Float64, _) | (_, Float64) => Some(Float64),
- (_, Float32) | (Float32, _) => Some(Float32),
- (Int64, _) | (_, Int64) => Some(Int64),
- (Int32, _) | (_, Int32) => Some(Int32),
- (Int16, _) | (_, Int16) => Some(Int16),
- (Int8, _) | (_, Int8) => Some(Int8),
- (UInt64, _) | (_, UInt64) => Some(UInt64),
- (UInt32, _) | (_, UInt32) => Some(UInt32),
- (UInt16, _) | (_, UInt16) => Some(UInt16),
- (UInt8, _) | (_, UInt8) => Some(UInt8),
- _ => None,
- }
-}
-
-/// coercion rules for equality operations. This is a superset of all
numerical coercion rules.
-fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
- if lhs_type == rhs_type {
- // same type => equality is possible
- return Some(lhs_type.clone());
- }
- numerical_coercion(lhs_type, rhs_type)
- .or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
- .or_else(|| temporal_coercion(lhs_type, rhs_type))
- .or_else(|| null_coercion(lhs_type, rhs_type))
-}
-
/// coercion rules from NULL type. Since NULL can be casted to most of types
in arrow,
/// either lhs or rhs is NULL, if NULL can be casted to type of the other
side, the coercion is valid.
fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
{
diff --git a/datafusion/physical-expr/src/type_coercion.rs
b/datafusion/physical-expr/src/type_coercion.rs
index 7fe002a859..399dcc0899 100644
--- a/datafusion/physical-expr/src/type_coercion.rs
+++ b/datafusion/physical-expr/src/type_coercion.rs
@@ -18,7 +18,7 @@
//! Type coercion rules for functions with multiple valid signatures
//!
//! Coercion is performed automatically by DataFusion when the types
-//! of arguments passed to a function do not exacty match the types
+//! of arguments passed to a function do not exactly match the types
//! required by that function. In this case, DataFusion will attempt to
//! *coerce* the arguments to types accepted by the function by
//! inserting CAST operations.