This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new cf233c855f refactor: separate get_common_type / get_result_type for
temporal type (#6250)
cf233c855f is described below
commit cf233c855f46fad8355342c581ab55f610758b6c
Author: jakevin <[email protected]>
AuthorDate: Fri May 5 23:27:47 2023 +0800
refactor: separate get_common_type / get_result_type for temporal type
(#6250)
---
.../core/tests/sqllogictests/test_files/dates.slt | 4 +-
.../tests/sqllogictests/test_files/interval.slt | 12 +-
.../tests/sqllogictests/test_files/timestamps.slt | 4 +-
datafusion/expr/src/type_coercion/binary.rs | 180 +++++++++------------
4 files changed, 90 insertions(+), 110 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt
b/datafusion/core/tests/sqllogictests/test_files/dates.slt
index 0cb96d8bc7..bfeb92fd59 100644
--- a/datafusion/core/tests/sqllogictests/test_files/dates.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt
@@ -94,13 +94,13 @@ query error DataFusion error: Error during planning:
Unsupported argument types\
SELECT DATE '2023-04-09' - DATE '2023-04-02';
# DATE minus Timestamp
-query P
+query ?
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
----
0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs
# Timestamp minus DATE
-query P
+query ?
SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
----
0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs
diff --git a/datafusion/core/tests/sqllogictests/test_files/interval.slt
b/datafusion/core/tests/sqllogictests/test_files/interval.slt
index 15b18f51dd..82131f5d2a 100644
--- a/datafusion/core/tests/sqllogictests/test_files/interval.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/interval.slt
@@ -327,10 +327,10 @@ select '1 month'::interval +
'1980-01-01T12:00:00'::timestamp;
# Exected error: interval (scalar) - date / timestamp (scalar)
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a
common type to coerce the types to
select '1 month'::interval - '1980-01-01'::date;
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated
because there isn't a common type to coerce the types to
select '1 month'::interval - '1980-01-01T12:00:00'::timestamp;
# interval (array) + date / timestamp (array)
@@ -349,10 +349,10 @@ select i + ts from t;
2000-02-01T00:01:00
# expected error interval (array) - date / timestamp (array)
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a
common type to coerce the types to
select i - d from t;
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated
because there isn't a common type to coerce the types to
select i - ts from t;
@@ -372,10 +372,10 @@ select '1 month'::interval + ts from t;
2000-03-01T00:00:00
# expected error interval (scalar) - date / timestamp (array)
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Date32 can't be evaluated because there isn't a
common type to coerce the types to
select '1 month'::interval - d from t;
-query error DataFusion error: type_coercion\ncaused by\nError during planning:
interval can't subtract timestamp/date
+query error DataFusion error: type_coercion\ncaused by\nError during planning:
Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated
because there isn't a common type to coerce the types to
select '1 month'::interval - ts from t;
statement ok
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index e1102d626c..42a5f8e3a4 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -1014,7 +1014,7 @@ SELECT ts1 + i FROM foo;
2003-07-12T01:31:15.000123463
# Timestamp + Timestamp => error
-statement error DataFusion error: Error during planning: Unsupported argument
types\. Can not evaluate Timestamp\(Nanosecond, None\) \+
Timestamp\(Nanosecond, None\)
+query error DataFusion error: type_coercion\ncaused by\nInternal error:
Unsupported operation Plus between Timestamp\(Nanosecond, None\) and
Timestamp\(Nanosecond, None\)\. This was likely caused by a bug in DataFusion's
code and we would welcome that you file an bug report in our issue tracker
SELECT ts1 + ts2
FROM foo;
@@ -1031,7 +1031,7 @@ SELECT '2000-01-01T00:00:00'::timestamp -
'2010-01-01T00:00:00'::timestamp;
0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs
# Interval - Timestamp => error
-statement error DataFusion error: type_coercion\ncaused by\nError during
planning: interval can't subtract timestamp/date
+statement error DataFusion error: type_coercion\ncaused by\nError during
planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be
evaluated because there isn't a common type to coerce the types to
SELECT i - ts1 from FOO;
statement ok
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 006b56c7fe..f01b06e0db 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -19,17 +19,70 @@
use arrow::compute::can_cast_types;
use arrow::datatypes::{
- DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION,
DECIMAL128_MAX_SCALE,
+ DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
};
use datafusion_common::DataFusionError;
use datafusion_common::Result;
-use crate::type_coercion::{
- is_datetime, is_decimal, is_interval, is_numeric, is_timestamp,
-};
+use crate::type_coercion::{is_datetime, is_decimal, is_interval, is_numeric};
use crate::Operator;
+/// Returns the result type of applying mathematics operations such as
+/// `+` to arguments of `lhs_type` and `rhs_type`.
+fn mathematics_temporal_result_type(
+ lhs_type: &DataType,
+ rhs_type: &DataType,
+) -> Option<DataType> {
+ use arrow::datatypes::DataType::*;
+ use arrow::datatypes::IntervalUnit::*;
+ use arrow::datatypes::TimeUnit::*;
+
+ if !is_interval(lhs_type)
+ && !is_interval(rhs_type)
+ && !is_datetime(lhs_type)
+ && !is_datetime(rhs_type)
+ {
+ return None;
+ };
+
+ match (lhs_type, rhs_type) {
+ // datetime +/- interval
+ (Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()),
+ (Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()),
+ (Interval(_), Date32) => Some(rhs_type.clone()),
+ (Date32, Interval(_)) => Some(lhs_type.clone()),
+ (Interval(_), Date64) => Some(rhs_type.clone()),
+ (Date64, Interval(_)) => Some(lhs_type.clone()),
+ // interval +/-
+ (Interval(YearMonth), Interval(YearMonth)) =>
Some(Interval(YearMonth)),
+ (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
+ (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
+ // timestamp - timestamp
+ (Timestamp(Second, _), Timestamp(Second, _))
+ | (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => {
+ Some(Interval(DayTime))
+ }
+ (Timestamp(Microsecond, _), Timestamp(Microsecond, _))
+ | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => {
+ Some(Interval(MonthDayNano))
+ }
+ (Timestamp(_, _), Timestamp(_, _)) => None,
+ // TODO: date minus date
+ // date - timestamp, timestamp - date
+ (Date32, Timestamp(_, _))
+ | (Timestamp(_, _), Date32)
+ | (Date64, Timestamp(_, _))
+ | (Timestamp(_, _), Date64) => {
+ // TODO: make get_result_type must after coerce type.
+ // if type isn't coerced, we need get common type, and then get
result type.
+ let common_type = temporal_coercion(lhs_type, rhs_type);
+ common_type.and_then(|t| mathematics_temporal_result_type(&t, &t))
+ }
+ _ => None,
+ }
+}
+
/// returns the resulting type of a binary expression evaluating the `op` with
the left and right hand types
pub fn get_result_type(
lhs_type: &DataType,
@@ -67,12 +120,12 @@ pub fn get_result_type(
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Some(DataType::Boolean),
Operator::Plus | Operator::Minus
- if is_datetime(lhs_type)
- || is_datetime(rhs_type)
- || is_interval(lhs_type)
- || is_interval(rhs_type) =>
+ if is_datetime(lhs_type) && is_datetime(rhs_type)
+ || (is_interval(lhs_type) && is_interval(rhs_type))
+ || (is_datetime(lhs_type) && is_interval(rhs_type))
+ || (is_interval(lhs_type) && is_datetime(rhs_type)) =>
{
- temporal_add_sub_coercion(lhs_type, rhs_type, op)
+ mathematics_temporal_result_type(lhs_type, rhs_type)
}
// following same with `coerce_types`
Operator::BitwiseAnd
@@ -126,19 +179,13 @@ pub fn coerce_types(
| Operator::LtEq
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => comparison_coercion(lhs_type,
rhs_type),
- // interval - timestamp is an erroneous case, cannot coerce a type
Operator::Plus | Operator::Minus
- if is_datetime(lhs_type)
- || is_datetime(rhs_type)
- || is_interval(lhs_type)
- || is_interval(rhs_type) =>
+ if is_interval(lhs_type) && is_interval(rhs_type) =>
{
- if is_interval(lhs_type) && is_datetime(rhs_type) && *op ==
Operator::Minus {
- return Err(DataFusionError::Plan(
- "interval can't subtract timestamp/date".to_string(),
- ));
- }
- temporal_add_sub_coercion(lhs_type, rhs_type, op)
+ temporal_coercion(lhs_type, rhs_type)
+ }
+ Operator::Minus if is_datetime(lhs_type) && is_datetime(rhs_type) => {
+ temporal_coercion(lhs_type, rhs_type)
}
// for math expressions, the final value of the coercion is also the
return type
// because coercion favours higher information types
@@ -208,7 +255,10 @@ pub fn math_decimal_coercion(
/// Returns the output type of applying bitwise operations such as
/// `&`, `|`, or `xor`to arguments of `lhs_type` and `rhs_type`.
-fn bitwise_coercion(left_type: &DataType, right_type: &DataType) ->
Option<DataType> {
+pub(crate) fn bitwise_coercion(
+ left_type: &DataType,
+ right_type: &DataType,
+) -> Option<DataType> {
use arrow::datatypes::DataType::*;
if !both_numeric_or_null_and_numeric(left_type, right_type) {
@@ -260,80 +310,6 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<D
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
}
-// This function performs temporal coercion between the two input data types
and the provided operator.
-// It returns None (it will convert a Err outside) if the operands are an
unsupported/wrong operation.
-// If the coercion is possible, it returns a new data type as Some(DataType).
-pub fn temporal_add_sub_coercion(
- lhs_type: &DataType,
- rhs_type: &DataType,
- op: &Operator,
-) -> Option<DataType> {
- match (lhs_type, rhs_type, op) {
- // if an interval is being added/subtracted from a date/timestamp,
return the date/timestamp data type
- (lhs, rhs, _) if is_interval(lhs) && is_datetime(rhs) =>
Some(rhs.clone()),
- (lhs, rhs, _) if is_interval(rhs) && is_datetime(lhs) =>
Some(lhs.clone()),
- // if two timestamps are being subtracted, check their time units and
return the corresponding interval data type
- (lhs, rhs, Operator::Minus) if is_timestamp(lhs) && is_timestamp(rhs)
=> {
- handle_timestamp_minus(lhs, rhs)
- }
- // if two intervals are being added/subtracted, check their interval
units and return the corresponding interval data type
- (lhs, rhs, _) if is_interval(lhs) && is_interval(rhs) => {
- handle_interval_addition(lhs, rhs)
- }
- (lhs, rhs, Operator::Minus) if is_datetime(lhs) && is_datetime(rhs) =>
{
- temporal_coercion(lhs, rhs)
- }
- // return None if no coercion is possible
- _ => None,
- }
-}
-
-// This function checks if two interval data types have the same interval unit
and returns an interval data type
-// representing the sum of them. If the two interval data types have different
units, it returns an interval data type
-// with "IntervalUnit::MonthDayNano". If the two interval data types are
already "IntervalUnit::YearMonth" or "IntervalUnit::DayTime",
-// it returns an interval data type with the same unit as the operands.
-fn handle_interval_addition(lhs: &DataType, rhs: &DataType) ->
Option<DataType> {
- match (lhs, rhs) {
- // operation with the same types
- (
- DataType::Interval(IntervalUnit::YearMonth),
- DataType::Interval(IntervalUnit::YearMonth),
- ) => Some(DataType::Interval(IntervalUnit::YearMonth)),
- (
- DataType::Interval(IntervalUnit::DayTime),
- DataType::Interval(IntervalUnit::DayTime),
- ) => Some(DataType::Interval(IntervalUnit::DayTime)),
- // operation with MonthDayNano's or different types
- (_, _) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
- }
-}
-
-// This function checks if two timestamp data types have the same time unit
and returns an interval data type
-// representing the difference between them, either "IntervalUnit::DayTime" if
the time unit is second or millisecond,
-// or "IntervalUnit::MonthDayNano" if the time unit is microsecond or
nanosecond. If the two timestamp data types have
-// different time units, it returns an error indicating that "The timestamps
have different types".
-fn handle_timestamp_minus(lhs: &DataType, rhs: &DataType) -> Option<DataType> {
- match (lhs, rhs) {
- (
- DataType::Timestamp(TimeUnit::Second, _),
- DataType::Timestamp(TimeUnit::Second, _),
- )
- | (
- DataType::Timestamp(TimeUnit::Millisecond, _),
- DataType::Timestamp(TimeUnit::Millisecond, _),
- ) => Some(DataType::Interval(IntervalUnit::DayTime)),
- (
- DataType::Timestamp(TimeUnit::Microsecond, _),
- DataType::Timestamp(TimeUnit::Microsecond, _),
- )
- | (
- DataType::Timestamp(TimeUnit::Nanosecond, _),
- DataType::Timestamp(TimeUnit::Nanosecond, _),
- ) => Some(DataType::Interval(IntervalUnit::MonthDayNano)),
- (_, _) => None,
- }
-}
-
/// Returns the output type of applying numeric operations such as `=`
/// to arguments `lhs_type` and `rhs_type` if one is numeric and one
/// is `Utf8`/`LargeUtf8`.
@@ -777,9 +753,15 @@ fn is_time_with_valid_unit(datatype: DataType) -> bool {
/// Coercion rules for Temporal columns: the type that both lhs and rhs can be
/// casted to for the purpose of a date computation
+/// For interval arithmetic, it doesn't handle datetime type +/- interval
fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
+ use arrow::datatypes::IntervalUnit::*;
match (lhs_type, rhs_type) {
+ // interval +/-
+ (Interval(YearMonth), Interval(YearMonth)) =>
Some(Interval(YearMonth)),
+ (Interval(DayTime), Interval(DayTime)) => Some(Interval(DayTime)),
+ (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)),
(Date64, Date32) | (Date32, Date64) => Some(Date64),
(Utf8, Date32) | (Date32, Utf8) => Some(Date32),
(Utf8, Date64) | (Date64, Utf8) => Some(Date64),
@@ -1055,14 +1037,12 @@ mod tests {
#[test]
fn test_date_timestamp_arithmetic_error() -> Result<()> {
- let err = coerce_types(
+ let common_type = coerce_types(
&DataType::Timestamp(TimeUnit::Nanosecond, None),
&Operator::Minus,
&DataType::Timestamp(TimeUnit::Millisecond, None),
- )
- .unwrap_err()
- .to_string();
- assert_contains!(&err, "Timestamp(Nanosecond, None) -
Timestamp(Millisecond, None) can't be evaluated because there isn't a common
type to coerce the types to");
+ )?;
+ assert_eq!(common_type.to_string(), "Timestamp(Millisecond, None)");
let err = coerce_types(&DataType::Date32, &Operator::Plus,
&DataType::Date64)
.unwrap_err()