This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 01d00fdaa Implement cast between Date and Timestamp (#4726)
01d00fdaa is described below
commit 01d00fdaa8baa8007cc5f118d875257bb990f162
Author: comphead <[email protected]>
AuthorDate: Mon Dec 26 13:18:26 2022 -0800
Implement cast between Date and Timestamp (#4726)
* Date to timestamp conversion
* bit of cleaning
* fmt
---
datafusion/core/tests/sql/timestamp.rs | 65 ++++++++++++++++++++++++++
datafusion/expr/src/type_coercion/binary.rs | 2 +
datafusion/expr/src/type_coercion/functions.rs | 27 +++++++++++
datafusion/physical-expr/src/planner.rs | 14 ++++--
4 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/datafusion/core/tests/sql/timestamp.rs
b/datafusion/core/tests/sql/timestamp.rs
index cb70ab2d0..33ef1e51b 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -1679,3 +1679,68 @@ async fn test_current_time() -> Result<()> {
assert_batches_eq!(expected, &results);
Ok(())
}
+
+#[tokio::test]
+async fn test_ts_dt_binary_ops() -> Result<()> {
+ let ctx = SessionContext::new();
+
+ // test cast in where clause
+ let sql =
+ "select count(1) result from (select now() as n) a where n =
'2000-01-01'::date";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+--------+",
+ "| result |",
+ "+--------+",
+ "| 0 |",
+ "+--------+",
+ ];
+
+ assert_batches_eq!(expected, &results);
+
+ // test cast in where ge clause
+ let sql =
+ "select count(1) result from (select now() as n) a where n >=
'2000-01-01'::date";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+--------+",
+ "| result |",
+ "+--------+",
+ "| 1 |",
+ "+--------+",
+ ];
+
+ assert_batches_eq!(expected, &results);
+
+ // test cast in equal select
+ let sql = "select now() = '2000-01-01'::date as result";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+--------+",
+ "| result |",
+ "+--------+",
+ "| false |",
+ "+--------+",
+ ];
+
+ assert_batches_eq!(expected, &results);
+
+ // test cast in gt select
+ let sql = "select now() >= '2000-01-01'::date as result";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+--------+",
+ "| result |",
+ "+--------+",
+ "| true |",
+ "+--------+",
+ ];
+
+ assert_batches_eq!(expected, &results);
+
+ Ok(())
+}
diff --git a/datafusion/expr/src/type_coercion/binary.rs
b/datafusion/expr/src/type_coercion/binary.rs
index 8105b6d24..60d214285 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -575,6 +575,8 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<DataTyp
},
(Timestamp(_, tz), Utf8) => Some(Timestamp(TimeUnit::Nanosecond,
tz.clone())),
(Utf8, Timestamp(_, tz)) => Some(Timestamp(TimeUnit::Nanosecond,
tz.clone())),
+ (Timestamp(_, _), Date32) => Some(Date32),
+ (Timestamp(_, _), Date64) => Some(Date64),
(Timestamp(lhs_unit, lhs_tz), Timestamp(rhs_unit, rhs_tz)) => {
let tz = match (lhs_tz, rhs_tz) {
// can't cast across timezones
diff --git a/datafusion/expr/src/type_coercion/functions.rs
b/datafusion/expr/src/type_coercion/functions.rs
index c20a93b99..b4a54fdcc 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -131,6 +131,10 @@ fn maybe_data_types(
/// See the module level documentation for more detail on coercion.
pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
use self::DataType::*;
+
+ if type_into == type_from {
+ return true;
+ }
// Null can convert to most of types
match type_into {
Int8 => matches!(type_from, Null | Int8),
@@ -173,12 +177,35 @@ pub fn can_coerce_from(type_into: &DataType, type_from:
&DataType) -> bool {
Timestamp(TimeUnit::Nanosecond, None) => {
matches!(type_from, Null | Timestamp(_, None))
}
+ Date32 => {
+ matches!(type_from, Null | Timestamp(_, None))
+ }
Utf8 | LargeUtf8 => true,
Null => can_cast_types(type_from, type_into),
_ => false,
}
}
+/// Returns a common coerced datatype between 2 given datatypes
+///
+/// See the module level documentation for more detail on coercion.
+pub fn get_common_coerced_type(
+ left_datatype: DataType,
+ right_datatype: DataType,
+) -> Result<DataType> {
+ if left_datatype == right_datatype || can_coerce_from(&left_datatype,
&right_datatype)
+ {
+ Ok(left_datatype)
+ } else if can_coerce_from(&right_datatype, &left_datatype) {
+ Ok(right_datatype)
+ } else {
+ Err(DataFusionError::Plan(format!(
+ "Datatypes cannot be casted into common type {:?} <-> {:?}",
+ left_datatype, right_datatype
+ )))
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/datafusion/physical-expr/src/planner.rs
b/datafusion/physical-expr/src/planner.rs
index d5e2c05e7..04acf5322 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -28,6 +28,7 @@ use crate::{
use arrow::datatypes::{DataType, Schema};
use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue};
use datafusion_expr::expr::Cast;
+use datafusion_expr::type_coercion::functions::get_common_coerced_type;
use datafusion_expr::{
binary_expr, Between, BinaryExpr, Expr, GetIndexedField, Like, Operator,
TryCast,
};
@@ -198,9 +199,16 @@ pub fn create_physical_expr(
input_schema,
)?)),
_ => {
- // assume that we can coerce both sides into a common type
- // and then perform a binary operation
- binary(lhs, *op, rhs, input_schema)
+ let target_datatype = get_common_coerced_type(
+ lhs.data_type(input_schema)?,
+ rhs.data_type(input_schema)?,
+ )?;
+ binary(
+ expressions::cast(lhs, input_schema,
target_datatype.clone())?,
+ *op,
+ expressions::cast(rhs, input_schema, target_datatype)?,
+ input_schema,
+ )
}
}
}