This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2071259e23 Fixing issues with for timestamp literals (#8193)
2071259e23 is described below
commit 2071259e23f75d94678cc0a54bad154d3748b8cb
Author: comphead <[email protected]>
AuthorDate: Sun Nov 26 14:28:01 2023 -0800
Fixing issues with for timestamp literals (#8193)
Fixing issue for timestamp literals
---
datafusion/common/src/scalar.rs | 12 +++++
datafusion/core/tests/sql/timestamp.rs | 2 +-
datafusion/expr/src/built_in_function.rs | 7 +--
.../physical-expr/src/datetime_expressions.rs | 8 ++--
.../physical-expr/src/expressions/negative.rs | 7 ++-
datafusion/sql/src/expr/mod.rs | 30 +++++++++----
datafusion/sql/tests/sql_integration.rs | 4 +-
datafusion/sqllogictest/test_files/timestamps.slt | 52 +++++++++++++++++++---
datafusion/sqllogictest/test_files/window.slt | 16 +++----
9 files changed, 103 insertions(+), 35 deletions(-)
diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index ffa8ab50f8..3431d71468 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -983,6 +983,18 @@ impl ScalarValue {
ScalarValue::Decimal256(Some(v), precision, scale) => Ok(
ScalarValue::Decimal256(Some(v.neg_wrapping()), *precision,
*scale),
),
+ ScalarValue::TimestampSecond(Some(v), tz) => {
+ Ok(ScalarValue::TimestampSecond(Some(-v), tz.clone()))
+ }
+ ScalarValue::TimestampNanosecond(Some(v), tz) => {
+ Ok(ScalarValue::TimestampNanosecond(Some(-v), tz.clone()))
+ }
+ ScalarValue::TimestampMicrosecond(Some(v), tz) => {
+ Ok(ScalarValue::TimestampMicrosecond(Some(-v), tz.clone()))
+ }
+ ScalarValue::TimestampMillisecond(Some(v), tz) => {
+ Ok(ScalarValue::TimestampMillisecond(Some(-v), tz.clone()))
+ }
value => _internal_err!(
"Can not run arithmetic negative on scalar value {value:?}"
),
diff --git a/datafusion/core/tests/sql/timestamp.rs
b/datafusion/core/tests/sql/timestamp.rs
index a18e6831b6..ada66503a1 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -742,7 +742,7 @@ async fn test_arrow_typeof() -> Result<()> {
"+-----------------------------------------------------------------------+",
"|
arrow_typeof(date_trunc(Utf8(\"microsecond\"),to_timestamp(Int64(61)))) |",
"+-----------------------------------------------------------------------+",
- "| Timestamp(Second, None)
|",
+ "| Timestamp(Nanosecond, None)
|",
"+-----------------------------------------------------------------------+",
];
assert_batches_eq!(expected, &actual);
diff --git a/datafusion/expr/src/built_in_function.rs
b/datafusion/expr/src/built_in_function.rs
index d920675016..c511c752b4 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -779,13 +779,10 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SubstrIndex => {
utf8_to_str_type(&input_expr_types[0], "substr_index")
}
- BuiltinScalarFunction::ToTimestamp => Ok(match
&input_expr_types[0] {
- Int64 => Timestamp(Second, None),
- _ => Timestamp(Nanosecond, None),
- }),
+ BuiltinScalarFunction::ToTimestamp
+ | BuiltinScalarFunction::ToTimestampNanos =>
Ok(Timestamp(Nanosecond, None)),
BuiltinScalarFunction::ToTimestampMillis =>
Ok(Timestamp(Millisecond, None)),
BuiltinScalarFunction::ToTimestampMicros =>
Ok(Timestamp(Microsecond, None)),
- BuiltinScalarFunction::ToTimestampNanos =>
Ok(Timestamp(Nanosecond, None)),
BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second,
None)),
BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)),
BuiltinScalarFunction::Now => {
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs
b/datafusion/physical-expr/src/datetime_expressions.rs
index 5b597de78a..0d42708c97 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -966,9 +966,11 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
}
match args[0].data_type() {
- DataType::Int64 => {
- cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second,
None), None)
- }
+ DataType::Int64 => cast_column(
+ &cast_column(&args[0], &DataType::Timestamp(TimeUnit::Second,
None), None)?,
+ &DataType::Timestamp(TimeUnit::Nanosecond, None),
+ None,
+ ),
DataType::Timestamp(_, None) => cast_column(
&args[0],
&DataType::Timestamp(TimeUnit::Nanosecond, None),
diff --git a/datafusion/physical-expr/src/expressions/negative.rs
b/datafusion/physical-expr/src/expressions/negative.rs
index a59fd1ae3f..b64b4a0c86 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -33,7 +33,7 @@ use arrow::{
use datafusion_common::{internal_err, DataFusionError, Result};
use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::{
- type_coercion::{is_interval, is_null, is_signed_numeric},
+ type_coercion::{is_interval, is_null, is_signed_numeric, is_timestamp},
ColumnarValue,
};
@@ -160,7 +160,10 @@ pub fn negative(
let data_type = arg.data_type(input_schema)?;
if is_null(&data_type) {
Ok(arg)
- } else if !is_signed_numeric(&data_type) && !is_interval(&data_type) {
+ } else if !is_signed_numeric(&data_type)
+ && !is_interval(&data_type)
+ && !is_timestamp(&data_type)
+ {
internal_err!(
"Can't create negative physical expr for (- '{arg:?}'), the type
of child expr is {data_type}, not signed numeric"
)
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 7fa16ced39..25fe6b6633 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -29,6 +29,7 @@ mod value;
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use arrow_schema::DataType;
+use arrow_schema::TimeUnit;
use datafusion_common::{
internal_err, not_impl_err, plan_err, Column, DFSchema, DataFusionError,
Result,
ScalarValue,
@@ -224,14 +225,27 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::Cast {
expr, data_type, ..
- } => Ok(Expr::Cast(Cast::new(
- Box::new(self.sql_expr_to_logical_expr(
- *expr,
- schema,
- planner_context,
- )?),
- self.convert_data_type(&data_type)?,
- ))),
+ } => {
+ let dt = self.convert_data_type(&data_type)?;
+ let expr =
+ self.sql_expr_to_logical_expr(*expr, schema,
planner_context)?;
+
+ // numeric constants are treated as seconds (rather as
nanoseconds)
+ // to align with postgres / duckdb semantics
+ let expr = match &dt {
+ DataType::Timestamp(TimeUnit::Nanosecond, tz)
+ if expr.get_type(schema)? == DataType::Int64 =>
+ {
+ Expr::Cast(Cast::new(
+ Box::new(expr),
+ DataType::Timestamp(TimeUnit::Second, tz.clone()),
+ ))
+ }
+ _ => expr,
+ };
+
+ Ok(Expr::Cast(Cast::new(Box::new(expr), dt)))
+ }
SQLExpr::TryCast {
expr, data_type, ..
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index a56e9a50f0..d5b06bcf81 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -606,11 +606,9 @@ fn select_compound_filter() {
#[test]
fn test_timestamp_filter() {
let sql = "SELECT state FROM person WHERE birth_date < CAST
(158412331400600000 as timestamp)";
-
let expected = "Projection: person.state\
- \n Filter: person.birth_date < CAST(Int64(158412331400600000) AS
Timestamp(Nanosecond, None))\
+ \n Filter: person.birth_date <
CAST(CAST(Int64(158412331400600000) AS Timestamp(Second, None)) AS
Timestamp(Nanosecond, None))\
\n TableScan: person";
-
quick_test(sql, expected);
}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index e186aa12f7..3830d8f868 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -1788,8 +1788,50 @@ SELECT TIMESTAMPTZ '2020-01-01 00:00:00Z' = TIMESTAMP
'2020-01-01'
----
true
-# verify to_timestamp edge cases to be in sync with postgresql
-query PPPPP
-SELECT to_timestamp(null), to_timestamp(-62125747200), to_timestamp(0),
to_timestamp(1926632005177), to_timestamp(1926632005)
-----
-NULL 0001-04-25T00:00:00 1970-01-01T00:00:00 +63022-07-16T12:59:37
2031-01-19T23:33:25
+# verify timestamp cast with integer input
+query PPPPPP
+SELECT to_timestamp(null), to_timestamp(0), to_timestamp(1926632005),
to_timestamp(1), to_timestamp(-1), to_timestamp(0-1)
+----
+NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01
1969-12-31T23:59:59 1969-12-31T23:59:59
+
+# verify timestamp syntax stlyes are consistent
+query BBBBBBBBBBBBB
+SELECT to_timestamp(null) is null as c1,
+ null::timestamp is null as c2,
+ cast(null as timestamp) is null as c3,
+ to_timestamp(0) = 0::timestamp as c4,
+ to_timestamp(1926632005) = 1926632005::timestamp as c5,
+ to_timestamp(1) = 1::timestamp as c6,
+ to_timestamp(-1) = -1::timestamp as c7,
+ to_timestamp(0-1) = (0-1)::timestamp as c8,
+ to_timestamp(0) = cast(0 as timestamp) as c9,
+ to_timestamp(1926632005) = cast(1926632005 as timestamp) as c10,
+ to_timestamp(1) = cast(1 as timestamp) as c11,
+ to_timestamp(-1) = cast(-1 as timestamp) as c12,
+ to_timestamp(0-1) = cast(0-1 as timestamp) as c13
+----
+true true true true true true true true true true true true true
+
+# verify timestamp output types
+query TTT
+SELECT arrow_typeof(to_timestamp(1)), arrow_typeof(to_timestamp(null)),
arrow_typeof(to_timestamp('2023-01-10 12:34:56.000'))
+----
+Timestamp(Nanosecond, None) Timestamp(Nanosecond, None) Timestamp(Nanosecond,
None)
+
+# verify timestamp output types using timestamp literal syntax
+query BBBBBB
+SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1,
+ arrow_typeof(to_timestamp(null)) = arrow_typeof(null::timestamp) as c2,
+ arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) =
arrow_typeof('2023-01-10 12:34:56.000'::timestamp) as c3,
+ arrow_typeof(to_timestamp(1)) = arrow_typeof(cast(1 as timestamp)) as
c4,
+ arrow_typeof(to_timestamp(null)) = arrow_typeof(cast(null as
timestamp)) as c5,
+ arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) =
arrow_typeof(cast('2023-01-10 12:34:56.000' as timestamp)) as c6
+----
+true true true true true true
+
+# known issues. currently overflows (expects default precision to be
microsecond instead of nanoseconds. Work pending)
+#verify extreme values
+#query PPPPPPPP
+#SELECT to_timestamp(-62125747200), to_timestamp(1926632005177),
-62125747200::timestamp, 1926632005177::timestamp, cast(-62125747200 as
timestamp), cast(1926632005177 as timestamp)
+#----
+#0001-04-25T00:00:00 +63022-07-16T12:59:37 0001-04-25T00:00:00
+63022-07-16T12:59:37 0001-04-25T00:00:00 +63022-07-16T12:59:37
diff --git a/datafusion/sqllogictest/test_files/window.slt
b/datafusion/sqllogictest/test_files/window.slt
index 1ef0ba0d10..319c084076 100644
--- a/datafusion/sqllogictest/test_files/window.slt
+++ b/datafusion/sqllogictest/test_files/window.slt
@@ -895,14 +895,14 @@ SELECT
statement ok
create table temp as values
-(1664264591000000000),
-(1664264592000000000),
-(1664264592000000000),
-(1664264593000000000),
-(1664264594000000000),
-(1664364594000000000),
-(1664464594000000000),
-(1664564594000000000);
+(1664264591),
+(1664264592),
+(1664264592),
+(1664264593),
+(1664264594),
+(1664364594),
+(1664464594),
+(1664564594);
statement ok
create table t as select cast(column1 as timestamp) as ts from temp;