This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5c02664674 Double type argument for to_timestamp function (#8159)
5c02664674 is described below
commit 5c02664674215f6a012901ec976860544189d265
Author: Seth Paydar <[email protected]>
AuthorDate: Thu Nov 30 14:45:58 2023 -0800
Double type argument for to_timestamp function (#8159)
* feat: test queries for to_timestamp(float) WIP
* feat: Float64 input for to_timestamp
* cargo fmt
* clippy
* docs: double input type for to_timestamp
* feat: cast floats to timestamp
* style: cargo fmt
* fix: float64 cast for timestamp nanos only
---
datafusion/expr/src/built_in_function.rs | 1 +
.../physical-expr/src/datetime_expressions.rs | 5 ++++
datafusion/physical-expr/src/expressions/cast.rs | 20 +++++++++++++--
datafusion/sqllogictest/test_files/timestamps.slt | 29 ++++++++++++++++++++++
docs/source/user-guide/sql/scalar_functions.md | 4 +--
5 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index a51941fdee..d48e9e7a67 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -1023,6 +1023,7 @@ impl BuiltinScalarFunction {
1,
vec![
Int64,
+ Float64,
Timestamp(Nanosecond, None),
Timestamp(Microsecond, None),
Timestamp(Millisecond, None),
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 0d42708c97..bc0385cd89 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -971,6 +971,11 @@ pub fn to_timestamp_invoke(args: &[ColumnarValue]) -> Result<ColumnarValue> {
&DataType::Timestamp(TimeUnit::Nanosecond, None),
None,
),
+ DataType::Float64 => cast_column(
+ &args[0],
+ &DataType::Timestamp(TimeUnit::Nanosecond, None),
+ None,
+ ),
DataType::Timestamp(_, None) => cast_column(
&args[0],
&DataType::Timestamp(TimeUnit::Nanosecond, None),
diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs
index b718b5017c..b3ca95292a 100644
--- a/datafusion/physical-expr/src/expressions/cast.rs
+++ b/datafusion/physical-expr/src/expressions/cast.rs
@@ -176,7 +176,20 @@ pub fn cast_column(
kernels::cast::cast_with_options(array, cast_type, &cast_options)?,
)),
ColumnarValue::Scalar(scalar) => {
- let scalar_array = scalar.to_array()?;
+ let scalar_array = if cast_type
+ == &DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None)
+ {
+ if let ScalarValue::Float64(Some(float_ts)) = scalar {
+ ScalarValue::Int64(
+ Some((float_ts * 1_000_000_000_f64).trunc() as i64),
+ )
+ .to_array()?
+ } else {
+ scalar.to_array()?
+ }
+ } else {
+ scalar.to_array()?
+ };
let cast_array = kernels::cast::cast_with_options(
&scalar_array,
cast_type,
@@ -201,7 +214,10 @@ pub fn cast_with_options(
let expr_type = expr.data_type(input_schema)?;
if expr_type == cast_type {
Ok(expr.clone())
- } else if can_cast_types(&expr_type, &cast_type) {
+ } else if can_cast_types(&expr_type, &cast_type)
+ || (expr_type == DataType::Float64
+ && cast_type == DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None))
+ {
Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options)))
} else {
not_impl_err!("Unsupported CAST from {expr_type:?} to {cast_type:?}")
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt
index 3830d8f868..71b6ddf33f 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -291,6 +291,35 @@ SELECT COUNT(*) FROM ts_data_secs where ts > to_timestamp_seconds('2020-09-08T12
----
2
+
+# to_timestamp float inputs
+
+query PPP
+SELECT to_timestamp(1.1) as c1, cast(1.1 as timestamp) as c2, 1.1::timestamp as c3;
+----
+1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100
+
+query PPP
+SELECT to_timestamp(-1.1) as c1, cast(-1.1 as timestamp) as c2, (-1.1)::timestamp as c3;
+----
+1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900
+
+query PPP
+SELECT to_timestamp(0.0) as c1, cast(0.0 as timestamp) as c2, 0.0::timestamp as c3;
+----
+1970-01-01T00:00:00 1970-01-01T00:00:00 1970-01-01T00:00:00
+
+query PPP
+SELECT to_timestamp(1.23456789) as c1, cast(1.23456789 as timestamp) as c2, 1.23456789::timestamp as c3;
+----
+1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890
+
+query PPP
+SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as timestamp) as c2, 123456789.123456789::timestamp as c3;
+----
+1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784
+
+
# from_unixtime
# 1599566400 is '2020-09-08T12:00:00+00:00'
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index c0889d94db..49e850ba90 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1457,9 +1457,9 @@ extract(field FROM source)
### `to_timestamp`
Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`).
-Supports strings, integer, and unsigned integer types as input.
+Supports strings, integer, unsigned integer, and double types as input.
Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
-Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`)
+Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`)
return the corresponding timestamp.
```