This is an automated email from the ASF dual-hosted git repository.
berkay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d19865ee80 [minor] overload from_unixtime func to have optional
timezone parameter (#13130)
d19865ee80 is described below
commit d19865ee80ee6434694b8dc9cd00c8cae52c6fc7
Author: Burak Şen <[email protected]>
AuthorDate: Mon Nov 4 14:49:14 2024 +0300
[minor] overload from_unixtime func to have optional timezone parameter
(#13130)
* overloaded from ts
* Update docs/source/user-guide/sql/scalar_functions_new.md
Co-authored-by: Bruce Ritchie <[email protected]>
* fixed return type
* added sql example
* optional in ∂oc
* review
---------
Co-authored-by: Bruce Ritchie <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/src/datetime/from_unixtime.rs | 132 +++++++++++++++++----
datafusion/sqllogictest/test_files/timestamps.slt | 23 ++++
docs/source/user-guide/sql/scalar_functions.md | 16 ++-
3 files changed, 148 insertions(+), 23 deletions(-)
diff --git a/datafusion/functions/src/datetime/from_unixtime.rs
b/datafusion/functions/src/datetime/from_unixtime.rs
index 84aa9feec6..ed9858106c 100644
--- a/datafusion/functions/src/datetime/from_unixtime.rs
+++ b/datafusion/functions/src/datetime/from_unixtime.rs
@@ -15,16 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::{Int64, Timestamp};
-use arrow::datatypes::TimeUnit::Second;
use std::any::Any;
-use std::sync::OnceLock;
+use std::sync::{Arc, OnceLock};
-use datafusion_common::{exec_err, Result};
+use arrow::datatypes::DataType;
+use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
+use arrow::datatypes::TimeUnit::Second;
+use datafusion_common::{exec_err, internal_err, ExprSchema, Result,
ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
+use datafusion_expr::TypeSignature::Exact;
use datafusion_expr::{
- ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
};
#[derive(Debug)]
@@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc {
impl FromUnixtimeFunc {
pub fn new() -> Self {
Self {
- signature: Signature::uniform(1, vec![Int64],
Volatility::Immutable),
+ signature: Signature::one_of(
+ vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])],
+ Volatility::Immutable,
+ ),
}
}
}
@@ -59,28 +63,63 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
&self.signature
}
+ fn return_type_from_exprs(
+ &self,
+ args: &[Expr],
+ _schema: &dyn ExprSchema,
+ arg_types: &[DataType],
+ ) -> Result<DataType> {
+ match arg_types.len() {
+ 1 => Ok(Timestamp(Second, None)),
+ 2 => match &args[1] {
+ Expr::Literal(ScalarValue::Utf8(Some(tz))) =>
Ok(Timestamp(Second, Some(Arc::from(tz.to_string())))),
+ _ => exec_err!(
+ "Second argument for `from_unixtime` must be non-null
utf8, received {:?}",
+ arg_types[1]),
+ },
+ _ => exec_err!(
+ "from_unixtime function requires 1 or 2 arguments, got {}",
+ arg_types.len()
+ ),
+ }
+ }
+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
- Ok(Timestamp(Second, None))
+ internal_err!("call return_type_from_exprs instead")
}
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
- if args.len() != 1 {
+ let len = args.len();
+ if len != 1 && len != 2 {
return exec_err!(
- "from_unixtime function requires 1 argument, got {}",
+ "from_unixtime function requires 1 or 2 argument, got {}",
args.len()
);
}
- match args[0].data_type() {
- Int64 => args[0].cast_to(&Timestamp(Second, None), None),
- other => {
- exec_err!(
- "Unsupported data type {:?} for function from_unixtime",
- other
- )
- }
+ if args[0].data_type() != Int64 {
+ return exec_err!(
+ "Unsupported data type {:?} for function from_unixtime",
+ args[0].data_type()
+ );
+ }
+
+ match len {
+ 1 => args[0].cast_to(&Timestamp(Second, None), None),
+ 2 => match &args[1] {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0]
+ .cast_to(&Timestamp(Second,
Some(Arc::from(tz.to_string()))), None),
+ _ => {
+ exec_err!(
+ "Unsupported data type {:?} for function
from_unixtime",
+ args[1].data_type()
+ )
+ }
+ },
+ _ => unreachable!(),
}
}
+
fn documentation(&self) -> Option<&Documentation> {
Some(get_from_unixtime_doc())
}
@@ -93,12 +132,63 @@ fn get_from_unixtime_doc() -> &'static Documentation {
Documentation::builder()
.with_doc_section(DOC_SECTION_DATETIME)
.with_description("Converts an integer to RFC3339 timestamp format
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return
the corresponding timestamp.")
- .with_syntax_example("from_unixtime(expression)")
+ .with_syntax_example("from_unixtime(expression[, timezone])")
+ .with_standard_argument("expression", None)
.with_argument(
- "expression",
- "Expression to operate on. Can be a constant, column, or
function, and any combination of arithmetic operators."
+ "timezone",
+ "Optional timezone to use when converting the integer to a
timestamp. If not provided, the default timezone is UTC.",
)
+ .with_sql_example(r#"```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00 |
++-----------------------------------------------------------+
+```"#)
.build()
.unwrap()
})
}
+
+#[cfg(test)]
+mod test {
+ use crate::datetime::from_unixtime::FromUnixtimeFunc;
+ use datafusion_common::ScalarValue;
+ use datafusion_common::ScalarValue::Int64;
+ use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+
+ #[test]
+ fn test_without_timezone() {
+ let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))];
+
+ let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
+
+ match result {
+ ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec),
None)) => {
+ assert_eq!(sec, 1729900800);
+ }
+ _ => panic!("Expected scalar value"),
+ }
+ }
+
+ #[test]
+ fn test_with_timezone() {
+ let args = [
+ ColumnarValue::Scalar(Int64(Some(1729900800))),
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+ "America/New_York".to_string(),
+ ))),
+ ];
+
+ let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
+
+ match result {
+ ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec),
Some(tz))) => {
+ assert_eq!(sec, 1729900800);
+ assert_eq!(tz.to_string(), "America/New_York");
+ }
+ _ => panic!("Expected scalar value"),
+ }
+ }
+}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index a09a63a791..42abeff674 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -308,6 +308,29 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3;
2020-09-08T12:42:29
2020-09-08T11:42:29
+# from_unixtime single
+
+query P
+SELECT from_unixtime(1599572549190855123 / 1000000000, 'America/New_York');
+----
+2020-09-08T09:42:29-04:00
+
+# from_unixtime with timezone
+query P
+SELECT from_unixtime(ts / 1000000000, 'Asia/Istanbul') FROM ts_data LIMIT 3;
+----
+2020-09-08T16:42:29+03:00
+2020-09-08T15:42:29+03:00
+2020-09-08T14:42:29+03:00
+
+# from_unixtime with utc timezone
+query P
+SELECT from_unixtime(ts / 1000000000, 'UTC') FROM ts_data LIMIT 3;
+----
+2020-09-08T13:42:29Z
+2020-09-08T12:42:29Z
+2020-09-08T11:42:29Z
+
# to_timestamp
query I
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index 98c44cbd98..b92b815d7c 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -2037,12 +2037,24 @@ _Alias of [date_trunc](#date_trunc)._
Converts an integer to RFC3339 timestamp format
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return
the corresponding timestamp.
```
-from_unixtime(expression)
+from_unixtime(expression[, timezone])
```
#### Arguments
-- **expression**: Expression to operate on. Can be a constant, column, or
function, and any combination of arithmetic operators.
+- **expression**: The expression to operate on. Can be a constant, column, or
function, and any combination of operators.
+- **timezone**: Optional timezone to use when converting the integer to a
timestamp. If not provided, the default timezone is UTC.
+
+#### Example
+
+```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00 |
++-----------------------------------------------------------+
+```
### `make_date`
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]