This is an automated email from the ASF dual-hosted git repository.

berkay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d19865ee80 [minor] overload from_unixtime func to have optional 
timezone parameter (#13130)
d19865ee80 is described below

commit d19865ee80ee6434694b8dc9cd00c8cae52c6fc7
Author: Burak Şen <[email protected]>
AuthorDate: Mon Nov 4 14:49:14 2024 +0300

    [minor] overload from_unixtime func to have optional timezone parameter 
(#13130)
    
    * overloaded from ts
    
    * Update docs/source/user-guide/sql/scalar_functions_new.md
    
    Co-authored-by: Bruce Ritchie <[email protected]>
    
    * fixed return type
    
    * added sql example
    
    * optional in ∂oc
    
    * review
    
    ---------
    
    Co-authored-by: Bruce Ritchie <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/src/datetime/from_unixtime.rs | 132 +++++++++++++++++----
 datafusion/sqllogictest/test_files/timestamps.slt  |  23 ++++
 docs/source/user-guide/sql/scalar_functions.md     |  16 ++-
 3 files changed, 148 insertions(+), 23 deletions(-)

diff --git a/datafusion/functions/src/datetime/from_unixtime.rs 
b/datafusion/functions/src/datetime/from_unixtime.rs
index 84aa9feec6..ed9858106c 100644
--- a/datafusion/functions/src/datetime/from_unixtime.rs
+++ b/datafusion/functions/src/datetime/from_unixtime.rs
@@ -15,16 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::datatypes::DataType;
-use arrow::datatypes::DataType::{Int64, Timestamp};
-use arrow::datatypes::TimeUnit::Second;
 use std::any::Any;
-use std::sync::OnceLock;
+use std::sync::{Arc, OnceLock};
 
-use datafusion_common::{exec_err, Result};
+use arrow::datatypes::DataType;
+use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
+use arrow::datatypes::TimeUnit::Second;
+use datafusion_common::{exec_err, internal_err, ExprSchema, Result, 
ScalarValue};
 use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
+use datafusion_expr::TypeSignature::Exact;
 use datafusion_expr::{
-    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+    ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
 };
 
 #[derive(Debug)]
@@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc {
 impl FromUnixtimeFunc {
     pub fn new() -> Self {
         Self {
-            signature: Signature::uniform(1, vec![Int64], 
Volatility::Immutable),
+            signature: Signature::one_of(
+                vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])],
+                Volatility::Immutable,
+            ),
         }
     }
 }
@@ -59,28 +63,63 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
         &self.signature
     }
 
+    fn return_type_from_exprs(
+        &self,
+        args: &[Expr],
+        _schema: &dyn ExprSchema,
+        arg_types: &[DataType],
+    ) -> Result<DataType> {
+        match arg_types.len() {
+            1 => Ok(Timestamp(Second, None)),
+            2 => match &args[1] {
+                    Expr::Literal(ScalarValue::Utf8(Some(tz))) => 
Ok(Timestamp(Second, Some(Arc::from(tz.to_string())))),
+                    _ => exec_err!(
+                        "Second argument for `from_unixtime` must be non-null 
utf8, received {:?}",
+                        arg_types[1]),
+            },
+            _ => exec_err!(
+                "from_unixtime function requires 1 or 2 arguments, got {}",
+                arg_types.len()
+            ),
+        }
+    }
+
     fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
-        Ok(Timestamp(Second, None))
+        internal_err!("call return_type_from_exprs instead")
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
-        if args.len() != 1 {
+        let len = args.len();
+        if len != 1 && len != 2 {
             return exec_err!(
-                "from_unixtime function requires 1 argument, got {}",
+                "from_unixtime function requires 1 or 2 argument, got {}",
                 args.len()
             );
         }
 
-        match args[0].data_type() {
-            Int64 => args[0].cast_to(&Timestamp(Second, None), None),
-            other => {
-                exec_err!(
-                    "Unsupported data type {:?} for function from_unixtime",
-                    other
-                )
-            }
+        if args[0].data_type() != Int64 {
+            return exec_err!(
+                "Unsupported data type {:?} for function from_unixtime",
+                args[0].data_type()
+            );
+        }
+
+        match len {
+            1 => args[0].cast_to(&Timestamp(Second, None), None),
+            2 => match &args[1] {
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0]
+                    .cast_to(&Timestamp(Second, 
Some(Arc::from(tz.to_string()))), None),
+                _ => {
+                    exec_err!(
+                        "Unsupported data type {:?} for function 
from_unixtime",
+                        args[1].data_type()
+                    )
+                }
+            },
+            _ => unreachable!(),
         }
     }
+
     fn documentation(&self) -> Option<&Documentation> {
         Some(get_from_unixtime_doc())
     }
@@ -93,12 +132,63 @@ fn get_from_unixtime_doc() -> &'static Documentation {
         Documentation::builder()
             .with_doc_section(DOC_SECTION_DATETIME)
             .with_description("Converts an integer to RFC3339 timestamp format 
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are 
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return 
the corresponding timestamp.")
-            .with_syntax_example("from_unixtime(expression)")
+            .with_syntax_example("from_unixtime(expression[, timezone])")
+            .with_standard_argument("expression", None)
             .with_argument(
-                "expression",
-                "Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators."
+                "timezone",
+                "Optional timezone to use when converting the integer to a 
timestamp. If not provided, the default timezone is UTC.",
             )
+            .with_sql_example(r#"```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00                                 |
++-----------------------------------------------------------+
+```"#)
             .build()
             .unwrap()
     })
 }
+
+#[cfg(test)]
+mod test {
+    use crate::datetime::from_unixtime::FromUnixtimeFunc;
+    use datafusion_common::ScalarValue;
+    use datafusion_common::ScalarValue::Int64;
+    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
+
+    #[test]
+    fn test_without_timezone() {
+        let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))];
+
+        let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
+
+        match result {
+            ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), 
None)) => {
+                assert_eq!(sec, 1729900800);
+            }
+            _ => panic!("Expected scalar value"),
+        }
+    }
+
+    #[test]
+    fn test_with_timezone() {
+        let args = [
+            ColumnarValue::Scalar(Int64(Some(1729900800))),
+            ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+                "America/New_York".to_string(),
+            ))),
+        ];
+
+        let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
+
+        match result {
+            ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), 
Some(tz))) => {
+                assert_eq!(sec, 1729900800);
+                assert_eq!(tz.to_string(), "America/New_York");
+            }
+            _ => panic!("Expected scalar value"),
+        }
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index a09a63a791..42abeff674 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -308,6 +308,29 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3;
 2020-09-08T12:42:29
 2020-09-08T11:42:29
 
+# from_unixtime single
+
+query P
+SELECT from_unixtime(1599572549190855123 / 1000000000, 'America/New_York');
+----
+2020-09-08T09:42:29-04:00
+
+# from_unixtime with timezone
+query P
+SELECT from_unixtime(ts / 1000000000, 'Asia/Istanbul') FROM ts_data LIMIT 3;
+----
+2020-09-08T16:42:29+03:00
+2020-09-08T15:42:29+03:00
+2020-09-08T14:42:29+03:00
+
+# from_unixtime with utc timezone
+query P
+SELECT from_unixtime(ts / 1000000000, 'UTC') FROM ts_data LIMIT 3;
+----
+2020-09-08T13:42:29Z
+2020-09-08T12:42:29Z
+2020-09-08T11:42:29Z
+
 # to_timestamp
 
 query I
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 98c44cbd98..b92b815d7c 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -2037,12 +2037,24 @@ _Alias of [date_trunc](#date_trunc)._
 Converts an integer to RFC3339 timestamp format 
(`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are 
interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return 
the corresponding timestamp.
 
 ```
-from_unixtime(expression)
+from_unixtime(expression[, timezone])
 ```
 
 #### Arguments
 
-- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+- **expression**: The expression to operate on. Can be a constant, column, or 
function, and any combination of operators.
+- **timezone**: Optional timezone to use when converting the integer to a 
timestamp. If not provided, the default timezone is UTC.
+
+#### Example
+
+```sql
+> select from_unixtime(1599572549, 'America/New_York');
++-----------------------------------------------------------+
+| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
++-----------------------------------------------------------+
+| 2020-09-08T09:42:29-04:00                                 |
++-----------------------------------------------------------+
+```
 
 ### `make_date`
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to