This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d19e9d684b fix(5975/5976): timezone handling for timestamps and 
`date_trunc`, `date_part` and `date_bin` (#7614)
d19e9d684b is described below

commit d19e9d684bbe1fd820674d48a96795bfbea9db7d
Author: wiedld <[email protected]>
AuthorDate: Sat Sep 23 03:42:48 2023 -0700

    fix(5975/5976): timezone handling for timestamps and `date_trunc`, 
`date_part` and `date_bin` (#7614)
    
    * test: enforce timestamptz contract
    
    * test(5975/5976): demonstrate what logical plan casting must occur with 
datetime scalar functions.
    
      * These test cases also document how our scalar functions are currently 
not correct.
    
      * Extra comments documenting the logical plan will be removed on test 
cleanup (after code fixes).
    
    * fix(5975/5976): enable type coercion to include specific timezones
    
      * Prior to this change, the outcome was always coerced to 
Timestamp(Nanoseconds, None)
    and the tz was dropped.
    
    * fix(5975/5976): have date_trunc use DateTime<Tz>, instead of NaiveDateTime
    
    * chore(5975/5976): test cleanup -- consolidate into the single timestamps 
test file
    
    * fix(5975/5976): enable all valid timezones to be supported in type 
coercion
    
    * chore: update cargo.lock in datafusion-cli
    
    * test(5975/5976): tests to document the bounds of timezone acceptance
    
    * test(5975/5976): document irregular offsets and daylight savings time
    
    * refactor(5975/5976): do not parse timezone string during type coersion, 
should have already failed in parser if invalid
    
    * chore: properly abbreviate abbreviations
    
    * fix(5975/5976): apply tz string parsing per batch.
    
      * move parsing up to date_trunc() to apply per batch, not per value.
      * do not infer a default UTC timezone for missing tz. Instead use the 
appropriate type for with, or without, tz.
---
 datafusion-cli/Cargo.lock                          |  36 +-
 datafusion/core/tests/sql/expr.rs                  |   8 +
 datafusion/expr/Cargo.toml                         |   1 +
 datafusion/expr/src/built_in_function.rs           |  32 +-
 datafusion/expr/src/type_coercion/functions.rs     | 135 +++++---
 .../physical-expr/src/datetime_expressions.rs      | 166 +++++++--
 datafusion/sqllogictest/test_files/timestamps.slt  | 371 +++++++++++++++++++++
 7 files changed, 648 insertions(+), 101 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 0cf511e566..ba733726c3 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -32,9 +32,9 @@ dependencies = [
 
 [[package]]
 name = "aho-corasick"
-version = "1.1.0"
+version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "0f2135563fb5c609d2b2b87c1e8ce7bc41b0b45430fa9661f457981503dd5bf0"
+checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
 dependencies = [
  "memchr",
 ]
@@ -711,16 +711,15 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.4.1"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5"
+checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq",
- "digest",
 ]
 
 [[package]]
@@ -1156,6 +1155,7 @@ version = "31.0.0"
 dependencies = [
  "ahash",
  "arrow",
+ "arrow-array",
  "datafusion-common",
  "sqlparser",
  "strum 0.25.0",
@@ -1659,9 +1659,9 @@ dependencies = [
 
 [[package]]
 name = "hermit-abi"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
+checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
 
 [[package]]
 name = "hex"
@@ -2191,7 +2191,7 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
 dependencies = [
- "hermit-abi 0.3.2",
+ "hermit-abi 0.3.3",
  "libc",
 ]
 
@@ -2723,9 +2723,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.13"
+version = "0.38.14"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662"
+checksum = "747c788e9ce8e92b12cd485c49ddf90723550b654b32508f979b71a7b1ecda4f"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
@@ -2964,9 +2964,9 @@ dependencies = [
 
 [[package]]
 name = "smallvec"
-version = "1.11.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a"
 
 [[package]]
 name = "snafu"
@@ -3309,9 +3309,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-util"
-version = "0.7.8"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d"
+checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d"
 dependencies = [
  "bytes",
  "futures-core",
@@ -3433,9 +3433,9 @@ checksum = 
"1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
 
 [[package]]
 name = "unicode-width"
-version = "0.1.10"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
+checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
 
 [[package]]
 name = "untrusted"
@@ -3644,9 +3644,9 @@ checksum = 
"ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
 name = "winapi-util"
-version = "0.1.5"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
 dependencies = [
  "winapi",
 ]
diff --git a/datafusion/core/tests/sql/expr.rs 
b/datafusion/core/tests/sql/expr.rs
index 2cc61b1fdd..044b3b57ea 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -128,6 +128,14 @@ async fn test_encoding_expressions() -> Result<()> {
     Ok(())
 }
 
+#[should_panic(expected = "Invalid timezone \\\"Foo\\\": 'Foo' is not a valid 
timezone")]
+#[tokio::test]
+async fn test_array_cast_invalid_timezone_will_panic() {
+    let ctx = SessionContext::new();
+    let sql = "SELECT arrow_cast('2021-01-02T03:04:00', 'Timestamp(Nanosecond, 
Some(\"Foo\"))')";
+    execute(&ctx, sql).await;
+}
+
 #[tokio::test]
 #[cfg_attr(not(feature = "crypto_expressions"), ignore)]
 async fn test_crypto_expressions() -> Result<()> {
diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml
index f6d8e15fdd..4d69ce7475 100644
--- a/datafusion/expr/Cargo.toml
+++ b/datafusion/expr/Cargo.toml
@@ -37,6 +37,7 @@ path = "src/lib.rs"
 [dependencies]
 ahash = { version = "0.8", default-features = false, features = 
["runtime-rng"] }
 arrow = { workspace = true }
+arrow-array = { workspace = true }
 datafusion-common = { path = "../common", version = "31.0.0", default-features 
= false }
 sqlparser = { workspace = true }
 strum = { version = "0.25.0", features = ["derive"] }
diff --git a/datafusion/expr/src/built_in_function.rs 
b/datafusion/expr/src/built_in_function.rs
index 3f1eb581aa..a429634956 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -1020,9 +1020,13 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::DateTrunc => Signature::one_of(
                 vec![
                     Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
+                    Exact(vec![Utf8, Timestamp(Nanosecond, 
Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Microsecond, None)]),
+                    Exact(vec![Utf8, Timestamp(Microsecond, 
Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Millisecond, None)]),
+                    Exact(vec![Utf8, Timestamp(Millisecond, 
Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Second, None)]),
+                    Exact(vec![Utf8, Timestamp(Second, Some("+TZ".into()))]),
                 ],
                 self.volatility(),
             ),
@@ -1034,16 +1038,37 @@ impl BuiltinScalarFunction {
                             Timestamp(array_type.clone(), None),
                             Timestamp(Nanosecond, None),
                         ]),
+                        Exact(vec![
+                            Interval(MonthDayNano),
+                            Timestamp(array_type.clone(), Some("+TZ".into())),
+                            Timestamp(Nanosecond, Some("+TZ".into())),
+                        ]),
                         Exact(vec![
                             Interval(DayTime),
                             Timestamp(array_type.clone(), None),
                             Timestamp(Nanosecond, None),
                         ]),
+                        Exact(vec![
+                            Interval(DayTime),
+                            Timestamp(array_type.clone(), Some("+TZ".into())),
+                            Timestamp(Nanosecond, Some("+TZ".into())),
+                        ]),
+                        Exact(vec![
+                            Interval(MonthDayNano),
+                            Timestamp(array_type.clone(), None),
+                        ]),
                         Exact(vec![
                             Interval(MonthDayNano),
+                            Timestamp(array_type.clone(), Some("+TZ".into())),
+                        ]),
+                        Exact(vec![
+                            Interval(DayTime),
                             Timestamp(array_type.clone(), None),
                         ]),
-                        Exact(vec![Interval(DayTime), Timestamp(array_type, 
None)]),
+                        Exact(vec![
+                            Interval(DayTime),
+                            Timestamp(array_type, Some("+TZ".into())),
+                        ]),
                     ]
                 };
 
@@ -1060,10 +1085,13 @@ impl BuiltinScalarFunction {
                     Exact(vec![Utf8, Date32]),
                     Exact(vec![Utf8, Date64]),
                     Exact(vec![Utf8, Timestamp(Second, None)]),
+                    Exact(vec![Utf8, Timestamp(Second, Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Microsecond, None)]),
+                    Exact(vec![Utf8, Timestamp(Microsecond, 
Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Millisecond, None)]),
+                    Exact(vec![Utf8, Timestamp(Millisecond, 
Some("+TZ".into()))]),
                     Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
-                    Exact(vec![Utf8, Timestamp(Nanosecond, 
Some("+00:00".into()))]),
+                    Exact(vec![Utf8, Timestamp(Nanosecond, 
Some("+TZ".into()))]),
                 ],
                 self.volatility(),
             ),
diff --git a/datafusion/expr/src/type_coercion/functions.rs 
b/datafusion/expr/src/type_coercion/functions.rs
index 371a3950ac..883ca2b393 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -120,7 +120,7 @@ fn maybe_data_types(
             new_type.push(current_type.clone())
         } else {
             // attempt to coerce
-            if can_coerce_from(valid_type, current_type) {
+            if let Some(valid_type) = coerced_from(valid_type, current_type) {
                 new_type.push(valid_type.clone())
             } else {
                 // not possible
@@ -136,62 +136,97 @@ fn maybe_data_types(
 ///
 /// See the module level documentation for more detail on coercion.
 pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
-    use self::DataType::*;
-
     if type_into == type_from {
         return true;
     }
-    // Null can convert to most of types
+    if let Some(coerced) = coerced_from(type_into, type_from) {
+        return coerced == type_into;
+    }
+    false
+}
+
+fn coerced_from<'a>(
+    type_into: &'a DataType,
+    type_from: &'a DataType,
+) -> Option<&'a DataType> {
+    use self::DataType::*;
+
     match type_into {
-        Int8 => matches!(type_from, Null | Int8),
-        Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16),
-        Int64 => matches!(
-            type_from,
-            Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
-        ),
-        UInt8 => matches!(type_from, Null | UInt8),
-        UInt16 => matches!(type_from, Null | UInt8 | UInt16),
-        UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
-        UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
-        Float32 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-        ),
-        Float64 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-                | Decimal128(_, _)
-        ),
-        Timestamp(TimeUnit::Nanosecond, _) => {
-            matches!(
+        // coerced into type_into
+        Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+        Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => 
Some(type_into),
+        Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) => {
+            Some(type_into)
+        }
+        Int64
+            if matches!(
                 type_from,
-                Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
-            )
+                Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+            ) =>
+        {
+            Some(type_into)
         }
-        Interval(_) => {
-            matches!(type_from, Utf8 | LargeUtf8)
+        UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+        UInt16 if matches!(type_from, Null | UInt8 | UInt16) => 
Some(type_into),
+        UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => 
Some(type_into),
+        UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) 
=> {
+            Some(type_into)
         }
-        Utf8 | LargeUtf8 => true,
-        Null => can_cast_types(type_from, type_into),
-        _ => false,
+        Float32
+            if matches!(
+                type_from,
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+            ) =>
+        {
+            Some(type_into)
+        }
+        Float64
+            if matches!(
+                type_from,
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+                    | Float64
+                    | Decimal128(_, _)
+            ) =>
+        {
+            Some(type_into)
+        }
+        Timestamp(TimeUnit::Nanosecond, None)
+            if matches!(
+                type_from,
+                Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+            ) =>
+        {
+            Some(type_into)
+        }
+        Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => 
Some(type_into),
+        Utf8 | LargeUtf8 => Some(type_into),
+        Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+        // Coerce to consistent timezones, if the `type_from` timezone exists.
+        Timestamp(TimeUnit::Nanosecond, Some(_))
+            if matches!(type_from, Timestamp(TimeUnit::Nanosecond, Some(_))) =>
+        {
+            Some(type_from)
+        }
+
+        // cannot coerce
+        _ => None,
     }
 }
 
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs 
b/datafusion/physical-expr/src/datetime_expressions.rs
index 63f7645fea..263b46192e 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -20,8 +20,10 @@
 use arrow::array::Float64Builder;
 use arrow::compute::cast;
 use arrow::{
-    array::TimestampNanosecondArray, compute::kernels::temporal, 
datatypes::TimeUnit,
-    temporal_conversions::timestamp_ns_to_datetime,
+    array::TimestampNanosecondArray,
+    compute::kernels::temporal,
+    datatypes::TimeUnit,
+    temporal_conversions::{as_datetime_with_timezone, 
timestamp_ns_to_datetime},
 };
 use arrow::{
     array::{Array, ArrayRef, Float64Array, OffsetSizeTrait, PrimitiveArray},
@@ -33,7 +35,8 @@ use arrow::{
     },
 };
 use arrow_array::{
-    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampSecondArray,
+    timezone::Tz, TimestampMicrosecondArray, TimestampMillisecondArray,
+    TimestampSecondArray,
 };
 use chrono::prelude::*;
 use chrono::{Duration, Months, NaiveDate};
@@ -47,6 +50,7 @@ use datafusion_common::{
     ScalarValue,
 };
 use datafusion_expr::ColumnarValue;
+use std::str::FromStr;
 use std::sync::Arc;
 
 /// given a function `op` that maps a `&str` to a Result of an arrow native 
type,
@@ -208,23 +212,20 @@ pub fn make_current_time(
     move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano)))
 }
 
-fn quarter_month(date: &NaiveDateTime) -> u32 {
+fn quarter_month<T>(date: &T) -> u32
+where
+    T: chrono::Datelike,
+{
     1 + 3 * ((date.month() - 1) / 3)
 }
 
-/// Tuncates the single `value`, expressed in nanoseconds since the
-/// epoch, for granularities greater than 1 second, in taking into
-/// account that some granularities are not uniform durations of time
-/// (e.g. months are not always the same lengths, leap seconds, etc)
-fn date_trunc_coarse(granularity: &str, value: i64) -> Result<i64> {
-    // Use chrono NaiveDateTime to clear the various fields
-    // correctly accounting for non uniform granularities
-    let value = timestamp_ns_to_datetime(value).ok_or_else(|| {
-        DataFusionError::Execution(format!("Timestamp {value} out of range"))
-    })?;
-
-    let value = Some(value);
-
+fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> 
Result<Option<T>>
+where
+    T: chrono::Datelike
+        + chrono::Timelike
+        + std::ops::Sub<chrono::Duration, Output = T>
+        + std::marker::Copy,
+{
     let value = match granularity {
         "millisecond" => value,
         "microsecond" => value,
@@ -271,7 +272,49 @@ fn date_trunc_coarse(granularity: &str, value: i64) -> 
Result<i64> {
             return exec_err!("Unsupported date_trunc granularity: 
{unsupported}");
         }
     };
-    let value = value.and_then(|value| value.timestamp_nanos_opt());
+    Ok(value)
+}
+
+fn _date_trunc_coarse_with_tz(
+    granularity: &str,
+    value: Option<DateTime<Tz>>,
+) -> Result<Option<i64>> {
+    let value = _date_trunc_coarse::<DateTime<Tz>>(granularity, value)?;
+    Ok(value.and_then(|value| value.timestamp_nanos_opt()))
+}
+
+fn _date_trunc_coarse_without_tz(
+    granularity: &str,
+    value: Option<NaiveDateTime>,
+) -> Result<Option<i64>> {
+    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
+    Ok(value.and_then(|value| value.timestamp_nanos_opt()))
+}
+
+/// Tuncates the single `value`, expressed in nanoseconds since the
+/// epoch, for granularities greater than 1 second, in taking into
+/// account that some granularities are not uniform durations of time
+/// (e.g. months are not always the same lengths, leap seconds, etc)
+fn date_trunc_coarse(granularity: &str, value: i64, tz: Arc<Option<Tz>>) -> 
Result<i64> {
+    let value = match tz.as_ref() {
+        Some(tz) => {
+            // Use chrono DateTime<Tz> to clear the various fields because 
need to clear per timezone,
+            // and NaiveDateTime (ISO 8601) has no concept of timezones
+            let value = 
as_datetime_with_timezone::<TimestampNanosecondType>(value, *tz)
+                .ok_or(DataFusionError::Execution(format!(
+                    "Timestamp {value} out of range"
+                )))?;
+            _date_trunc_coarse_with_tz(granularity, Some(value))
+        }
+        None => {
+            // Use chrono NaiveDateTime to clear the various fields, if we 
don't have a timezone.
+            let value = timestamp_ns_to_datetime(value).ok_or_else(|| {
+                DataFusionError::Execution(format!("Timestamp {value} out of 
range"))
+            })?;
+            _date_trunc_coarse_without_tz(granularity, Some(value))
+        }
+    }?;
+
     // `with_x(0)` are infallible because `0` are always a valid
     Ok(value.unwrap())
 }
@@ -280,6 +323,7 @@ fn date_trunc_coarse(granularity: &str, value: i64) -> 
Result<i64> {
 fn _date_trunc(
     tu: TimeUnit,
     value: &Option<i64>,
+    tz: Arc<Option<Tz>>,
     granularity: &str,
 ) -> Result<Option<i64>, DataFusionError> {
     let scale = match tu {
@@ -294,7 +338,7 @@ fn _date_trunc(
     };
 
     // convert to nanoseconds
-    let nano = date_trunc_coarse(granularity, scale * value)?;
+    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
 
     let result = match tu {
         TimeUnit::Second => match granularity {
@@ -323,6 +367,16 @@ fn _date_trunc(
     Ok(result)
 }
 
+fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
+    tz.as_ref()
+        .map(|tz| {
+            Tz::from_str(tz).map_err(|op| {
+                DataFusionError::Execution(format!("failed on timezone {tz}: 
{:?}", op))
+            })
+        })
+        .transpose()
+}
+
 /// date_trunc SQL function
 pub fn date_trunc(args: &[ColumnarValue]) -> Result<ColumnarValue> {
     let (granularity, array) = (&args[0], &args[1]);
@@ -336,62 +390,112 @@ pub fn date_trunc(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
 
     Ok(match array {
         ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Nanosecond, v, 
granularity.as_str())?;
+            let parsed_tz = parse_tz(tz_opt)?;
+            let value = _date_trunc(
+                TimeUnit::Nanosecond,
+                v,
+                Arc::new(parsed_tz),
+                granularity.as_str(),
+            )?;
             let value = ScalarValue::TimestampNanosecond(value, 
tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => 
{
-            let value = _date_trunc(TimeUnit::Microsecond, v, 
granularity.as_str())?;
+            let parsed_tz = parse_tz(tz_opt)?;
+            let value = _date_trunc(
+                TimeUnit::Microsecond,
+                v,
+                Arc::new(parsed_tz),
+                granularity.as_str(),
+            )?;
             let value = ScalarValue::TimestampMicrosecond(value, 
tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => 
{
-            let value = _date_trunc(TimeUnit::Millisecond, v, 
granularity.as_str())?;
+            let parsed_tz = parse_tz(tz_opt)?;
+            let value = _date_trunc(
+                TimeUnit::Millisecond,
+                v,
+                Arc::new(parsed_tz),
+                granularity.as_str(),
+            )?;
             let value = ScalarValue::TimestampMillisecond(value, 
tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
-            let value = _date_trunc(TimeUnit::Second, v, 
granularity.as_str())?;
+            let parsed_tz = parse_tz(tz_opt)?;
+            let value = _date_trunc(
+                TimeUnit::Second,
+                v,
+                Arc::new(parsed_tz),
+                granularity.as_str(),
+            )?;
             let value = ScalarValue::TimestampSecond(value, tz_opt.clone());
             ColumnarValue::Scalar(value)
         }
         ColumnarValue::Array(array) => {
             let array_type = array.data_type();
             match array_type {
-                DataType::Timestamp(TimeUnit::Second, _) => {
+                DataType::Timestamp(TimeUnit::Second, tz_opt) => {
+                    let parsed_tz = Arc::new(parse_tz(tz_opt)?);
                     let array = as_timestamp_second_array(array)?;
                     let array = array
                         .iter()
-                        .map(|x| _date_trunc(TimeUnit::Second, &x, 
granularity.as_str()))
+                        .map(|x| {
+                            _date_trunc(
+                                TimeUnit::Second,
+                                &x,
+                                parsed_tz.clone(),
+                                granularity.as_str(),
+                            )
+                        })
                         .collect::<Result<TimestampSecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
                 }
-                DataType::Timestamp(TimeUnit::Millisecond, _) => {
+                DataType::Timestamp(TimeUnit::Millisecond, tz_opt) => {
+                    let parsed_tz = Arc::new(parse_tz(tz_opt)?);
                     let array = as_timestamp_millisecond_array(array)?;
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(TimeUnit::Millisecond, &x, 
granularity.as_str())
+                            _date_trunc(
+                                TimeUnit::Millisecond,
+                                &x,
+                                parsed_tz.clone(),
+                                granularity.as_str(),
+                            )
                         })
                         .collect::<Result<TimestampMillisecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
                 }
-                DataType::Timestamp(TimeUnit::Microsecond, _) => {
+                DataType::Timestamp(TimeUnit::Microsecond, tz_opt) => {
+                    let parsed_tz = Arc::new(parse_tz(tz_opt)?);
                     let array = as_timestamp_microsecond_array(array)?;
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(TimeUnit::Microsecond, &x, 
granularity.as_str())
+                            _date_trunc(
+                                TimeUnit::Microsecond,
+                                &x,
+                                parsed_tz.clone(),
+                                granularity.as_str(),
+                            )
                         })
                         .collect::<Result<TimestampMicrosecondArray>>()?;
                     ColumnarValue::Array(Arc::new(array))
                 }
                 _ => {
+                    let parsed_tz = Arc::new(None);
                     let array = as_timestamp_nanosecond_array(array)?;
                     let array = array
                         .iter()
                         .map(|x| {
-                            _date_trunc(TimeUnit::Nanosecond, &x, 
granularity.as_str())
+                            _date_trunc(
+                                TimeUnit::Nanosecond,
+                                &x,
+                                parsed_tz.clone(),
+                                granularity.as_str(),
+                            )
                         })
                         .collect::<Result<TimestampNanosecondArray>>()?;
 
@@ -958,7 +1062,7 @@ mod tests {
         cases.iter().for_each(|(original, granularity, expected)| {
             let left = string_to_timestamp_nanos(original).unwrap();
             let right = string_to_timestamp_nanos(expected).unwrap();
-            let result = date_trunc_coarse(granularity, left).unwrap();
+            let result = date_trunc_coarse(granularity, left, 
Arc::new(None)).unwrap();
             assert_eq!(result, right, "{original} = {expected}");
         });
     }
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index abb74b468e..88a024e0f9 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -100,6 +100,40 @@ select * from foo where ts != '2000-02-01T00:00:00';
 statement ok
 drop table foo;
 
+
+##########
+## Timezone Handling Tests
+##########
+
+statement ok
+SET TIME ZONE = '+08'
+
+# should use execution timezone
+query P
+SELECT TIMESTAMPTZ '2000-01-01T01:01:01'
+----
+2000-01-01T01:01:01+08:00
+
+# casts return timezone to use execution timezone (same as postgresql)
+query P
+SELECT TIMESTAMPTZ '2000-01-01T01:01:01+07:00'
+----
+2000-01-01T02:01:01+08:00
+
+query P
+SELECT TIMESTAMPTZ '2000-01-01T01:01:01Z'
+----
+2000-01-01T09:01:01+08:00
+
+statement ok
+SET TIME ZONE = '+00'
+
+query P
+SELECT TIMESTAMPTZ '2000-01-01T01:01:01'
+----
+2000-01-01T01:01:01Z
+
+
 ##########
 ## to_timestamp tests
 ##########
@@ -1307,3 +1341,340 @@ drop table ts_data_millis
 
 statement ok
 drop table ts_data_secs
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# server time = +07
+##########
+
+statement ok
+set timezone to '+07';
+
+# postgresql: 2000-01-01 01:00:00+07
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T01:00:00+07:00
+
+# postgresql: 2000-01-01 00:00:00+07
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T00:00:00+07:00
+
+# postgresql: 2000-01-01 08:00:00+07
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as ts
+----
+2000-01-01T08:00:00+07:00
+
+# postgresql: 2000-01-01 00:00:00+07
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01Z') as ts
+----
+2000-01-01T00:00:00+07:00
+
+# postgresql:  2022-01-01 00:00:00+07
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00', TIMESTAMPTZ 
'2020-01-01')
+----
+2022-01-01T00:00:00+07:00
+
+# postgresql: 2022-01-02 00:00:00+07
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00Z', TIMESTAMPTZ 
'2020-01-01')
+----
+2022-01-02T00:00:00+07:00
+
+# postgresql: 1
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part
+----
+1
+
+# postgresql: 8
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as part
+----
+8
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# server time = UTC
+##########
+
+statement ok
+set timezone to '+00';
+
+# postgresql: 2000-01-01T01:00:00+00
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T01:00:00Z
+
+# postgresql: 2000-01-01T00:00:00+00
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T00:00:00Z
+
+# postgresql: 1999-12-31T18:00:00+00
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as ts
+----
+1999-12-31T18:00:00Z
+
+# postgresql: 1999-12-31T00:00:00+00
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01+07') as ts
+----
+1999-12-31T00:00:00Z
+
+# postgresql: 2022-01-01 00:00:00+00
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00', TIMESTAMPTZ 
'2020-01-01')
+----
+2022-01-01T00:00:00Z
+
+# postgresql: 2021-12-31 00:00:00+00
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 01:10:00+07', TIMESTAMPTZ 
'2020-01-01')
+----
+2021-12-31T00:00:00Z
+
+# postgresql: 1
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part
+----
+1
+
+# postgresql: 18
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as part
+----
+18
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# irregular offsets
+##########
+
+query P rowsort
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:45') as 
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:30') as 
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:15') as 
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:15') as 
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:30') as 
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:45') as 
ts_irregular_offset
+----
+1999-12-31T23:00:00Z
+1999-12-31T23:00:00Z
+1999-12-31T23:00:00Z
+2000-01-01T00:00:00Z
+2000-01-01T00:00:00Z
+2000-01-01T00:00:00Z
+
+query P rowsort
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00+00:30', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00+00:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00-00:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00-00:30', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+----
+2021-12-31T00:00:00Z
+2021-12-31T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+
+query P rowsort
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+01:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:45', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:30', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:30', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:45', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-01:15', TIMESTAMPTZ 
'2020-01-01') as ts_irregular_offset
+----
+2021-12-31T22:00:00Z
+2021-12-31T23:00:00Z
+2021-12-31T23:00:00Z
+2021-12-31T23:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T01:00:00Z
+
+
+
+##########
+## Timezone acceptance bounds
+#
+# standard formats
+##########
+
+query P
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00' as rfc3339_no_tz
+----
+2022-01-01T01:10:00Z
+
+# +00, +00:00, +0000
+# +01, +01:00, +0100
+# -01, -01:00, -0100
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+00:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+0000' as rfc3339_offset_tz
+ UNION ALL
+ SELECT TIMESTAMPTZ '2022-01-01 01:10:00+01' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+01:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+0100' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-01' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-01:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-0100' as rfc3339_offset_tz
+----
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+
+query P
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00' as iso8601_no_tz
+----
+2022-01-01T01:10:00Z
+
+# +00, +00:00, +0000
+# +01, +01:00, +0100
+# -01, -01:00, -0100
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+00:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+0000' as iso8601_offset_tz
+ UNION ALL
+ SELECT TIMESTAMPTZ '2022-01-01T01:10:00+01' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+01:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+0100' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-01' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-01:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-0100' as iso8601_offset_tz
+----
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+
+statement error
+SELECT TIMESTAMPTZ '2023‐W38‐5' as iso8601_week_designation
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+Foo' as bad_tz
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+42:00' as bad_tz
+
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 GMT' as ts_gmt
+----
+2022-01-01T01:10:00Z
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 GMT-1' as ts_gmt_offset
+
+# will not accept non-GMT geo abbr
+# postgresql: accepts
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 AEST'
+
+# ok to use geo longform
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Australia/Sydney' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Antarctica/Vostok' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Africa/Johannesburg' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 America/Los_Angeles' as ts_geo
+----
+2021-12-31T14:10:00Z
+2021-12-31T19:10:00Z
+2021-12-31T23:10:00Z
+2022-01-01T09:10:00Z
+
+# geo longform timezones need whitespace converted to underscore
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 America/Los Angeles' as ts_geo
+
+statement error
+SELECT TIMESTAMPTZ 'Sat, 1 Jan 2022 01:10:00 GMT' as rfc1123
+
+
+
+##########
+## Timezone acceptance bounds
+#
+# daylight savings
+##########
+
+# will not accept daylight savings designations as geo abbr (because not 
accepting geo abbr)
+# postgresql: accepts
+statement error
+SELECT TIMESTAMPTZ '2023-03-12 02:00:00 EDT'
+
+# ok to use geo longform
+query P
+SELECT TIMESTAMPTZ '2023-03-11 02:00:00 America/Los_Angeles' as ts_geo
+----
+2023-03-11T10:00:00Z
+
+# will error if provide geo longform with time not possible due to daylight 
savings
+# Arrow error: Parser error: Error parsing timestamp from '2023-03-12 02:00:00 
America/Los_Angeles': error computing timezone offset
+# postgresql: accepts
+statement error
+SELECT TIMESTAMPTZ '2023-03-12 02:00:00 America/Los_Angeles' as ts_geo


Reply via email to