wiedld commented on code in PR #7614:
URL: https://github.com/apache/arrow-datafusion/pull/7614#discussion_r1334849463
##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -1307,3 +1341,340 @@ drop table ts_data_millis
statement ok
drop table ts_data_secs
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# server time = +07
+##########
+
+statement ok
+set timezone to '+07';
+
+# postgresql: 2000-01-01 01:00:00+07
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T01:00:00+07:00
+
+# postgresql: 2000-01-01 00:00:00+07
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T00:00:00+07:00
+
+# postgresql: 2000-01-01 08:00:00+07
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as ts
+----
+2000-01-01T08:00:00+07:00
+
+# postgresql: 2000-01-01 00:00:00+07
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01Z') as ts
+----
+2000-01-01T00:00:00+07:00
+
+# postgresql: 2022-01-01 00:00:00+07
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00', TIMESTAMPTZ
'2020-01-01')
+----
+2022-01-01T00:00:00+07:00
+
+# postgresql: 2022-01-02 00:00:00+07
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00Z', TIMESTAMPTZ
'2020-01-01')
+----
+2022-01-02T00:00:00+07:00
+
+# postgresql: 1
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part
+----
+1
+
+# postgresql: 8
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01Z') as part
+----
+8
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# server time = UTC
+##########
+
+statement ok
+set timezone to '+00';
+
+# postgresql: 2000-01-01T01:00:00+00
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T01:00:00Z
+
+# postgresql: 2000-01-01T00:00:00+00
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01') as ts
+----
+2000-01-01T00:00:00Z
+
+# postgresql: 1999-12-31T18:00:00+00
+query P
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as ts
+----
+1999-12-31T18:00:00Z
+
+# postgresql: 1999-12-31T00:00:00+00
+query P
+SELECT date_trunc('day', TIMESTAMPTZ '2000-01-01T01:01:01+07') as ts
+----
+1999-12-31T00:00:00Z
+
+# postgresql: 2022-01-01 00:00:00+00
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 20:10:00', TIMESTAMPTZ
'2020-01-01')
+----
+2022-01-01T00:00:00Z
+
+# postgresql: 2021-12-31 00:00:00+00
+query P
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 01:10:00+07', TIMESTAMPTZ
'2020-01-01')
+----
+2021-12-31T00:00:00Z
+
+# postgresql: 1
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01') as part
+----
+1
+
+# postgresql: 18
+query R
+SELECT date_part('hour', TIMESTAMPTZ '2000-01-01T01:01:01+07') as part
+----
+18
+
+
+
+##########
+## Timezone impact on builtin scalar functions
+#
+# irregular offsets
+##########
+
+query P rowsort
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:45') as
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:30') as
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00+00:15') as
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:15') as
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:30') as
ts_irregular_offset
+ UNION ALL
+SELECT date_trunc('hour', TIMESTAMPTZ '2000-01-01T00:00:00-00:45') as
ts_irregular_offset
+----
+1999-12-31T23:00:00Z
+1999-12-31T23:00:00Z
+1999-12-31T23:00:00Z
+2000-01-01T00:00:00Z
+2000-01-01T00:00:00Z
+2000-01-01T00:00:00Z
+
+query P rowsort
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00+00:30', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00+00:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00-00:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 day', TIMESTAMPTZ '2022-01-01 00:00:00-00:30', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+----
+2021-12-31T00:00:00Z
+2021-12-31T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+
+query P rowsort
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+01:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:45', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:30', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00+00:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:30', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-00:45', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+ UNION ALL
+SELECT date_bin('1 hour', TIMESTAMPTZ '2022-01-01 00:00:00-01:15', TIMESTAMPTZ
'2020-01-01') as ts_irregular_offset
+----
+2021-12-31T22:00:00Z
+2021-12-31T23:00:00Z
+2021-12-31T23:00:00Z
+2021-12-31T23:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T00:00:00Z
+2022-01-01T01:00:00Z
+
+
+
+##########
+## Timezone acceptance bounds
+#
+# standard formats
+##########
+
+query P
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00' as rfc3339_no_tz
+----
+2022-01-01T01:10:00Z
+
+# +00, +00:00, +0000
+# +01, +01:00, +0100
+# -01, -01:00, -0100
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+00:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+0000' as rfc3339_offset_tz
+ UNION ALL
+ SELECT TIMESTAMPTZ '2022-01-01 01:10:00+01' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+01:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00+0100' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-01' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-01:00' as rfc3339_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00-0100' as rfc3339_offset_tz
+----
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+
+query P
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00' as iso8601_no_tz
+----
+2022-01-01T01:10:00Z
+
+# +00, +00:00, +0000
+# +01, +01:00, +0100
+# -01, -01:00, -0100
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+00:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+0000' as iso8601_offset_tz
+ UNION ALL
+ SELECT TIMESTAMPTZ '2022-01-01T01:10:00+01' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+01:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+0100' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-01' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-01:00' as iso8601_offset_tz
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00-0100' as iso8601_offset_tz
+----
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T00:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T01:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+2022-01-01T02:10:00Z
+
+statement error
+SELECT TIMESTAMPTZ '2023‐W38‐5' as iso8601_week_designation
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+Foo' as bad_tz
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01T01:10:00+42:00' as bad_tz
+
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 GMT' as ts_gmt
+----
+2022-01-01T01:10:00Z
+
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 GMT-1' as ts_gmt_offset
+
+# will not accept non-GMT geo abv
+# postgresql: accepts
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 AEST'
+
+# ok to use geo longform
+query P rowsort
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Australia/Sydney' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Antarctica/Vostok' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 Africa/Johannesburg' as ts_geo
+ UNION ALL
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 America/Los_Angeles' as ts_geo
+----
+2021-12-31T14:10:00Z
+2021-12-31T19:10:00Z
+2021-12-31T23:10:00Z
+2022-01-01T09:10:00Z
+
+# geo longform timezones need whitespace converted to underscore
+statement error
+SELECT TIMESTAMPTZ '2022-01-01 01:10:00 America/Los Angeles' as ts_geo
+
+statement error
+SELECT TIMESTAMPTZ 'Sat, 1 Jan 2022 01:10:00 GMT' as rfc1123
+
+
+
+##########
+## Timezone acceptance bounds
+#
+# daylight savings
+##########
+
+# will not accept daylight savings designations as geo abv (because not
accepting geo abv)
+# postgresql: accepts
+statement error
+SELECT TIMESTAMPTZ '2023-03-12 02:00:00 EDT'
+
+# ok to use geo longform
+query P
+SELECT TIMESTAMPTZ '2023-03-11 02:00:00 America/Los_Angeles' as ts_geo
+----
+2023-03-11T10:00:00Z
+
+# will error if provide geo longform with time not possible due to daylight
savings
+# Arrow error: Parser error: Error parsing timestamp from '2023-03-12 02:00:00
America/Los_Angeles': error computing timezone offset
Review Comment:
Daylight savings is applied in postgresql via (at minimum) two ways:
* the non-GMT abbreviations (which the parse does not accepted)
* in the geo longform
Since we do support the geo longform, but selectively error for an invalid
time (due to daylight savings) -- is this something we wish to change?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]