stuartcarnie commented on code in PR #5982:
URL: https://github.com/apache/arrow-datafusion/pull/5982#discussion_r1168007392
##########
datafusion/core/tests/sqllogictests/test_files/timestamps.slt:
##########
Review Comment:
Great test coverage and comments, thanks @NGA-TRAN 💯
##########
datafusion/core/tests/sqllogictests/test_files/timestamps.slt:
##########
@@ -500,6 +500,246 @@ FROM (
(TIMESTAMP '2021-06-10 17:19:10Z', TIMESTAMP '2001-01-01T00:00:00Z', 0.3)
) as t (time, origin, val)
+
+# month interval with INTERVAL keyword in date_bin with default start time
+query P
+select date_bin(INTERVAL '1 month', column1)
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-02-01T00:00:00
+2022-02-01T00:00:00
+2022-03-01T00:00:00
+
+
+# year interval in date_bin with default start time
+query P
+select date_bin(INTERVAL '1 year', column1)
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2023-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2022-01-01T00:00:00
+2023-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+
+query P
+SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z', '1970-01-01T00:00:00Z');
+----
+2022-01-01T00:00:00
+
+
+# Tests without INTERVAL keyword
+# 1-month interval in date_bin with default start time
+query P
+select date_bin('1 month', column1)
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-02-01T00:00:00
+2022-02-01T00:00:00
+2022-03-01T00:00:00
+
+# 2-month interval in date_bin with default start time
+query P
+select date_bin('2 month', column1)
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-03-01T00:00:00
+
+
+# month interval with start date end of the month plus some minutes
+#
+# The the return of `date_bin` is the start of the bin. The bin width is one
year.
+# The source data must be inside the bin.
+# Since the origin is '1970-12-31T00:15:00Z', the start of the bins are
+# '1970-12-31T00:15:00Z',
+# '1971-12-31T00:15:00Z',
+# ...,
+# '2021-12-31T00:15:00Z',
+# '2022-12-31T00:15:00Z',
+# ...
+#
+# Note the datetime '2022-03-31 00:00:00'. Its bin is NOT '2022-03-31
00:15:00' which is after its time
+# Its bin is '2022-02-28T00:15:00'
+#
+query P
+select date_bin('1 month', column1, '1970-12-31T00:15:00Z')
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2022-01-31T00:15:00
+2022-01-31T00:15:00
+2022-02-28T00:15:00
+
+# month interval with start date is end of the month plus some minutes
+query P
+select date_bin('2 months', column1, '1970-12-31T00:15:00Z')
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00')
+) as sq
+----
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2022-02-28T00:15:00
+
+# year interval in date_bin with default start time
+query P
+select date_bin('1 year', column1)
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00'),
+ (timestamp '2023-10-28 01:33:00')
+) as sq
+----
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2022-01-01T00:00:00
+2023-01-01T00:00:00
+
+# year interval with start date is end of the month plus some minutes
+query P
+select date_bin('1 year', column1, '1970-12-31T00:15:00Z')
+from (values
+ (timestamp '2022-01-01 00:00:00'),
+ (timestamp '2022-01-01 01:00:00'),
+ (timestamp '2022-01-02 00:00:00'),
+ (timestamp '2022-02-02 00:00:00'),
+ (timestamp '2022-02-15 00:00:00'),
+ (timestamp '2022-03-31 00:00:00'),
+ (timestamp '2023-03-31 00:00:00')
+) as sq
+----
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2021-12-31T00:15:00
+2022-12-31T00:15:00
+
+# month interval on constant
+query P
+SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z');
+----
+2022-01-01T00:00:00
+
+# five months interval on constant
+query P
+SELECT DATE_BIN('5 month', '2022-01-01T00:00:00Z');
+----
+2021-09-01T00:00:00
+
+# month interval with default start time
+query P
+SELECT DATE_BIN('1 month', '2022-01-01 00:00:00Z');
+----
+2022-01-01T00:00:00
+
+# origin on the first date but not at midnight
+query P
+SELECT DATE_BIN('1 month', '2022-04-01T00:00:00Z', '2021-05-01T00:04:00Z');
+----
+2022-03-01T00:04:00
+
+# origin is May 51 (last date of the month) to produce bin on Feb 28
Review Comment:
```suggestion
# origin is May 31 (last date of the month) to produce bin on Feb 28
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]