This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new a2e5330d5c fix: date_bin() on timstamps before 1970 (#13204)
a2e5330d5c is described below
commit a2e5330d5cad8516c1524bc86fce32cae324b761
Author: Martin Hilton <[email protected]>
AuthorDate: Fri Nov 1 17:24:33 2024 +0000
fix: date_bin() on timstamps before 1970 (#13204)
* fix: date_bin() on timstamps before 1970
The date_bin() function was not working correctly for timestamps before
1970. Specifically if the input timestamp was the exact time of the
start of a bin then it would be placed in the previous bin.
The % operator has a negative result when the dividend is negative.
This causes the date_bin calculation to round up to the next bin. To
compensate the size of 1 interval is subtracted from the result if the
input is negative. This subtraction is no longer performed if the input
is already the exact time of the start of a bin.
* fix clippy
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions/src/datetime/date_bin.rs | 30 ++++++++++++++++++++++-
datafusion/sqllogictest/test_files/timestamps.slt | 17 +++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/datafusion/functions/src/datetime/date_bin.rs
b/datafusion/functions/src/datetime/date_bin.rs
index e335c4e097..e8d065df86 100644
--- a/datafusion/functions/src/datetime/date_bin.rs
+++ b/datafusion/functions/src/datetime/date_bin.rs
@@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64,
origin: i64) -> i64 {
fn compute_distance(time_diff: i64, stride: i64) -> i64 {
let time_delta = time_diff - (time_diff % stride);
- if time_diff < 0 && stride > 1 {
+ if time_diff < 0 && stride > 1 && time_delta != time_diff {
// The origin is later than the source timestamp, round down to the
previous bin
time_delta - stride
} else {
@@ -864,4 +864,32 @@ mod tests {
assert_eq!(result, expected1, "{source} = {expected}");
})
}
+
+ #[test]
+ fn test_date_bin_before_epoch() {
+ let cases = [
+ (
+ (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"),
+ "1969-12-31T23:30:00",
+ ),
+ (
+ (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"),
+ "1969-12-31T23:45:00",
+ ),
+ (
+ (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"),
+ "1969-12-31T23:45:00",
+ ),
+ ];
+
+ cases.iter().for_each(|((stride, source), expected)| {
+ let stride = stride.unwrap();
+ let stride1 = stride.num_nanoseconds().unwrap();
+ let source1 = string_to_timestamp_nanos(source).unwrap();
+
+ let expected1 = string_to_timestamp_nanos(expected).unwrap();
+ let result = date_bin_nanos_interval(stride1, source1, 0);
+ assert_eq!(result, expected1, "{source} = {expected}");
+ })
+ }
}
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt
b/datafusion/sqllogictest/test_files/timestamps.slt
index 38c2a66472..a09a63a791 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01
00:00:00Z');
----
2022-06-01T00:00:00
+# Times before the unix epoch
+query P
+select date_bin('1 hour', column1)
+from (values
+ (timestamp '1969-01-01 00:00:00'),
+ (timestamp '1969-01-01 00:15:00'),
+ (timestamp '1969-01-01 00:30:00'),
+ (timestamp '1969-01-01 00:45:00'),
+ (timestamp '1969-01-01 01:00:00')
+) as sq
+----
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T01:00:00
+
###
## test date_trunc function
###
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]