This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a2e5330d5c fix: date_bin() on timstamps before 1970 (#13204)
a2e5330d5c is described below

commit a2e5330d5cad8516c1524bc86fce32cae324b761
Author: Martin Hilton <[email protected]>
AuthorDate: Fri Nov 1 17:24:33 2024 +0000

    fix: date_bin() on timstamps before 1970 (#13204)
    
    * fix: date_bin() on timstamps before 1970
    
    The date_bin() function was not working correctly for timestamps before
    1970. Specifically if the input timestamp was the exact time of the
    start of a bin then it would be placed in the previous bin.
    
    The % operator has a negative result when the dividend is negative.
    This causes the date_bin calculation to round up to the next bin. To
    compensate the size of 1 interval is subtracted from the result if the
    input is negative. This subtraction is no longer performed if the input
    is already the exact time of the start of a bin.
    
    * fix clippy
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions/src/datetime/date_bin.rs     | 30 ++++++++++++++++++++++-
 datafusion/sqllogictest/test_files/timestamps.slt | 17 +++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/datafusion/functions/src/datetime/date_bin.rs 
b/datafusion/functions/src/datetime/date_bin.rs
index e335c4e097..e8d065df86 100644
--- a/datafusion/functions/src/datetime/date_bin.rs
+++ b/datafusion/functions/src/datetime/date_bin.rs
@@ -240,7 +240,7 @@ fn date_bin_nanos_interval(stride_nanos: i64, source: i64, 
origin: i64) -> i64 {
 fn compute_distance(time_diff: i64, stride: i64) -> i64 {
     let time_delta = time_diff - (time_diff % stride);
 
-    if time_diff < 0 && stride > 1 {
+    if time_diff < 0 && stride > 1 && time_delta != time_diff {
         // The origin is later than the source timestamp, round down to the 
previous bin
         time_delta - stride
     } else {
@@ -864,4 +864,32 @@ mod tests {
                 assert_eq!(result, expected1, "{source} = {expected}");
             })
     }
+
+    #[test]
+    fn test_date_bin_before_epoch() {
+        let cases = [
+            (
+                (TimeDelta::try_minutes(15), "1969-12-31T23:44:59.999999999"),
+                "1969-12-31T23:30:00",
+            ),
+            (
+                (TimeDelta::try_minutes(15), "1969-12-31T23:45:00"),
+                "1969-12-31T23:45:00",
+            ),
+            (
+                (TimeDelta::try_minutes(15), "1969-12-31T23:45:00.000000001"),
+                "1969-12-31T23:45:00",
+            ),
+        ];
+
+        cases.iter().for_each(|((stride, source), expected)| {
+            let stride = stride.unwrap();
+            let stride1 = stride.num_nanoseconds().unwrap();
+            let source1 = string_to_timestamp_nanos(source).unwrap();
+
+            let expected1 = string_to_timestamp_nanos(expected).unwrap();
+            let result = date_bin_nanos_interval(stride1, source1, 0);
+            assert_eq!(result, expected1, "{source} = {expected}");
+        })
+    }
 }
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt 
b/datafusion/sqllogictest/test_files/timestamps.slt
index 38c2a66472..a09a63a791 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -980,6 +980,23 @@ SELECT DATE_BIN('3 years 1 months', '2022-09-01 
00:00:00Z');
 ----
 2022-06-01T00:00:00
 
+# Times before the unix epoch
+query P
+select date_bin('1 hour', column1)
+from (values
+  (timestamp '1969-01-01 00:00:00'),
+  (timestamp '1969-01-01 00:15:00'),
+  (timestamp '1969-01-01 00:30:00'),
+  (timestamp '1969-01-01 00:45:00'),
+  (timestamp '1969-01-01 01:00:00')
+) as sq
+----
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T00:00:00
+1969-01-01T01:00:00
+
 ###
 ## test date_trunc function
 ###


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to