NGA-TRAN commented on code in PR #5982:
URL: https://github.com/apache/arrow-datafusion/pull/5982#discussion_r1168949161


##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -333,19 +333,67 @@ pub fn date_trunc(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
     })
 }
 
-fn date_bin_single(stride: i64, source: i64, origin: i64) -> i64 {
+// return time in nanoseconds that the source timestamp falls into based on 
the stride and origin
+fn date_bin_nanos_interval(stride_nanos: i64, source: i64, origin: i64) -> i64 
{
     let time_diff = source - origin;
-    // distance to bin
+
+    // distance from origin to bin
+    let time_delta = compute_distance(time_diff, stride_nanos);
+
+    origin + time_delta
+}
+
+// distance from origin to bin
+fn compute_distance(time_diff: i64, stride: i64) -> i64 {
     let time_delta = time_diff - (time_diff % stride);
 
-    let time_delta = if time_diff < 0 && stride > 1 {
+    if time_diff < 0 && stride > 1 {
         // The origin is later than the source timestamp, round down to the 
previous bin
         time_delta - stride
     } else {
         time_delta
+    }
+}
+
+// return time in nanoseconds that the source timestamp falls into based on 
the stride and origin
+fn date_bin_months_interval(stride_months: i64, source: i64, origin: i64) -> 
i64 {
+    // convert source and origin to DateTime<Utc>
+    let source_date = to_utc_date_time(source);
+    let origin_date = to_utc_date_time(origin);
+
+    // calculate the number of months between the source and origin
+    let month_diff = (source_date.year() - origin_date.year()) * 12
+        + source_date.month() as i32
+        - origin_date.month() as i32;
+
+    // distance from origin to bin
+    let month_delta = compute_distance(month_diff as i64, stride_months);
+
+    let mut bin_time = if month_delta < 0 {
+        origin_date - Months::new(month_delta.unsigned_abs() as u32)
+    } else {
+        origin_date + Months::new(month_delta as u32)
     };
 
-    origin + time_delta
+    // If origin is not midnight of first date of the month, the bin_time may 
be larger than the source
+    // In this case, we need to move back to previous bin
+    if bin_time > source_date {
+        let month_delta = month_delta - stride_months;
+        bin_time = if month_delta < 0 {
+            origin_date - Months::new(month_delta.unsigned_abs() as u32)
+        } else {
+            origin_date + Months::new(month_delta as u32)
+        };
+    }

Review Comment:
   See 
https://github.com/apache/arrow-datafusion/pull/5982#issuecomment-1511640259



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to