pitrou commented on code in PR #12657:
URL: https://github.com/apache/arrow/pull/12657#discussion_r849301387


##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2104,6 +2116,7 @@ def test_round_temporal(unit):
         "1967-02-26 05:56:46.922376960",
         "1975-11-01 10:55:37.016146432",
         "1982-01-21 18:43:44.517366784",
+        "1992-01-01T00:00:00.100000000",

Review Comment:
   I'm curious, is the use of a "T" separator deliberate? Otherwise it might 
puzzle the reader.



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -705,21 +721,75 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, 
Localizer localizer_) {
     } else {
       total_months = (total_months - multiple + 1) / multiple * multiple;
     }
-    return year_month_day(year{1970} / jan / 0) + months{total_months};
+    return year{1970} / jan / 1 + months{total_months};
   }
 }
 
 template <typename Duration, typename Unit, typename Localizer>
-const Duration FloorTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions 
options,

Review Comment:
   Same here (pass by reference)?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -705,21 +721,75 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, 
Localizer localizer_) {
     } else {
       total_months = (total_months - multiple + 1) / multiple * multiple;
     }
-    return year_month_day(year{1970} / jan / 0) + months{total_months};
+    return year{1970} / jan / 1 + months{total_months};
   }
 }
 
 template <typename Duration, typename Unit, typename Localizer>
-const Duration FloorTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions 
options,
                               Localizer localizer_, Status* st) {
   const auto t = localizer_.template ConvertTimePoint<Duration>(arg);
-  const Unit d = floor<Unit>(t).time_since_epoch();
 
-  if (multiple == 1) {
+  if (options.multiple == 1) {
+    const Unit d = floor<Unit>(t).time_since_epoch();
     return localizer_.template 
ConvertLocalToSys<Duration>(duration_cast<Duration>(d),
                                                            st);
+  } else if (options.calendar_based_origin) {
+    const Unit unit = Unit{options.multiple};
+
+    switch (options.unit) {
+      case compute::CalendarUnit::DAY: {
+        const auto origin =
+            localizer_.ConvertDays(year_month_day(floor<days>(t)).year() /
+                                   year_month_day(floor<days>(t)).month() / 1);
+        const auto m = (floor<days>(t) - origin) / unit * unit + origin;

Review Comment:
   Is `floor<days>` actually required here?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -689,11 +689,27 @@ struct IsDaylightSavings {
 // Round temporal values to given frequency
 
 template <typename Duration, typename Localizer>
-year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) {
+year_month_day GetFlooredYmd(int64_t arg, const int multiple,
+                             const RoundTemporalOptions options, Localizer 
localizer_) {

Review Comment:
   Probably better to pass the options by reference here?
   ```suggestion
   year_month_day GetFlooredYmd(int64_t arg, const int multiple,
                                const RoundTemporalOptions& options, Localizer 
localizer_) {
   ```



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -689,11 +689,27 @@ struct IsDaylightSavings {
 // Round temporal values to given frequency
 
 template <typename Duration, typename Localizer>
-year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) {
+year_month_day GetFlooredYmd(int64_t arg, const int multiple,
+                             const RoundTemporalOptions options, Localizer 
localizer_) {
   year_month_day ymd{floor<days>(localizer_.template 
ConvertTimePoint<Duration>(arg))};
 
   if (multiple == 1) {
     return year_month_day(ymd.year() / ymd.month() / 1);
+  } else if (options.calendar_based_origin) {
+    switch (options.unit) {
+      case compute::CalendarUnit::MONTH: {
+        const auto m =
+            static_cast<uint32_t>(ymd.month()) / options.multiple * 
options.multiple;
+        return year_month_day(ymd.year() / 1 / 1) + months{m};
+      }
+      case compute::CalendarUnit::QUARTER: {
+        const auto m = static_cast<uint32_t>(ymd.month()) / (options.multiple 
* 3) *
+                       (options.multiple * 3);
+        return year_month_day(ymd.year() / 1 / 1) + months{m};
+      }
+      default:
+        return ymd;

Review Comment:
   What about `YEAR` and `DAY`?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -705,21 +721,75 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, 
Localizer localizer_) {
     } else {
       total_months = (total_months - multiple + 1) / multiple * multiple;
     }
-    return year_month_day(year{1970} / jan / 0) + months{total_months};
+    return year{1970} / jan / 1 + months{total_months};

Review Comment:
   Was it a bug? If so, is a test added for it?



##########
python/pyarrow/_compute.pyx:
##########
@@ -901,10 +903,20 @@ class RoundTemporalOptions(_RoundTemporalOptions):
         "nanosecond".
     week_starts_monday : bool, default True
         If True, weeks start on Monday; if False, on Sunday.
+    change_on_boundary : bool, default False
+        If True times exactly on unit multiple boundary will be rounded
+        one unit multiple up. This applies for ceiling only.
+    calendar_based_origin : bool, default False
+        By default origin is 1970-01-01T00:00:00. By setting this to True,
+        rounding origin will be beginning of one less precise calendar unit.
+        E.g.: rounding to hours will use beginning of day as origin.
+
     """
 
-    def __init__(self, multiple=1, unit="day", week_starts_monday=True):
-        self._set_options(multiple, unit, week_starts_monday)
+    def __init__(self, multiple=1, unit="day", week_starts_monday=True,
+                 change_on_boundary=False, calendar_based_origin=False):

Review Comment:
   Hmm, can we make all boolean options keyword-only?
   ```suggestion
       def __init__(self, multiple=1, unit="day", *, week_starts_monday=True,
                    change_on_boundary=False, calendar_based_origin=False):
   ```



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -728,18 +798,31 @@ const Duration FloorTimePoint(const int64_t arg, const 
int64_t multiple,
 }
 
 template <typename Duration, typename Localizer>
-const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorWeekTimePoint(const int64_t arg, const 
RoundTemporalOptions options,
                                   Localizer localizer_, const Duration 
weekday_offset,
                                   Status* st) {
   const auto t = localizer_.template ConvertTimePoint<Duration>(arg) + 
weekday_offset;
   const weeks d = floor<weeks>(t).time_since_epoch();
 
-  if (multiple == 1) {
+  if (options.multiple == 1) {
     return localizer_.template 
ConvertLocalToSys<Duration>(duration_cast<Duration>(d),
                                                            st) -
            weekday_offset;
+  } else if (options.calendar_based_origin) {
+    weekday wd_;

Review Comment:
   Can you add comments explaining the following calculation?



##########
cpp/src/arrow/compute/api_scalar.h:
##########
@@ -117,6 +119,13 @@ class ARROW_EXPORT RoundTemporalOptions : public 
FunctionOptions {
   CalendarUnit unit;
   /// What day does the week start with (Monday=true, Sunday=false)
   bool week_starts_monday;
+  /// Times exactly on unit multiple boundary will be rounded one unit 
multiple up.
+  /// This applies for ceiling only.
+  bool change_on_boundary;

Review Comment:
   Since this is only for `ceil`, perhaps rename it `strict_ceil` or something 
similar? @jorisvandenbossche what do you think?



##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2078,6 +2078,19 @@ def _check_temporal_rounding(ts, values, unit):
         expected = ts.dt.round(frequency)
         np.testing.assert_array_equal(result, expected)
 
+    # TODO: should work for day

Review Comment:
   Why doesn't it? Is it a bug on our side or is it Pandas' fault?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -705,21 +721,75 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, 
Localizer localizer_) {
     } else {
       total_months = (total_months - multiple + 1) / multiple * multiple;
     }
-    return year_month_day(year{1970} / jan / 0) + months{total_months};
+    return year{1970} / jan / 1 + months{total_months};
   }
 }
 
 template <typename Duration, typename Unit, typename Localizer>
-const Duration FloorTimePoint(const int64_t arg, const int64_t multiple,
+const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions 
options,
                               Localizer localizer_, Status* st) {
   const auto t = localizer_.template ConvertTimePoint<Duration>(arg);
-  const Unit d = floor<Unit>(t).time_since_epoch();
 
-  if (multiple == 1) {
+  if (options.multiple == 1) {
+    const Unit d = floor<Unit>(t).time_since_epoch();
     return localizer_.template 
ConvertLocalToSys<Duration>(duration_cast<Duration>(d),
                                                            st);
+  } else if (options.calendar_based_origin) {
+    const Unit unit = Unit{options.multiple};
+
+    switch (options.unit) {
+      case compute::CalendarUnit::DAY: {
+        const auto origin =
+            localizer_.ConvertDays(year_month_day(floor<days>(t)).year() /
+                                   year_month_day(floor<days>(t)).month() / 1);
+        const auto m = (floor<days>(t) - origin) / unit * unit + origin;
+        return localizer_.template ConvertLocalToSys<Duration>(
+            duration_cast<Duration>(m.time_since_epoch()), st);
+      }
+      case compute::CalendarUnit::HOUR: {
+        const auto origin = 
localizer_.ConvertDays(year_month_day(floor<days>(t)));
+        const auto m = (t - origin) / unit * unit + origin;
+        return localizer_.template ConvertLocalToSys<Duration>(
+            duration_cast<Duration>(m.time_since_epoch()), st);

Review Comment:
   This snippet seems repeated in every `case` clause, perhaps move it out of 
the `switch` entirely?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to