tadeja commented on code in PR #12528:
URL: https://github.com/apache/arrow/pull/12528#discussion_r3086860825


##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -123,38 +133,237 @@ struct NonZonedLocalizer {
     return t;
   }
 
+  template <typename Duration>
+  Duration OriginHelper(const Duration& d, const sys_time<Duration>& st,
+                        const CalendarUnit& unit) const {
+    Duration origin;
+    switch (unit) {
+      case compute::CalendarUnit::DAY: {
+        const year_month_day ymd = year_month_day(floor<days>(st));
+        origin = duration_cast<Duration>(
+            local_days(ymd.year() / ymd.month() / 1).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::HOUR: {
+        origin = duration_cast<Duration>(floor<days>(st).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::MINUTE: {
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::hours>(st).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::SECOND:
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::minutes>(st).time_since_epoch());
+        break;
+      case compute::CalendarUnit::MILLISECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::seconds>(d));
+        break;
+      case compute::CalendarUnit::MICROSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::milliseconds>(d));
+        break;
+      case compute::CalendarUnit::NANOSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::microseconds>(d));
+        break;
+      default:
+        origin = d;
+    }
+    return origin;
+  }
+
+  template <typename Duration, typename Unit>
+  Duration FloorTimePoint(int64_t t, const RoundTemporalOptions& options) 
const {
+    const Duration d = Duration{t};
+    if (options.calendar_based_origin) {
+      // Round to a multiple of units since the last greater unit.
+      // For example: round to multiple of days since the beginning of the 
month or
+      // to hours since the beginning of the day.
+      const Duration origin =
+          OriginHelper(d, ConvertTimePoint<Duration>(t), options.unit);
+      return duration_cast<Duration>(CeilHelper<Duration, Unit>((d - origin), 
options) +
+                                     origin);
+    } else {
+      return duration_cast<Duration>(FloorHelper<Duration, Unit>(d, options));
+    }
+  }
+
+  template <typename Duration, typename Unit>
+  Duration CeilTimePoint(int64_t t, const RoundTemporalOptions& options) const 
{
+    const Duration d = Duration{t};
+    if (options.calendar_based_origin) {
+      // Round to a multiple of units since the last greater unit.
+      // For example: round to multiple of days since the beginning of the 
month or
+      // to hours since the beginning of the day.
+      const Duration origin =
+          OriginHelper(d, ConvertTimePoint<Duration>(t), options.unit);
+      return duration_cast<Duration>(CeilHelper<Duration, Unit>((d - origin), 
options) +
+                                     origin);
+    } else {
+      return duration_cast<Duration>(CeilHelper<Duration, Unit>(d, options));
+    }
+  }
+
+  template <typename Duration, typename Unit>
+  Duration RoundTimePoint(const int64_t t, const RoundTemporalOptions& 
options) const {
+    return duration_cast<Duration>(RoundHelper<Duration, Unit>(Duration{t}, 
options));
+  }
+
   sys_days ConvertDays(sys_days d) const { return d; }
 };
 
 struct ZonedLocalizer {
   using days_t = local_days;
 
   // Timezone-localizing conversions: UTC -> local time
-  const ArrowTimeZone tz_;
+  const time_zone* tz;
 
   template <typename Duration>
   local_time<Duration> ConvertTimePoint(int64_t t) const {
-    const auto st = sys_time<Duration>(Duration{t});
-    return std::visit(
-        [st](const auto& tz) -> local_time<Duration> { return 
tz->to_local(st); }, tz_);
+    return tz->to_local(sys_time<Duration>(Duration{t}));
   }
 
   template <typename Duration>
-  Duration ConvertLocalToSys(Duration t, Status* st) const {
-    const auto lt = local_time<Duration>(t);
-    auto local_to_sys_time = [&](auto&& t) {
-      return t.get_sys_time().time_since_epoch();
-    };
+  Duration OriginHelper(const Duration& d, const local_time<Duration>& lt,
+                        const CalendarUnit& unit) const {
+    Duration origin;
+    switch (unit) {
+      case compute::CalendarUnit::DAY: {
+        const year_month_day ymd = year_month_day(floor<days>(lt));
+        origin = duration_cast<Duration>(
+            local_days(ymd.year() / ymd.month() / 1).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::HOUR: {
+        origin = duration_cast<Duration>(floor<days>(lt).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::MINUTE: {
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::hours>(lt).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::SECOND:
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::minutes>(lt).time_since_epoch());
+        break;
+      case compute::CalendarUnit::MILLISECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::seconds>(d));
+        break;
+      case compute::CalendarUnit::MICROSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::milliseconds>(d));
+        break;
+      case compute::CalendarUnit::NANOSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::microseconds>(d));
+        break;
+      default:
+        origin = d;
+    }
+    return origin;
+  }
 
-    try {
-      return ApplyTimeZone(tz_, lt, std::nullopt, local_to_sys_time);
-    } catch (const arrow_vendored::date::nonexistent_local_time& e) {
-      *st = Status::Invalid("Local time does not exist: ", e.what());
-      return Duration{0};
-    } catch (const arrow_vendored::date::ambiguous_local_time& e) {
-      *st = Status::Invalid("Local time is ambiguous: ", e.what());
-      return Duration{0};
+  template <typename Duration, typename Unit>
+  Duration FloorTimePoint(const int64_t t, const RoundTemporalOptions& 
options) const {
+    const Duration d = Duration{t};
+    const sys_time<Duration> st = sys_time<Duration>(d);
+    const local_time<Duration> lt = tz->to_local(st);
+    const sys_info si = tz->get_info(st);
+    const local_info li = tz->get_info(lt);
+
+    Duration d2;
+    if (options.calendar_based_origin) {
+      // Round to a multiple of units since the last greater unit.
+      // For example: round to multiple of days since the beginning of the 
month or
+      // to hours since the beginning of the day.
+      const Duration origin = OriginHelper(d, lt, options.unit);
+      d2 = duration_cast<Duration>(
+          FloorHelper<Duration, Unit>((lt - origin).time_since_epoch(), 
options) +
+          origin);
+    } else {
+      d2 = duration_cast<Duration>(
+          FloorHelper<Duration, Unit>(lt.time_since_epoch(), options));
     }
+    const local_info li2 = tz->get_info(local_time<Duration>(d2));
+
+    if (li2.result == local_info::ambiguous && li.result == 
local_info::ambiguous) {
+      // In case we floor from an ambiguous period into an ambiguous period we 
need to
+      // decide how to disambiguate the result. We resolve this by adding 
post-ambiguous
+      // period offset to UTC, floor this time and subtract the post-ambiguous 
period
+      // offset to get the locally floored time. Please note pre-ambiguous 
offset is
+      // typically 1 hour greater than post-ambiguous offset. While this 
produces
+      // acceptable result in UTC it can cause discontinuities in local time 
and destroys
+      // local time sortedness.
+      const auto d3 = duration_cast<Duration>(
+          FloorHelper<Duration, Unit>(d + li2.second.offset, options) -
+          li2.second.offset);
+      const auto d4 = duration_cast<Duration>(
+          FloorHelper<Duration, Unit>(d + li2.first.offset, options) - 
li2.first.offset);
+      return d3 < d4 ? d3 : d4;
+    } else if (li2.result == local_info::nonexistent ||
+               li2.first.offset < li.first.offset) {
+      // In case we hit or cross a nonexistent period we add the pre-DST-jump 
offset to
+      // UTC, floor this time and subtract the pre-DST-jump offset from the 
floored time.
+      return duration_cast<Duration>(
+          FloorHelper<Duration, Unit>(d + li2.first.offset, options) - 
li2.first.offset);
+    }
+    return duration_cast<Duration>(d2 - si.offset);
+  }
+
+  template <typename Duration, typename Unit>
+  Duration CeilTimePoint(const int64_t t, const RoundTemporalOptions& options) 
const {
+    const Duration d = Duration{t};
+    const sys_time<Duration> st = sys_time<Duration>(d);
+    const local_time<Duration> lt = tz->to_local(st);
+    const sys_info si = tz->get_info(st);
+    const local_info li = tz->get_info(lt);
+
+    Duration d2;
+    if (options.calendar_based_origin) {
+      // Round to a multiple of units since the last greater unit.
+      // For example: round to multiple of days since the beginning of the 
month or
+      // to hours since the beginning of the day.
+      const Duration origin = OriginHelper(d, lt, options.unit);
+      d2 = duration_cast<Duration>(
+          CeilHelper<Duration, Unit>((lt - origin).time_since_epoch(), 
options) + origin);
+    } else {
+      d2 = duration_cast<Duration>(
+          CeilHelper<Duration, Unit>(lt.time_since_epoch(), options));
+    }
+    const local_info li2 = tz->get_info(local_time<Duration>(d2));
+
+    if (li2.result == local_info::ambiguous && li.result == 
local_info::ambiguous) {
+      // In case we ceil from an ambiguous period into an ambiguous period we 
need to
+      // decide how to disambiguate the result. We resolve this by adding 
post-ambiguous
+      // period offset to UTC, ceil this time and subtract the post-ambiguous 
period
+      // offset to get the locally ceiled time. Please note pre-ambiguous 
offset is
+      // typically 1 hour greater than post-ambiguous offset. While this 
produces
+      // acceptable result in UTC it can cause discontinuities in local time 
and destroys
+      // local time sortedness.
+      return duration_cast<Duration>(
+          CeilHelper<Duration, Unit>(d + li2.first.offset, options) - 
li2.first.offset);
+    } else if (li2.result == local_info::nonexistent ||
+               li2.first.offset > li.first.offset) {
+      // In case we hit or cross a nonexistent period we add the pre-DST-jump 
offset to
+      // UTC, ceil this time and subtract the pre-DST-jump offset from the 
ceiled time.
+      return duration_cast<Duration>(
+          CeilHelper<Duration, Unit>(d + li2.second.offset, options) - 
li2.second.offset);
+    }
+    return duration_cast<Duration>(d2 - si.offset);
+  }
+
+  template <typename Duration, typename Unit>
+  Duration RoundTimePoint(const int64_t t, const RoundTemporalOptions& 
options) const {
+    const Duration d = Duration{t};
+    const Duration c = CeilTimePoint<Duration, Unit>(t, options);
+    const Duration f = FloorTimePoint<Duration, Unit>(t, options);
+    return (d - f >= c - d) ? c : f;
+  }
+
+  template <typename Duration>
+  Duration ConvertLocalToSys(Duration t, Status* st) const {

Review Comment:
   Perhaps consider bringing back the try-catch here (or review parameter 
Status* st and callers in `scalar_temporal_unary.cc` between 753 - 1037)?



##########
python/pyarrow/tests/test_compute.py:
##########


Review Comment:
   To be removed?



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -887,10 +800,8 @@ Duration CeilWeekTimePoint(const int64_t arg, const 
RoundTemporalOptions& option
 template <typename Duration, typename Unit, typename Localizer>

Review Comment:
   No caller, to be removed?



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -123,38 +133,237 @@ struct NonZonedLocalizer {
     return t;
   }
 
+  template <typename Duration>
+  Duration OriginHelper(const Duration& d, const sys_time<Duration>& st,
+                        const CalendarUnit& unit) const {
+    Duration origin;
+    switch (unit) {
+      case compute::CalendarUnit::DAY: {
+        const year_month_day ymd = year_month_day(floor<days>(st));
+        origin = duration_cast<Duration>(
+            local_days(ymd.year() / ymd.month() / 1).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::HOUR: {
+        origin = duration_cast<Duration>(floor<days>(st).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::MINUTE: {
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::hours>(st).time_since_epoch());
+        break;
+      }
+      case compute::CalendarUnit::SECOND:
+        origin =
+            
duration_cast<Duration>(floor<std::chrono::minutes>(st).time_since_epoch());
+        break;
+      case compute::CalendarUnit::MILLISECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::seconds>(d));
+        break;
+      case compute::CalendarUnit::MICROSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::milliseconds>(d));
+        break;
+      case compute::CalendarUnit::NANOSECOND:
+        origin = duration_cast<Duration>(floor<std::chrono::microseconds>(d));
+        break;
+      default:
+        origin = d;
+    }
+    return origin;
+  }
+
+  template <typename Duration, typename Unit>
+  Duration FloorTimePoint(int64_t t, const RoundTemporalOptions& options) 
const {
+    const Duration d = Duration{t};
+    if (options.calendar_based_origin) {
+      // Round to a multiple of units since the last greater unit.
+      // For example: round to multiple of days since the beginning of the 
month or
+      // to hours since the beginning of the day.
+      const Duration origin =
+          OriginHelper(d, ConvertTimePoint<Duration>(t), options.unit);
+      return duration_cast<Duration>(CeilHelper<Duration, Unit>((d - origin), 
options) +

Review Comment:
   (to self: verify `CeilHelper` under `FloorTimePoint` here)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to