lidavidm commented on a change in pull request #11990:
URL: https://github.com/apache/arrow/pull/11990#discussion_r771842555



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -1024,6 +1024,18 @@ Result<Datum> Month(const Datum& values, ExecContext* 
ctx = NULLPTR);
 ARROW_EXPORT
 Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
 
+/// \brief DateStruct returns a struct containing the Year, Month and Day 
value for
+/// each element of `values`.
+///
+/// \param[in] values input to extract (year, month, day) struct from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 7.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DateStruct(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       `YearMonthDay`/`year_month_day` might be a little more consistent with 
the naming here?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -580,6 +627,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) {
     CheckScalarUnary("year", unit, times_seconds_precision, int64(), year);
     CheckScalarUnary("month", unit, times_seconds_precision, int64(), month);
     CheckScalarUnary("day", unit, times_seconds_precision, int64(), day);
+    // TODO: where is year assigned in this function?

Review comment:
       TEST_F declares the test as using a [test fixture in 
Googletest](https://google.github.io/googletest/primer.html#same-data-multiple-tests)
 so it effectively becomes a method of the fixture class above. Hence `year` 
comes from here: 
https://github.com/apache/arrow/blob/b5a47e166c7c168c3393256d4da48bf7b7f6add5/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc#L141

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -185,6 +191,98 @@ struct Day {
   Localizer localizer_;
 };
 
+// ----------------------------------------------------------------------
+// Extract (year, month, day) struct from temporal types
+
+template <typename Duration, typename InType, typename BuilderType>
+struct DateStructVisitValueFunction {
+  static Result<std::function<Status(typename InType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData&,
+      StructBuilder* struct_builder) {
+    return [=](typename InType::c_type arg) {
+      const auto ymd = year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+      field_builders[0]->UnsafeAppend(static_cast<const int32_t>(ymd.year()));
+      field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+      field_builders[2]->UnsafeAppend(static_cast<const uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  };
+};  
+
+template <typename Duration, typename BuilderType>
+struct DateStructVisitValueFunction<Duration, TimestampType, BuilderType> {
+  static Result<std::function<Status(typename TimestampType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData& in,
+      StructBuilder* struct_builder) {
+    const auto& timezone = GetInputTimezone(in);
+    if (timezone.empty()) {
+      return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+        return struct_builder->Append();
+      };
+    }
+    ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
+    return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(ZonedLocalizer{tz}.template 
ConvertTimePoint<Duration>(arg)));      
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  }
+};
+
+template <typename Duration, typename InType>
+struct DateStruct {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    if (in.is_valid) {
+      const auto& in_val = internal::UnboxScalar<const InType>::Unbox(in);
+      const auto t = floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(in_val));

Review comment:
       It might be possible to structure things more like the other kernels 
except perhaps return `year_month_day`. And then we can dispatch to the right 
one variant here and reuse the same implementation across the scalar/array and 
zoned/naive timestamp cases.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1390,29 +1392,31 @@ For timestamps inputs with non-empty timezone, 
localized timestamp components wi
 
+--------------------+------------+-------------------+---------------+----------------------------+-------+
 | subsecond          | Unary      | Timestamp, Time   | Double        |        
                    |       |
 
+--------------------+------------+-------------------+---------------+----------------------------+-------+
-| us_week            | Unary      | Temporal          | Int64         |        
                    | \(4)  |
+| us_week            | Unary      | Temporal          | Int64         |        
                    | \(5)  |
 
+--------------------+------------+-------------------+---------------+----------------------------+-------+
-| week               | Unary      | Timestamp         | Int64         | 
:struct:`WeekOptions`      | \(5)  |
+| week               | Unary      | Timestamp         | Int64         | 
:struct:`WeekOptions`      | \(6)  |
 
+--------------------+------------+-------------------+---------------+----------------------------+-------+
 | year               | Unary      | Temporal          | Int64         |        
                    |       |
 
+--------------------+------------+-------------------+---------------+----------------------------+-------+
 
-* \(1) Outputs the number of the day of the week. By default week begins on 
Monday
+* \(1) Output is a ``{"year": output type, "month": output type, "day": output 
type}`` Struct.

Review comment:
       This should be something like `{"year": int64(), ...}` no?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -833,6 +931,13 @@ const FunctionDoc day_doc{
      "cannot be found in the timezone database."),
     {"values"}};
 
+const FunctionDoc date_struct_doc{
+  "Extract (year, month, day) struct",
+  ("Null values emit null.\n"
+   "An error is returned in the values have a defined timezone but it\n"
+   "cannot be found in the timezone database."),

Review comment:
       ```suggestion
      "An error is returned if the values have a defined timezone but it\n"
      "cannot be found in the timezone database."),
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -185,6 +191,98 @@ struct Day {
   Localizer localizer_;
 };
 
+// ----------------------------------------------------------------------
+// Extract (year, month, day) struct from temporal types
+
+template <typename Duration, typename InType, typename BuilderType>
+struct DateStructVisitValueFunction {
+  static Result<std::function<Status(typename InType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData&,
+      StructBuilder* struct_builder) {
+    return [=](typename InType::c_type arg) {
+      const auto ymd = year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+      field_builders[0]->UnsafeAppend(static_cast<const int32_t>(ymd.year()));
+      field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+      field_builders[2]->UnsafeAppend(static_cast<const uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  };
+};  
+
+template <typename Duration, typename BuilderType>
+struct DateStructVisitValueFunction<Duration, TimestampType, BuilderType> {
+  static Result<std::function<Status(typename TimestampType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData& in,
+      StructBuilder* struct_builder) {
+    const auto& timezone = GetInputTimezone(in);
+    if (timezone.empty()) {
+      return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+        return struct_builder->Append();
+      };
+    }
+    ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
+    return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(ZonedLocalizer{tz}.template 
ConvertTimePoint<Duration>(arg)));      
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  }
+};
+
+template <typename Duration, typename InType>
+struct DateStruct {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    if (in.is_valid) {
+      const auto& in_val = internal::UnboxScalar<const InType>::Unbox(in);
+      const auto t = floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(in_val));

Review comment:
       What if the scalar type has a timezone?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -185,6 +191,98 @@ struct Day {
   Localizer localizer_;
 };
 
+// ----------------------------------------------------------------------
+// Extract (year, month, day) struct from temporal types
+
+template <typename Duration, typename InType, typename BuilderType>
+struct DateStructVisitValueFunction {
+  static Result<std::function<Status(typename InType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData&,
+      StructBuilder* struct_builder) {
+    return [=](typename InType::c_type arg) {
+      const auto ymd = year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+      field_builders[0]->UnsafeAppend(static_cast<const int32_t>(ymd.year()));
+      field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+      field_builders[2]->UnsafeAppend(static_cast<const uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  };
+};  
+
+template <typename Duration, typename BuilderType>
+struct DateStructVisitValueFunction<Duration, TimestampType, BuilderType> {
+  static Result<std::function<Status(typename TimestampType::c_type arg)>> Get(
+      const std::vector<BuilderType*>& field_builders, const ArrayData& in,
+      StructBuilder* struct_builder) {
+    const auto& timezone = GetInputTimezone(in);
+    if (timezone.empty()) {
+      return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(arg)));;
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+        return struct_builder->Append();
+      };
+    }
+    ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
+    return [=](TimestampType::c_type arg) {
+        const auto ymd = 
year_month_day(floor<days>(ZonedLocalizer{tz}.template 
ConvertTimePoint<Duration>(arg)));      
+        field_builders[0]->UnsafeAppend(static_cast<const 
int32_t>(ymd.year()));
+        field_builders[1]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.month()));
+        field_builders[2]->UnsafeAppend(static_cast<const 
uint32_t>(ymd.day()));
+      return struct_builder->Append();
+    };
+  }
+};
+
+template <typename Duration, typename InType>
+struct DateStruct {
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    if (in.is_valid) {
+      const auto& in_val = internal::UnboxScalar<const InType>::Unbox(in);
+      const auto t = floor<days>(NonZonedLocalizer{}.template 
ConvertTimePoint<Duration>(in_val));
+      const auto ymd = year_month_day(t);
+      
+      ScalarVector values = {std::make_shared<Int64Scalar>(static_cast<const 
int32_t>(ymd.year())),
+       std::make_shared<Int64Scalar>(static_cast<const uint32_t>(ymd.month())),
+       std::make_shared<Int64Scalar>(static_cast<const uint32_t>(ymd.day()))};
+      *checked_cast<StructScalar*>(out) =
+          StructScalar(std::move(values), DateStructType());

Review comment:
       You're going to want to format the code. See 
https://arrow.apache.org/docs/developers/cpp/development.html#code-style-linting-and-ci




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to