rok commented on a change in pull request #10176:
URL: https://github.com/apache/arrow/pull/10176#discussion_r633016379



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/formatting.h"
+
+namespace arrow {
+
+using internal::StringFormatter;
+
+class ScalarTemporalTest : public ::testing::Test {};
+
+namespace compute {
+
+TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json);
+
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]");
+
+  ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, Millisecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points));
+  ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points));
+
+  ASSERT_EQ(actual_year, year);
+  ASSERT_EQ(actual_month, month);
+  ASSERT_EQ(actual_day, day);
+  ASSERT_EQ(actual_day_of_year, day_of_year);
+  ASSERT_EQ(actual_week, week);
+  ASSERT_EQ(actual_quarter, quarter);
+  ASSERT_EQ(actual_day_of_week, day_of_week);
+  ASSERT_EQ(actual_hour, hour);
+  ASSERT_EQ(actual_minute, minute);
+  ASSERT_EQ(actual_second, second);
+  ASSERT_EQ(actual_millisecond, millisecond);
+  ASSERT_EQ(actual_microsecond, microsecond);
+  ASSERT_EQ(actual_nanosecond, nanosecond);
+
+  CheckScalarUnary("year", time_points, year);
+  CheckScalarUnary("month", time_points, month);
+  CheckScalarUnary("day", time_points, day);
+  CheckScalarUnary("day_of_year", time_points, day_of_year);
+  CheckScalarUnary("week", time_points, week);
+  CheckScalarUnary("quarter", time_points, quarter);
+  CheckScalarUnary("day_of_week", time_points, day_of_week);
+  CheckScalarUnary("hour", time_points, hour);
+  CheckScalarUnary("minute", time_points, minute);
+  CheckScalarUnary("second", time_points, second);
+  CheckScalarUnary("millisecond", time_points, millisecond);
+  CheckScalarUnary("microsecond", time_points, microsecond);
+  CheckScalarUnary("nanosecond", time_points, nanosecond);
+}
+
+TEST(ScalarTemporalTest, TestTemporalComponentExtraction) {
+  const char* json_second = "[59, 951866603, -2208981640, 2000000000]";
+  const char* json_milli = "[59000, 951866603000, -2208981640000, 
2000000000000]";
+  const char* json_micro =
+      "[59000000, 951866603000000, -2208981640000000, 2000000000000000]";
+  const char* json_nano =
+      "[59000000000, 951866603000000000, -2208981640000000000, 
2000000000000000000]";
+
+  auto time_points_second = ArrayFromJSON(timestamp(TimeUnit::SECOND), 
json_second);
+  auto time_points_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), 
json_milli);
+  auto time_points_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), 
json_micro);
+  auto time_points_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), json_nano);
+
+  auto year = ArrayFromJSON(int64(), "[1970, 2000, 1900, 2033]");
+  auto month = ArrayFromJSON(int64(), "[1, 2, 1, 5]");
+  auto day = ArrayFromJSON(int64(), "[1, 29, 1, 18]");
+  auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 1, 138]");
+  auto week = ArrayFromJSON(int64(), "[1, 9, 1, 20]");
+  auto quarter = ArrayFromJSON(int64(), "[1, 1, 1, 2]");
+  auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 1, 3]");
+  auto hour = ArrayFromJSON(int64(), "[0, 23, 1, 3]");
+  auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]");
+  auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]");
+  auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+  auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]");
+
+  for (auto time_points :
+       {time_points_second, time_points_milli, time_points_micro, 
time_points_nano}) {
+    CheckScalarUnary("year", time_points, year);
+    CheckScalarUnary("month", time_points, month);
+    CheckScalarUnary("day", time_points, day);
+    CheckScalarUnary("day_of_year", time_points, day_of_year);
+    CheckScalarUnary("week", time_points, week);
+    CheckScalarUnary("quarter", time_points, quarter);
+    CheckScalarUnary("day_of_week", time_points, day_of_week);
+    CheckScalarUnary("hour", time_points, hour);
+    CheckScalarUnary("minute", time_points, minute);
+    CheckScalarUnary("second", time_points, second);
+    CheckScalarUnary("millisecond", time_points, millisecond);
+    CheckScalarUnary("microsecond", time_points, microsecond);
+    CheckScalarUnary("nanosecond", time_points, nanosecond);
+  }
+
+  std::string in = "[123, 999, 1, 31231000]";
+  auto out = ArrayFromJSON(int64(), "[123, 999, 1, 0]");
+
+  auto tp_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), in);
+  auto tp_milli_zoned = ArrayFromJSON(timestamp(TimeUnit::MILLI, "Etc/GMT+2"), 
in);
+  CheckScalarUnary("millisecond", tp_milli, out);
+  CheckScalarUnary("millisecond", tp_milli, out);
+
+  auto tp_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), in);
+  auto tp_micro_zoned = ArrayFromJSON(timestamp(TimeUnit::MICRO, "Etc/GMT+2"), 
in);
+  CheckScalarUnary("microsecond", tp_micro, out);
+  CheckScalarUnary("microsecond", tp_micro_zoned, out);
+
+  auto tp_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), in);
+  auto tp_nano_zoned = ArrayFromJSON(timestamp(TimeUnit::NANO, "Etc/GMT+2"), 
in);
+  CheckScalarUnary("nanosecond", tp_nano, out);
+  CheckScalarUnary("nanosecond", tp_nano_zoned, out);
+}
+
+TEST(ScalarTemporalTest, TestSimpleZonedTemporalComponentExtraction) {
+  const char* json =
+      R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+          "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])";
+  auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND, "Etc/GMT+2"), 
json);
+
+  auto year = ArrayFromJSON(int64(), "[1969, 2000, 3989, 1899, 2033]");

Review comment:
       It turns out `Etc/GMT+X == UTC-X` so this was actually ok. Weird mapping.

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -0,0 +1,348 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/time.h"
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::hh_mm_ss;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::trunc;
+using arrow_vendored::date::weekday;
+using arrow_vendored::date::weeks;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::years;
+using arrow_vendored::date::literals::dec;
+using arrow_vendored::date::literals::jan;
+using arrow_vendored::date::literals::last;
+using arrow_vendored::date::literals::mon;
+using arrow_vendored::date::literals::thu;
+
+// ----------------------------------------------------------------------
+// Extract year from timestamp
+
+template <typename Duration>
+struct Year {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    return static_cast<const int32_t>(
+        year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year());
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract month from timestamp
+
+template <typename Duration>
+struct Month {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    return static_cast<const uint32_t>(
+        
year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month());
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day from timestamp
+
+template <typename Duration>
+struct Day {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    return static_cast<T>(static_cast<const uint32_t>(
+        year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day()));
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day of week from timestamp
+
+template <typename Duration>
+struct DayOfWeek {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    return 
weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
+        .iso_encoding();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract day of year from timestamp
+
+template <typename Duration>
+struct DayOfYear {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg arg, Status*) {
+    const auto sd = sys_days{floor<days>(Duration{arg})};
+    return (sd - sys_days(year_month_day(sd).year() / jan / 0)).count();
+  }
+};
+
+// ----------------------------------------------------------------------
+// Extract week from timestamp
+
+// Based on
+// 
https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503
+template <typename Duration>
+struct Week {

Review comment:
       I'll do that.
   By the way, what would be a good data type for "isocalendar" output? It's a 
[tuple in Pandas](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.isocalendar.html).

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -450,5 +450,145 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year value for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month value for each element of `values`
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day value for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Week returns week of year value for each element of `values`
+///
+/// \param[in] values input to extract week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief Quarter returns quarter of year value for each element of `values`
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfYear returns day of year value for each element of `values`
+///
+/// \param[in] values input to extract day of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfWeek returns day of the week value for each element of `values`
+///
+/// \param[in] values input to extract day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns milliseconds value for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns microseconds value for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns nanoseconds value for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       Oh yeah, indeed. This would behave in the latter way, which would be 
surprising to users. Will fix.

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -450,5 +450,145 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year value for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month value for each element of `values`
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day value for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Week returns week of year value for each element of `values`
+///
+/// \param[in] values input to extract week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief Quarter returns quarter of year value for each element of `values`
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfYear returns day of year value for each element of `values`
+///
+/// \param[in] values input to extract day of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfWeek returns day of the week value for each element of `values`
+///
+/// \param[in] values input to extract day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns milliseconds value for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns microseconds value for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns nanoseconds value for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       Do you know by heart what pandas does? The 
[documentation](https://pandas.pydata.org/docs/reference/api/pandas.Series.dt.microsecond.html)
 is not clear.
   Perhaps it would then be good to have a `subseconds` function that would 
return the total nanoseconds since midnight?

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -450,5 +450,145 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year value for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month value for each element of `values`
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day value for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Week returns week of year value for each element of `values`
+///
+/// \param[in] values input to extract week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief Quarter returns quarter of year value for each element of `values`
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfYear returns day of year value for each element of `values`
+///
+/// \param[in] values input to extract day of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfWeek returns day of the week value for each element of `values`
+///
+/// \param[in] values input to extract day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns milliseconds value for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns microseconds value for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns nanoseconds value for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       Do you know by heart what pandas does? The 
[documentation](https://pandas.pydata.org/docs/reference/api/pandas.Series.dt.microsecond.html)
 is not clear.
   Perhaps it would then be good to have a `subseconds` function that would 
return the total nanoseconds since midnight?

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -450,5 +450,145 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year value for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month value for each element of `values`
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day value for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Week returns week of year value for each element of `values`
+///
+/// \param[in] values input to extract week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief Quarter returns quarter of year value for each element of `values`
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfYear returns day of year value for each element of `values`
+///
+/// \param[in] values input to extract day of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = 
NULLPTR);
+
+/// \brief DayOfWeek returns day of the week value for each element of `values`
+///
+/// \param[in] values input to extract day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns milliseconds value for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns microseconds value for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns nanoseconds value for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       Well, this is not ideal :).
   I propose implementing this as `total units since midnight` and then adding an 
exception in Python for nanoseconds.

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -450,5 +450,145 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
+/// \brief Year returns year value for each element of `values`
+///
+/// \param[in] values input to extract year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Month returns month value for each element of `values`
+///
+/// \param[in] values input to extract month from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Day returns day value for each element of `values`
+///
+/// \param[in] values input to extract day from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Week returns week of year value for each element of `values`
+///
+/// \param[in] values input to extract week of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Quarter returns quarter of year value for each element of `values`
+///
+/// \param[in] values input to extract quarter of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfYear returns day of year value for each element of `values`
+///
+/// \param[in] values input to extract day of year from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief DayOfWeek returns day of the week value for each element of `values`
+///
+/// \param[in] values input to extract day of the week from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Hour returns hour value for each element of `values`
+///
+/// \param[in] values input to extract hour from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Minute returns minutes value for each element of `values`
+///
+/// \param[in] values input to extract minutes from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Second returns seconds value for each element of `values`
+///
+/// \param[in] values input to extract seconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Millisecond returns milliseconds value for each element of `values`
+///
+/// \param[in] values input to extract milliseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Microsecond returns microseconds value for each element of `values`
+///
+/// \param[in] values input to extract microseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR);
+
+/// \brief Nanosecond returns nanoseconds value for each element of `values`
+///
+/// \param[in] values input to extract nanoseconds from
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting datum
+///
+/// \since 4.0.0
+/// \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR);

Review comment:
       Well this is not ideal :).
   I propose to implement this as `total units since last second` and then add 
an exception in python for nanoseconds.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to