jorisvandenbossche commented on a change in pull request #10176: URL: https://github.com/apache/arrow/pull/10176#discussion_r633488308
########## File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc ########## @@ -0,0 +1,348 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/kernels/common.h" +#include "arrow/util/time.h" +#include "arrow/vendored/datetime.h" + +namespace arrow { + +namespace compute { +namespace internal { + +using arrow_vendored::date::days; +using arrow_vendored::date::floor; +using arrow_vendored::date::hh_mm_ss; +using arrow_vendored::date::sys_days; +using arrow_vendored::date::sys_time; +using arrow_vendored::date::trunc; +using arrow_vendored::date::weekday; +using arrow_vendored::date::weeks; +using arrow_vendored::date::year_month_day; +using arrow_vendored::date::years; +using arrow_vendored::date::literals::dec; +using arrow_vendored::date::literals::jan; +using arrow_vendored::date::literals::last; +using arrow_vendored::date::literals::mon; +using arrow_vendored::date::literals::thu; + +// ---------------------------------------------------------------------- +// Extract year from timestamp + +template <typename Duration> +struct Year { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<const int32_t>( + 
year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()); + } +}; + +// ---------------------------------------------------------------------- +// Extract month from timestamp + +template <typename Duration> +struct Month { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<const uint32_t>( + year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()); + } +}; + +// ---------------------------------------------------------------------- +// Extract day from timestamp + +template <typename Duration> +struct Day { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<T>(static_cast<const uint32_t>( + year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day())); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of week from timestamp + +template <typename Duration> +struct DayOfWeek { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg})))) + .iso_encoding(); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of year from timestamp + +template <typename Duration> +struct DayOfYear { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + const auto sd = sys_days{floor<days>(Duration{arg})}; + return (sd - sys_days(year_month_day(sd).year() / jan / 0)).count(); + } +}; + +// ---------------------------------------------------------------------- +// Extract week from timestamp + +// Based on +// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 +template <typename Duration> +struct Week { Review comment: We should probably be explicit here about it being the "ISO week" number (and not the "logical" week number), 
as it gives some surprising results around New Year (https://en.wikipedia.org/wiki/ISO_week_date#First_week) (and if adding an ISO week, we should maybe also add the corresponding ISO year) ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the 
function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT 
+Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns milliseconds value for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: Is this the total nanoseconds, or only the nanoseconds part after the microseconds? (i.e. are microseconds and nanoseconds additive components, or rather both representing the full fractional part of the second using a different resolution?) ########## File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc ########## @@ -0,0 +1,348 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/kernels/common.h" +#include "arrow/util/time.h" +#include "arrow/vendored/datetime.h" + +namespace arrow { + +namespace compute { +namespace internal { + +using arrow_vendored::date::days; +using arrow_vendored::date::floor; +using arrow_vendored::date::hh_mm_ss; +using arrow_vendored::date::sys_days; +using arrow_vendored::date::sys_time; +using arrow_vendored::date::trunc; +using arrow_vendored::date::weekday; +using arrow_vendored::date::weeks; +using arrow_vendored::date::year_month_day; +using arrow_vendored::date::years; +using arrow_vendored::date::literals::dec; +using arrow_vendored::date::literals::jan; +using arrow_vendored::date::literals::last; +using arrow_vendored::date::literals::mon; +using arrow_vendored::date::literals::thu; + +// ---------------------------------------------------------------------- +// Extract year from timestamp + +template <typename Duration> +struct Year { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<const int32_t>( + year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).year()); + } +}; + +// ---------------------------------------------------------------------- +// Extract month from timestamp + +template <typename Duration> +struct Month { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<const uint32_t>( + year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).month()); + } +}; + +// 
---------------------------------------------------------------------- +// Extract day from timestamp + +template <typename Duration> +struct Day { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return static_cast<T>(static_cast<const uint32_t>( + year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))).day())); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of week from timestamp + +template <typename Duration> +struct DayOfWeek { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + return weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg})))) + .iso_encoding(); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of year from timestamp + +template <typename Duration> +struct DayOfYear { + template <typename T, typename Arg> + static T Call(KernelContext*, Arg arg, Status*) { + const auto sd = sys_days{floor<days>(Duration{arg})}; + return (sd - sys_days(year_month_day(sd).year() / jan / 0)).count(); + } +}; + +// ---------------------------------------------------------------------- +// Extract week from timestamp + +// Based on +// https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 +template <typename Duration> +struct Week { Review comment: A struct? 
(with year, week, day fields) ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> 
Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns milliseconds value for each element of `values` +/// 
+/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: Seeing the tests, it's clearly the first (additive components), which I think is good. 
I am only wondering if we can make that clearer in the terminology of the docstring ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note 
API not yet finalized +ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns 
milliseconds value for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: Seeing the tests, it's clearly the first (additive components), which I think is good (and the most useful). 
I am only wondering if we can make that clearer in the terminology of the docstring ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note 
API not yet finalized +ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns 
milliseconds value for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: To be clear, judging by the tests, the current implementation is not using the "total" nanoseconds right now, but only the part after the microseconds. 
Personally I think this is the useful behaviour ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT 
Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns milliseconds value for each element of 
`values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: Yeah, I was also looking at pandas to compare ;) And that was the reason I am asking. So the situation is a bit complex there: ``` >>> ts = pd.Timestamp("2012-01-02 03:04:05.123456789") >>> ts.second 5 ``` There is a `microsecond` attribute which gives the total microseconds, but this is inherited from `datetime.datetime` (as this actually stores the sub-second part as microseconds): ``` >>> ts.microsecond 123456 ``` and then there is also a `nanosecond` attribute which is only the part after the microseconds (so the "additive component", not total nanoseconds): ``` >>> ts.nanosecond 789 ``` (the example in the pandas docs is indeed not good! 
As you can't tell which of the two cases it is because of the used value) ########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -450,5 +450,145 @@ ARROW_EXPORT Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet 
finalized +ARROW_EXPORT Result<Datum> Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result<Datum> DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Second(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Millisecond returns milliseconds 
value for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Microsecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result<Datum> Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); Review comment: To be clear, we don't need to match exactly what pandas does. pyarrow.compute should just follow Arrow's behaviour. So for Arrow the question is, what is most useful: the total units vs the component? (or do we need both?) As a comparison, the R lubridate package only has component getter functions up to `second()`, and not for `milli/micro/..second`, but the `second` function returns the fractional part of the second as well: ```R > library(lubridate) > x <- ymd_hms("2012-03-26 01:02:03.123456") > second(x) [1] 3.123456 ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
