[GitHub] [arrow] pitrou commented on a change in pull request #10349: ARROW-12744: [C++][Compute] Add rounding kernel

GitBox Tue, 07 Sep 2021 08:45:08 -0700


pitrou commented on a change in pull request #10349:
URL: https://github.com/apache/arrow/pull/10349#discussion_r703602112




##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -852,24 +854,232 @@ struct LogbChecked {
   }
 };
 
+struct RoundUtil {
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxEqual(
+      const T a, const T b, const T abs_tol = kDefaultAbsoluteTolerance) {
+    return std::fabs(a - b) <= abs_tol;
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.0?
+    return IsApproxEqual(val, std::round(val), abs_tol);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxHalfInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.5?
+    return IsApproxEqual(val - std::floor(val), T(0.5), abs_tol);
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Pow10(const int64_t power) {
+    const T lut[]{1e0F,  1e1F,  1e2F,  1e3F,  1e4F,  1e5F,  1e6F,  1e7F,

Review comment:
       Could probably be `constexpr`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1352,6 +1412,212 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+const std::initializer_list<RoundMode> kRoundModes{

Review comment:
       Nit, but can avoid pecularities of `initializer_list` and use 
`std::vector`...

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -49,10 +49,66 @@ class ARROW_EXPORT ElementWiseAggregateOptions : public 
FunctionOptions {
   explicit ElementWiseAggregateOptions(bool skip_nulls = true);
   constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
   static ElementWiseAggregateOptions Defaults() { return 
ElementWiseAggregateOptions{}; }
-
   bool skip_nulls;
 };
 
+/// Rounding and tie-breaking modes for round compute functions.
+/// Additional details and examples are provided in compute.rst.
+enum class RoundMode : int8_t {
+  /// Round to nearest integer less than or equal in magnitude (aka "floor")
+  DOWN,
+  /// Round to nearest integer greater than or equal in magnitude (aka "ceil")
+  UP,
+  /// Get the integral part without fractional digits (aka "trunc")
+  TOWARDS_ZERO,
+  /// Round negative values with DOWN rule and positive values with UP rule
+  TOWARDS_INFINITY,
+  /// Round ties with DOWN rule
+  HALF_DOWN,
+  /// Round ties with UP rule
+  HALF_UP,
+  /// Round ties with TOWARDS_ZERO rule
+  HALF_TOWARDS_ZERO,
+  /// Round ties with TOWARDS_INFINITY rule
+  HALF_TOWARDS_INFINITY,
+  /// Round ties to nearest even integer
+  HALF_TO_EVEN,
+  /// Round ties to nearest odd integer
+  HALF_TO_ODD,
+};
+
+static constexpr double kDefaultAbsoluteTolerance = 1E-5;

Review comment:
       I'd make this a member of `RoundOptions` and `MRoundOptions` 
respectively.

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -583,10 +639,32 @@ Result<Datum> MinElementWise(
 ///
 /// \param[in] arg the value to extract sign from
 /// \param[in] ctx the function execution context, optional
-/// \return the elementwise sign function
+/// \return the element-wise sign function
 ARROW_EXPORT
 Result<Datum> Sign(const Datum& arg, ExecContext* ctx = NULLPTR);
 
+/// \brief Round a value to the given number of digits. Array values can be of
+/// arbitrary length. If argument is null the result will be null.

Review comment:
       Hmm... what does "of arbitrary length" imply here? I'm not sure I 
understand.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -852,24 +854,232 @@ struct LogbChecked {
   }
 };
 
+struct RoundUtil {
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxEqual(
+      const T a, const T b, const T abs_tol = kDefaultAbsoluteTolerance) {
+    return std::fabs(a - b) <= abs_tol;
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.0?
+    return IsApproxEqual(val, std::round(val), abs_tol);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxHalfInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.5?
+    return IsApproxEqual(val - std::floor(val), T(0.5), abs_tol);
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Pow10(const int64_t power) {
+    const T lut[]{1e0F,  1e1F,  1e2F,  1e3F,  1e4F,  1e5F,  1e6F,  1e7F,
+                  1e8F,  1e9F,  1e10F, 1e11F, 1e12F, 1e13F, 1e14F, 1e15F,
+                  1e16F, 1e17F, 1e18F, 1e19F, 1e20F, 1e21F, 1e22F};
+    // Return NaN if index is out-of-range.
+    auto lut_size = (int64_t)(sizeof(lut) / sizeof(*lut));
+    return (power >= 0 && power < lut_size) ? lut[power] : std::nanf("");
+  }
+};
+
+// Specializations of rounding implementations for round kernels
+template <typename T, RoundMode>
+struct RoundImpl {};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::ceil(val) : std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::floor(val) : std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::DOWN>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::UP>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_ZERO>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_INFINITY>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_EVEN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::round(val * T(0.5)) * 2;
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_ODD> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val * T(0.5)) + std::ceil(val * T(0.5));
+  }
+};
+
+template <RoundMode RndMode>
+struct Round {
+  using RoundState = OptionsWrapper<RoundOptions>;
+
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext* ctx, Arg arg, 
Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    static_assert(RoundMode::HALF_DOWN > RoundMode::DOWN &&
+                      RoundMode::HALF_DOWN > RoundMode::UP &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_INFINITY &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_UP &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_INFINITY 
&&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_EVEN &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_ODD,
+                  "Round modes prefixed with HALF need to be defined last in 
enum and "
+                  "the first HALF entry has to be HALF_DOWN.");
+
+    auto options = RoundState::Get(ctx);
+    auto pow10 = RoundUtil::Pow10<T>(std::llabs(options.ndigits));
+    if (std::isnan(pow10)) {
+      *st = Status::Invalid("out-of-range value for rounding digits");
+      return arg;

Review comment:
       It would be much better to make the kernel stateful and precompute this 
in the init function, IMHO.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -1667,6 +1930,22 @@ const FunctionDoc trunc_doc{
     ("Calculate the nearest integer not greater in magnitude than to the "
      "argument element-wise."),
     {"x"}};
+
+const FunctionDoc round_doc{
+    "Round the arguments to a given precision element-wise",
+    ("Options are used to control the number of digits and rounding mode.\n"
+     "Default behavior is to round to the nearest integer and use half-to-even 
"
+     "rule to break ties."),
+    {"x"},
+    "RoundOptions"};
+
+const FunctionDoc mround_doc{
+    "Round the arguments to a given multiple element-wise",

Review comment:
       "Round to a given multiple"?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -852,24 +854,232 @@ struct LogbChecked {
   }
 };
 
+struct RoundUtil {
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxEqual(
+      const T a, const T b, const T abs_tol = kDefaultAbsoluteTolerance) {
+    return std::fabs(a - b) <= abs_tol;
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.0?
+    return IsApproxEqual(val, std::round(val), abs_tol);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxHalfInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.5?
+    return IsApproxEqual(val - std::floor(val), T(0.5), abs_tol);
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Pow10(const int64_t power) {
+    const T lut[]{1e0F,  1e1F,  1e2F,  1e3F,  1e4F,  1e5F,  1e6F,  1e7F,
+                  1e8F,  1e9F,  1e10F, 1e11F, 1e12F, 1e13F, 1e14F, 1e15F,
+                  1e16F, 1e17F, 1e18F, 1e19F, 1e20F, 1e21F, 1e22F};
+    // Return NaN if index is out-of-range.
+    auto lut_size = (int64_t)(sizeof(lut) / sizeof(*lut));
+    return (power >= 0 && power < lut_size) ? lut[power] : std::nanf("");

Review comment:
       Why don't you also store inverse powers of 10? Is it because of rounding 
issues?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -852,24 +854,232 @@ struct LogbChecked {
   }
 };
 
+struct RoundUtil {
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxEqual(
+      const T a, const T b, const T abs_tol = kDefaultAbsoluteTolerance) {
+    return std::fabs(a - b) <= abs_tol;
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.0?
+    return IsApproxEqual(val, std::round(val), abs_tol);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxHalfInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.5?
+    return IsApproxEqual(val - std::floor(val), T(0.5), abs_tol);
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Pow10(const int64_t power) {
+    const T lut[]{1e0F,  1e1F,  1e2F,  1e3F,  1e4F,  1e5F,  1e6F,  1e7F,
+                  1e8F,  1e9F,  1e10F, 1e11F, 1e12F, 1e13F, 1e14F, 1e15F,
+                  1e16F, 1e17F, 1e18F, 1e19F, 1e20F, 1e21F, 1e22F};
+    // Return NaN if index is out-of-range.
+    auto lut_size = (int64_t)(sizeof(lut) / sizeof(*lut));
+    return (power >= 0 && power < lut_size) ? lut[power] : std::nanf("");
+  }
+};
+
+// Specializations of rounding implementations for round kernels
+template <typename T, RoundMode>
+struct RoundImpl {};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::ceil(val) : std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::floor(val) : std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::DOWN>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::UP>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_ZERO>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_INFINITY>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_EVEN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::round(val * T(0.5)) * 2;
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_ODD> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val * T(0.5)) + std::ceil(val * T(0.5));
+  }
+};
+
+template <RoundMode RndMode>
+struct Round {
+  using RoundState = OptionsWrapper<RoundOptions>;
+
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext* ctx, Arg arg, 
Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    static_assert(RoundMode::HALF_DOWN > RoundMode::DOWN &&
+                      RoundMode::HALF_DOWN > RoundMode::UP &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_INFINITY &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_UP &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_INFINITY 
&&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_EVEN &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_ODD,
+                  "Round modes prefixed with HALF need to be defined last in 
enum and "
+                  "the first HALF entry has to be HALF_DOWN.");
+
+    auto options = RoundState::Get(ctx);
+    auto pow10 = RoundUtil::Pow10<T>(std::llabs(options.ndigits));
+    if (std::isnan(pow10)) {
+      *st = Status::Invalid("out-of-range value for rounding digits");
+      return arg;
+    } else if (!std::isfinite(arg)) {
+      return arg;
+    }
+
+    T scaled_arg = (options.ndigits >= 0) ? (arg * pow10) : (arg / pow10);
+    // Use std::round if scaled value is an integer or not 0.5 when a 
tie-breaking mode
+    // was set.
+    T result;
+    if (RoundUtil::IsApproxInt(scaled_arg, T(options.abs_tol)) ||
+        (options.round_mode >= RoundMode::HALF_DOWN &&
+         !RoundUtil::IsApproxHalfInt(scaled_arg, T(options.abs_tol)))) {
+      result = std::round(scaled_arg);
+    } else {
+      result = RoundImpl<T, RndMode>::Round(scaled_arg);
+    }
+    result = (options.ndigits >= 0) ? (result / pow10) : (result * pow10);
+    if (!std::isfinite(result)) {
+      *st = Status::Invalid("overflow occurred during rounding");
+      return arg;
+    }
+    // If rounding didn't change value, return original value
+    return RoundUtil::IsApproxEqual(arg, result, T(options.abs_tol)) ? arg : 
result;
+  }
+};
+
+template <RoundMode RndMode>
+struct MRound {
+  using MRoundState = OptionsWrapper<MRoundOptions>;
+
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext* ctx, Arg arg, 
Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    static_assert(RoundMode::HALF_DOWN > RoundMode::DOWN &&
+                      RoundMode::HALF_DOWN > RoundMode::UP &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_INFINITY &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_UP &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_INFINITY 
&&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_EVEN &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_ODD,
+                  "Round modes prefixed with HALF need to be defined last in 
enum and "
+                  "the first HALF entry has to be HALF_DOWN.");
+    auto options = MRoundState::Get(ctx);
+    auto mult = std::fabs(T(options.multiple));
+    if (mult == 0) {

Review comment:
       Can't we just mandate that `mult` is finite and strictly positive? It 
doesn't seem useful to support degenerate cases such as `mult == 0` or `mult == 
+inf`, etc.

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -49,10 +49,66 @@ class ARROW_EXPORT ElementWiseAggregateOptions : public 
FunctionOptions {
   explicit ElementWiseAggregateOptions(bool skip_nulls = true);
   constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
   static ElementWiseAggregateOptions Defaults() { return 
ElementWiseAggregateOptions{}; }
-
   bool skip_nulls;
 };
 
+/// Rounding and tie-breaking modes for round compute functions.
+/// Additional details and examples are provided in compute.rst.
+enum class RoundMode : int8_t {
+  /// Round to nearest integer less than or equal in magnitude (aka "floor")
+  DOWN,
+  /// Round to nearest integer greater than or equal in magnitude (aka "ceil")
+  UP,
+  /// Get the integral part without fractional digits (aka "trunc")
+  TOWARDS_ZERO,
+  /// Round negative values with DOWN rule and positive values with UP rule
+  TOWARDS_INFINITY,
+  /// Round ties with DOWN rule
+  HALF_DOWN,
+  /// Round ties with UP rule
+  HALF_UP,
+  /// Round ties with TOWARDS_ZERO rule
+  HALF_TOWARDS_ZERO,
+  /// Round ties with TOWARDS_INFINITY rule
+  HALF_TOWARDS_INFINITY,
+  /// Round ties to nearest even integer
+  HALF_TO_EVEN,
+  /// Round ties to nearest odd integer
+  HALF_TO_ODD,
+};
+
+static constexpr double kDefaultAbsoluteTolerance = 1E-5;
+
+class ARROW_EXPORT RoundOptions : public FunctionOptions {
+ public:
+  explicit RoundOptions(int64_t ndigits = 0,
+                        RoundMode round_mode = RoundMode::HALF_TO_EVEN,
+                        double abs_tol = kDefaultAbsoluteTolerance);
+  constexpr static char const kTypeName[] = "RoundOptions";
+  static RoundOptions Defaults() { return RoundOptions(); }
+  /// Rounding precision (number of digits to round to).
+  int64_t ndigits;
+  /// Rounding and tie-breaking mode
+  RoundMode round_mode;
+  /// Absolute tolerance for approximating values as integers and mid-point 
decimals
+  double abs_tol;

Review comment:
       Is it really desirable to implement this? If I have "1.500000000001", 
will it really be considered equal to 1.5?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -422,6 +472,16 @@ TYPED_TEST_SUITE(TestUnaryArithmeticSigned, 
SignedIntegerTypes);
 TYPED_TEST_SUITE(TestUnaryArithmeticUnsigned, UnsignedIntegerTypes);
 TYPED_TEST_SUITE(TestUnaryArithmeticFloating, FloatingTypes);
 
+TYPED_TEST_SUITE(TestUnaryRoundIntegral, IntegralTypes);
+TYPED_TEST_SUITE(TestUnaryRoundSigned, SignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryRoundUnsigned, UnsignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryRoundFloating, FloatingTypes);
+
+TYPED_TEST_SUITE(TestUnaryMRoundIntegral, IntegralTypes);
+TYPED_TEST_SUITE(TestUnaryMRoundSigned, SignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryMRoundUnsigned, UnsignedIntegerTypes);
+TYPED_TEST_SUITE(TestUnaryMRoundFloating, FloatingTypes);

Review comment:
       Can you move these declarations near the corresponding `TYPED_TEST` 
definitions? Otherwise, things will be difficult to follow.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -20,14 +20,16 @@
 #include <limits>
 #include <utility>
 
-#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compare.h"
+#include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/map.h"

Review comment:
       This one doesn't seem used?

##########
File path: cpp/src/arrow/util/map.h
##########
@@ -17,13 +17,21 @@
 
 #pragma once
 
+#include <type_traits>
 #include <utility>
 
 #include "arrow/result.h"
 
 namespace arrow {
 namespace internal {
 
+/// Functor to make enums hashable.
+template <typename T, typename R = typename std::underlying_type<T>::type,
+          typename = std::enable_if<std::is_enum<T>::value>>
+struct EnumHash {
+  R operator()(T val) const { return static_cast<R>(val); }
+};

Review comment:
       Is it actually used in this PR?

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -1667,6 +1930,22 @@ const FunctionDoc trunc_doc{
     ("Calculate the nearest integer not greater in magnitude than to the "
      "argument element-wise."),
     {"x"}};
+
+const FunctionDoc round_doc{
+    "Round the arguments to a given precision element-wise",

Review comment:
       I don't think "element-wise" is informational (what would a 
non-element-wise round function?).

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -852,24 +854,232 @@ struct LogbChecked {
   }
 };
 
+struct RoundUtil {
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxEqual(
+      const T a, const T b, const T abs_tol = kDefaultAbsoluteTolerance) {
+    return std::fabs(a - b) <= abs_tol;
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.0?
+    return IsApproxEqual(val, std::round(val), abs_tol);
+  }
+
+  template <typename T>
+  static constexpr enable_if_t<std::is_floating_point<T>::value, bool> 
IsApproxHalfInt(
+      const T val, const T abs_tol = kDefaultAbsoluteTolerance) {
+    // |frac| ~ 0.5?
+    return IsApproxEqual(val - std::floor(val), T(0.5), abs_tol);
+  }
+
+  template <typename T>
+  static enable_if_floating_point<T> Pow10(const int64_t power) {
+    const T lut[]{1e0F,  1e1F,  1e2F,  1e3F,  1e4F,  1e5F,  1e6F,  1e7F,
+                  1e8F,  1e9F,  1e10F, 1e11F, 1e12F, 1e13F, 1e14F, 1e15F,
+                  1e16F, 1e17F, 1e18F, 1e19F, 1e20F, 1e21F, 1e22F};
+    // Return NaN if index is out-of-range.
+    auto lut_size = (int64_t)(sizeof(lut) / sizeof(*lut));
+    return (power >= 0 && power < lut_size) ? lut[power] : std::nanf("");
+  }
+};
+
+// Specializations of rounding implementations for round kernels
+template <typename T, RoundMode>
+struct RoundImpl {};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::ceil(val) : std::floor(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::signbit(val) ? std::floor(val) : std::ceil(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_DOWN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::DOWN>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_UP> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::UP>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_ZERO> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_ZERO>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TOWARDS_INFINITY> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return RoundImpl<T, RoundMode::TOWARDS_INFINITY>::Round(val);
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_EVEN> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::round(val * T(0.5)) * 2;
+  }
+};
+
+template <typename T>
+struct RoundImpl<T, RoundMode::HALF_TO_ODD> {
+  static constexpr enable_if_floating_point<T> Round(const T val) {
+    return std::floor(val * T(0.5)) + std::ceil(val * T(0.5));
+  }
+};
+
+template <RoundMode RndMode>
+struct Round {
+  using RoundState = OptionsWrapper<RoundOptions>;
+
+  template <typename T, typename Arg>
+  static enable_if_floating_point<Arg, T> Call(KernelContext* ctx, Arg arg, 
Status* st) {
+    static_assert(std::is_same<T, Arg>::value, "");
+    static_assert(RoundMode::HALF_DOWN > RoundMode::DOWN &&
+                      RoundMode::HALF_DOWN > RoundMode::UP &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN > RoundMode::TOWARDS_INFINITY &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_UP &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_ZERO &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TOWARDS_INFINITY 
&&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_EVEN &&
+                      RoundMode::HALF_DOWN < RoundMode::HALF_TO_ODD,
+                  "Round modes prefixed with HALF need to be defined last in 
enum and "
+                  "the first HALF entry has to be HALF_DOWN.");

Review comment:
       You can probably put this at the top level instead of repeating it in 
both kernels.

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -1316,6 +1318,67 @@ def test_arithmetic_multiply():
     assert result.equals(expected)
 
 
[email protected]("ty", ["round", "mround"])
+def test_round_to_integer(ty):
+    if ty == "round":
+        round = pc.round
+        RoundOptions = partial(pc.RoundOptions, ndigits=0)
+    elif ty == "mround":
+        round = pc.mround
+        RoundOptions = partial(pc.MRoundOptions, multiple=1)
+
+    values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None]
+    rmode_and_expected_map = {
+        "down": [3, 3, 3, 4, -4, -4, -4, None],
+        "up": [4, 4, 4, 5, -3, -3, -3, None],
+        "towards_zero": [3, 3, 3, 4, -3, -3, -3, None],
+        "towards_infinity": [4, 4, 4, 5, -4, -4, -4, None],
+        "half_down": [3, 3, 4, 4, -3, -4, -4, None],
+        "half_up": [3, 4, 4, 5, -3, -3, -4, None],
+        "half_towards_zero": [3, 3, 4, 4, -3, -3, -4, None],
+        "half_towards_infinity": [3, 4, 4, 5, -3, -4, -4, None],
+        "half_to_even": [3, 4, 4, 4, -3, -4, -4, None],
+        "half_to_odd": [3, 3, 4, 5, -3, -3, -4, None],
+    }
+    for round_mode, expected in rmode_and_expected_map.items():
+        options = RoundOptions(round_mode=round_mode)
+        result = round(values, options=options)
+        assert np.all(np.isclose(result, pa.array(expected), equal_nan=True))

Review comment:
       Given the results are all integers, we should check for exact equality 
here, not approximate.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -1352,6 +1412,212 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
   }
 }
 
+const std::initializer_list<RoundMode> kRoundModes{
+    RoundMode::DOWN,
+    RoundMode::UP,
+    RoundMode::TOWARDS_ZERO,
+    RoundMode::TOWARDS_INFINITY,
+    RoundMode::HALF_DOWN,
+    RoundMode::HALF_UP,
+    RoundMode::HALF_TOWARDS_ZERO,
+    RoundMode::HALF_TOWARDS_INFINITY,
+    RoundMode::HALF_TO_EVEN,
+    RoundMode::HALF_TO_ODD,
+};
+
+TYPED_TEST(TestUnaryRoundSigned, Round) {
+  // Test different rounding modes for integer rounding
+  std::string values("[0, 1, -13, -50, 115]");
+  this->SetRoundNdigits(0);
+  for (const auto& round_mode : kRoundModes) {
+    this->SetRoundMode(round_mode);
+    this->AssertUnaryOp(Round, values, ArrayFromJSON(float64(), values));
+  }
+
+  // Test different round N-digits for nearest rounding mode
+  std::unordered_map<int64_t, std::string> ndigits_and_expected_map({

Review comment:
       This can trivially be a `std::vector<std::pair>`...

##########
File path: docs/source/cpp/compute.rst
##########
@@ -457,18 +458,100 @@ Bit-wise functions
 Rounding functions
 ~~~~~~~~~~~~~~~~~~
 
-Rounding functions convert a numeric input into an approximate value with a
-simpler representation based on the rounding strategy.
-
-+------------------+--------+----------------+-----------------+-------+
-| Function name    | Arity  | Input types    | Output type     | Notes |
-+==================+========+================+=================+=======+
-| floor            | Unary  | Numeric        | Float32/Float64 |       |
-+------------------+--------+----------------+-----------------+-------+
-| ceil             | Unary  | Numeric        | Float32/Float64 |       |
-+------------------+--------+----------------+-----------------+-------+
-| trunc            | Unary  | Numeric        | Float32/Float64 |       |
-+------------------+--------+----------------+-----------------+-------+
+Rounding functions displace numeric inputs to an approximate value with a 
simpler
+representation based on the rounding criterion.
+
++---------------+------------+-------------+------------------+-------------------------+--------+
+| Function name | Arity      | Input types | Output type      | Options class  
         | Notes  |
++===============+============+=============+==================+=========================+========+
+| ceil          | Unary      | Numeric     | Float32/Float64  |                
         |        |
++---------------+------------+-------------+------------------+-------------------------+--------+
+| floor         | Unary      | Numeric     | Float32/Float64  |                
         |        |
++---------------+------------+-------------+------------------+-------------------------+--------+
+| mround        | Unary      | Numeric     | Float32/Float64  | 
:struct:`MRoundOptions` | (1)(2) |
++---------------+------------+-------------+------------------+-------------------------+--------+
+| round         | Unary      | Numeric     | Float32/Float64  | 
:struct:`RoundOptions`  | (1)(3) |
++---------------+------------+-------------+------------------+-------------------------+--------+
+| trunc         | Unary      | Numeric     | Float32/Float64  |                
         |        |
++---------------+------------+-------------+------------------+-------------------------+--------+
+
+* \(1) Output value is a 64-bit floating-point for integral inputs and the
+  retains the same type for floating-point inputs.  By default rounding
+  functions displace a value to the nearest integer using HALF_TO_EVEN
+  to resolve ties.  Options are available to control the rounding criterion.
+  Both ``round`` and ``mround`` have the ``round_mode`` option to set the
+  rounding mode.
+* \(2) Round to a multiple where the ``multiple`` option of 
:struct:`MRoundOptions`
+  specifies the rounding scale.  Only the absolute value of the rounding
+  multiple is used, that is, its sign is ignored.  For example, 100 corresponds
+  to rounding to the nearest multiple of 100 (zeroing the ones and tens 
digits).
+  Default value of ``multiple`` is 1 which rounds to the nearest integer.
+  Also, an absolute tolerance ``abs_tol`` can be set to control equality
+  comparisons of floating-point values.
+* \(3) Round to a number of digits where the ``ndigits`` option of
+  :struct:`RoundOptions` specifies the rounding precision in terms of number of
+  digits.  A negative value corresponds to digits in the non-fractional part.
+  For example, -2 corresponds to rounding to the nearest multiple of 100
+  (zeroing the ones and tens digits).  Default value of ``ndigits`` is 0 which
+  rounds to the nearest integer.  Also, an absolute tolerance ``abs_tol`` can 
be
+  set to control equality comparisons of floating-point values.
+
+The following tables present the rounding operations performed for various
+rounding options and modes.
+
++--------------------+---------------+-------------------------------+
+| Round ``multiple`` | Round ``ndigits`` | Operation performed       |

Review comment:
       Looks like the first table line has an alignment issue above?

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -699,6 +699,78 @@ class 
ElementWiseAggregateOptions(_ElementWiseAggregateOptions):
         self._set_options(skip_nulls)
 
 
+cdef class _RoundOptions(FunctionOptions):
+    def _set_options(self, int64_t ndigits, round_mode, double abs_tol):
+        cdef:
+            CRoundMode c_round_mode = CRoundMode_HALF_TO_EVEN
+        if round_mode == 'down':
+            c_round_mode = CRoundMode_DOWN
+        elif round_mode == 'up':
+            c_round_mode = CRoundMode_UP
+        elif round_mode == 'towards_zero':
+            c_round_mode = CRoundMode_TOWARDS_ZERO
+        elif round_mode == 'towards_infinity':
+            c_round_mode = CRoundMode_TOWARDS_INFINITY
+        elif round_mode == 'half_down':
+            c_round_mode = CRoundMode_HALF_DOWN
+        elif round_mode == 'half_up':
+            c_round_mode = CRoundMode_HALF_UP
+        elif round_mode == 'half_towards_zero':
+            c_round_mode = CRoundMode_HALF_TOWARDS_ZERO
+        elif round_mode == 'half_towards_infinity':
+            c_round_mode = CRoundMode_HALF_TOWARDS_INFINITY
+        elif round_mode == 'half_to_even':
+            c_round_mode = CRoundMode_HALF_TO_EVEN
+        elif round_mode == 'half_to_odd':
+            c_round_mode = CRoundMode_HALF_TO_ODD
+        else:
+            raise ValueError(
+                '"{}" is not a valid round mode'
+                .format(round_mode))

Review comment:
       Can probably factor all this out in a `cdef` function:
   ```cython
   
   cdef CRoundMode unwrap_round_mode(round_mode) except *:
       if round_mode == 'down':
           return CRoundMode_DOWN
       elif round_mode == 'up':
           return CRoundMode_UP
       elif round_mode == 'towards_zero':
           return CRoundMode_TOWARDS_ZERO
       elif round_mode == 'towards_infinity':
           return CRoundMode_TOWARDS_INFINITY
       elif round_mode == 'half_down':
           return CRoundMode_HALF_DOWN
       elif round_mode == 'half_up':
           return CRoundMode_HALF_UP
       elif round_mode == 'half_towards_zero':
           return CRoundMode_HALF_TOWARDS_ZERO
       elif round_mode == 'half_towards_infinity':
           return CRoundMode_HALF_TOWARDS_INFINITY
       elif round_mode == 'half_to_even':
           return CRoundMode_HALF_TO_EVEN
       elif round_mode == 'half_to_odd':
           return CRoundMode_HALF_TO_ODD
       else:
           raise ValueError(
               '"{}" is not a valid round mode'
               .format(round_mode))
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
##########
@@ -66,18 +65,20 @@ class TestUnaryArithmetic : public TestBase {
     return *arrow::MakeScalar(type_singleton(), value);
   }
 
+  void SetUp() override {}
+
   // (CScalar, CScalar)
   void AssertUnaryOp(UnaryFunction func, CType argument, CType expected) {
     auto arg = MakeScalar(argument);
     auto exp = MakeScalar(expected);
-    ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, nullptr));
+    ASSERT_OK_AND_ASSIGN(auto actual, func(arg, options_, NULLPTR));

Review comment:
       Those changes shouldn't be necessary, `NULLPTR` is only required in 
header files.

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -49,10 +49,66 @@ class ARROW_EXPORT ElementWiseAggregateOptions : public 
FunctionOptions {
   explicit ElementWiseAggregateOptions(bool skip_nulls = true);
   constexpr static char const kTypeName[] = "ElementWiseAggregateOptions";
   static ElementWiseAggregateOptions Defaults() { return 
ElementWiseAggregateOptions{}; }
-
   bool skip_nulls;
 };
 
+/// Rounding and tie-breaking modes for round compute functions.
+/// Additional details and examples are provided in compute.rst.
+enum class RoundMode : int8_t {
+  /// Round to nearest integer less than or equal in magnitude (aka "floor")
+  DOWN,
+  /// Round to nearest integer greater than or equal in magnitude (aka "ceil")
+  UP,
+  /// Get the integral part without fractional digits (aka "trunc")
+  TOWARDS_ZERO,
+  /// Round negative values with DOWN rule and positive values with UP rule
+  TOWARDS_INFINITY,
+  /// Round ties with DOWN rule
+  HALF_DOWN,
+  /// Round ties with UP rule
+  HALF_UP,
+  /// Round ties with TOWARDS_ZERO rule
+  HALF_TOWARDS_ZERO,
+  /// Round ties with TOWARDS_INFINITY rule
+  HALF_TOWARDS_INFINITY,
+  /// Round ties to nearest even integer
+  HALF_TO_EVEN,
+  /// Round ties to nearest odd integer
+  HALF_TO_ODD,
+};
+
+static constexpr double kDefaultAbsoluteTolerance = 1E-5;
+
+class ARROW_EXPORT RoundOptions : public FunctionOptions {
+ public:
+  explicit RoundOptions(int64_t ndigits = 0,
+                        RoundMode round_mode = RoundMode::HALF_TO_EVEN,
+                        double abs_tol = kDefaultAbsoluteTolerance);
+  constexpr static char const kTypeName[] = "RoundOptions";
+  static RoundOptions Defaults() { return RoundOptions(); }
+  /// Rounding precision (number of digits to round to).
+  int64_t ndigits;
+  /// Rounding and tie-breaking mode
+  RoundMode round_mode;
+  /// Absolute tolerance for approximating values as integers and mid-point 
decimals
+  double abs_tol;

Review comment:
       And in case an optional tolerance is actually useful, I would really 
make it 0 by default.

##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -1667,6 +1930,22 @@ const FunctionDoc trunc_doc{
     ("Calculate the nearest integer not greater in magnitude than to the "
      "argument element-wise."),
     {"x"}};
+
+const FunctionDoc round_doc{
+    "Round the arguments to a given precision element-wise",

Review comment:
       So just "Round to a given precision" perhaps?

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -1316,6 +1318,67 @@ def test_arithmetic_multiply():
     assert result.equals(expected)
 
 
[email protected]("ty", ["round", "mround"])
+def test_round_to_integer(ty):
+    if ty == "round":
+        round = pc.round
+        RoundOptions = partial(pc.RoundOptions, ndigits=0)
+    elif ty == "mround":
+        round = pc.mround
+        RoundOptions = partial(pc.MRoundOptions, multiple=1)
+
+    values = [3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7, None]
+    rmode_and_expected_map = {
+        "down": [3, 3, 3, 4, -4, -4, -4, None],
+        "up": [4, 4, 4, 5, -3, -3, -3, None],
+        "towards_zero": [3, 3, 3, 4, -3, -3, -3, None],
+        "towards_infinity": [4, 4, 4, 5, -4, -4, -4, None],
+        "half_down": [3, 3, 4, 4, -3, -4, -4, None],
+        "half_up": [3, 4, 4, 5, -3, -3, -4, None],
+        "half_towards_zero": [3, 3, 4, 4, -3, -3, -4, None],
+        "half_towards_infinity": [3, 4, 4, 5, -3, -4, -4, None],
+        "half_to_even": [3, 4, 4, 4, -3, -4, -4, None],
+        "half_to_odd": [3, 3, 4, 5, -3, -3, -4, None],
+    }
+    for round_mode, expected in rmode_and_expected_map.items():
+        options = RoundOptions(round_mode=round_mode)
+        result = round(values, options=options)
+        assert np.all(np.isclose(result, pa.array(expected), equal_nan=True))

Review comment:
       Also, the Numpy [testing 
assertions](https://numpy.org/doc/stable/reference/routines.testing.html) 
should give a more informative output on failure ;-)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] pitrou commented on a change in pull request #10349: ARROW-12744: [C++][Compute] Add rounding kernel

Reply via email to