(arrow) branch main updated: GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic function (#44904)

felipecrv Mon, 02 Dec 2024 13:45:35 -0800

This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 5e476b3d36 GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic 
function (#44904)
5e476b3d36 is described below

commit 5e476b3d36591dac7ce24067167bb5efa24fd63d
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Mon Dec 2 18:45:15 2024 -0300

    GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic function (#44904)
    
    `Expm1(exponent)` is a more accurate way of computing `Exp(exponent) - 1.0` 
for exponent values close to zero.
    
    ### Rationale for this change
    
    `expm1(x)` is specifically designed to compute `exp(x)−1` more accurately, 
particularly for small x. It uses numerical techniques and approximations that 
minimize the loss of precision.
    
    When x is very small (close to 0), `exp(x)` is approximately `1+x`. 
Subtracting 1 from `exp(x)` (i.e., computing `exp(x)−1` directly) can cause 
catastrophic cancellation of significant digits in floating-point arithmetic, 
leading to a loss of precision.
    
    For example:
    
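    When `x = 10^−8`, `exp(x)` is close to `1 + 10^−8`. A 64-bit float holds 
about 16 significant decimal digits, so storing `1 + 10^−8` keeps only about 8 
digits of the `10^−8` term; subtracting 1 then leaves a result accurate to 
roughly 8 digits, whereas `expm1(x)` returns it at close to full precision.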
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes: the new `expm1` function is user-facing, and the documentation was updated to list it.
    * GitHub Issue: #44903
    
    Authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/compute/api_scalar.cc                |   1 +
 cpp/src/arrow/compute/api_scalar.h                 |  12 +++
 .../compute/kernels/base_arithmetic_internal.h     |   8 ++
 cpp/src/arrow/compute/kernels/scalar_arithmetic.cc |  10 ++
 .../compute/kernels/scalar_arithmetic_test.cc      | 104 ++++++++++++++++++++-
 docs/source/cpp/compute.rst                        |   2 +
 6 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc 
b/cpp/src/arrow/compute/api_scalar.cc
index 7c3bc46650..f00fad29d7 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -744,6 +744,7 @@ SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
 SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
 SCALAR_EAGER_UNARY(Atan, "atan")
 SCALAR_EAGER_UNARY(Exp, "exp")
+SCALAR_EAGER_UNARY(Expm1, "expm1")
 SCALAR_EAGER_UNARY(Sign, "sign")
 
 Result<Datum> Round(const Datum& arg, RoundOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h 
b/cpp/src/arrow/compute/api_scalar.h
index 947474e596..21daf936fd 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -684,6 +684,18 @@ Result<Datum> Power(const Datum& left, const Datum& right,
 ARROW_EXPORT
 Result<Datum> Exp(const Datum& arg, ExecContext* ctx = NULLPTR);
 
+/// \brief More accurately calculate `exp(arg) - 1` for values close to zero.
+/// If the exponent value is null the result will be null.
+///
+/// This function is more accurate than calculating `exp(value) - 1` directly 
for values
+/// close to zero.
+///
+/// \param[in] arg the exponent
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise Euler's number raised to the power of exponent 
minus 1
+ARROW_EXPORT
+Result<Datum> Expm1(const Datum& arg, ExecContext* ctx = NULLPTR);
+
 /// \brief Left shift the left array by the right array. Array values must be 
the
 /// same length. If either operand is null, the result will be null.
 ///
diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h 
b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
index d59320d270..f045e323b3 100644
--- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
+++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
@@ -532,6 +532,14 @@ struct Exp {
   }
 };
 
+struct Expm1 {
+  template <typename T, typename Arg>
+  static T Call(KernelContext*, Arg exp, Status*) {
+    static_assert(std::is_same<T, Arg>::value);
+    return std::expm1(exp);
+  }
+};
+
 struct Power {
   ARROW_NOINLINE
   static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc 
b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index eb243de4a7..f11449ad57 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -1087,6 +1087,12 @@ const FunctionDoc exp_doc{
     ("If exponent is null the result will be null."),
     {"exponent"}};
 
+const FunctionDoc expm1_doc{
+    "Compute Euler's number raised to the power of specified exponent, "
+    "then decrement 1, element-wise",
+    ("If exponent is null the result will be null."),
+    {"exponent"}};
+
 const FunctionDoc pow_checked_doc{
     "Raise arguments to power element-wise",
     ("An error is returned when integer to negative integer power is 
encountered,\n"
@@ -1614,6 +1620,10 @@ void RegisterScalarArithmetic(FunctionRegistry* 
registry) {
   auto exp = MakeUnaryArithmeticFunctionFloatingPoint<Exp>("exp", exp_doc);
   DCHECK_OK(registry->AddFunction(std::move(exp)));
 
+  // ----------------------------------------------------------------------
+  auto expm1 = MakeUnaryArithmeticFunctionFloatingPoint<Expm1>("expm1", 
expm1_doc);
+  DCHECK_OK(registry->AddFunction(std::move(expm1)));
+
   // ----------------------------------------------------------------------
   auto sqrt = MakeUnaryArithmeticFunctionFloatingPoint<SquareRoot>("sqrt", 
sqrt_doc);
   DCHECK_OK(registry->AddFunction(std::move(sqrt)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 37a1bcbc02..9cabebc3f4 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -43,6 +43,9 @@ namespace arrow {
 namespace compute {
 namespace {
 
+// 2.718281828459045090795598298427648842334747314453125
+constexpr double kEuler64 = 0x1.5bf0a8b145769p+1;
+
 using IntegralTypes = testing::Types<Int8Type, Int16Type, Int32Type, 
Int64Type, UInt8Type,
                                      UInt16Type, UInt32Type, UInt64Type>;
 
@@ -1485,8 +1488,7 @@ TYPED_TEST(TestUnaryArithmeticUnsigned, Exp) {
   this->AssertUnaryOp(
       exp, "[null, 1, 10]",
       ArrayFromJSON(float64(), "[null, 2.718281828459045, 
22026.465794806718]"));
-  this->AssertUnaryOp(exp, this->MakeScalar(1),
-                      arrow::MakeScalar<double>(2.718281828459045F));
+  this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64));
 }
 
 TYPED_TEST(TestUnaryArithmeticSigned, Exp) {
@@ -1502,8 +1504,7 @@ TYPED_TEST(TestUnaryArithmeticSigned, Exp) {
                       ArrayFromJSON(float64(),
                                     "[0.000045399929762484854, 
0.36787944117144233, "
                                     "null, 2.718281828459045, 
22026.465794806718]"));
-  this->AssertUnaryOp(exp, this->MakeScalar(1),
-                      arrow::MakeScalar<double>(2.718281828459045F));
+  this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64));
 }
 
 TYPED_TEST(TestUnaryArithmeticFloating, Exp) {
@@ -1563,6 +1564,101 @@ TEST_F(TestUnaryArithmeticDecimal, Exp) {
   }
 }
 
+TYPED_TEST(TestUnaryArithmeticUnsigned, Expm1) {
+  auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Expm1(arg, ctx);
+  };
+  // Empty arrays
+  this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]"));
+  // Array with nulls
+  this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(expm1, this->MakeNullScalar(), 
arrow::MakeNullScalar(float64()));
+  this->AssertUnaryOp(
+      expm1, "[null, 0, 1, 10]",
+      ArrayFromJSON(float64(), "[null, 0.0, 1.718281828459045, 
22025.465794806718]"));
+  this->AssertUnaryOp(expm1, this->MakeScalar(1), arrow::MakeScalar(kEuler64 - 
1.0));
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Expm1) {
+  auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Expm1(arg, ctx);
+  };
+  // Empty arrays
+  this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]"));
+  // Array with nulls
+  this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]"));
+  this->AssertUnaryOp(expm1, this->MakeNullScalar(), 
arrow::MakeNullScalar(float64()));
+  this->AssertUnaryOp(expm1, "[-10, -1, 0, null, 1, 10]",
+                      ArrayFromJSON(float64(),
+                                    "[-0.9999546000702375, 
-0.6321205588285577, 0.0, "
+                                    "null, 1.718281828459045, 
22025.465794806718]"));
+  this->AssertUnaryOp(expm1, this->MakeScalar(1), arrow::MakeScalar(kEuler64 - 
1.0));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Expm1) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+    return Expm1(arg, ctx);
+  };
+  // Empty arrays
+  this->AssertUnaryOp(expm1, "[]", "[]");
+  // Array with nulls
+  this->AssertUnaryOp(expm1, "[null]", "[null]");
+  this->AssertUnaryOp(expm1, this->MakeNullScalar(), this->MakeNullScalar());
+  this->AssertUnaryOp(expm1, "[-1.0, 0.0, 0.1, 0.00000001, null, 10.0]",
+                      "[-0.6321205588285577, 0.0, "
+                      "0.10517091807564763, 0.000000010000000050000001, "
+                      "null, 22025.465794806718]");
+  // Ordinary arrays (positive, negative, fractional, and zero inputs)
+  this->AssertUnaryOp(expm1, "[-10.0, 0.0, 0.1, 0.00000001, 0.5, 1.0]",
+                      "[-0.9999546000702375, 0.0, "
+                      "0.10517091807564763, 0.000000010000000050000001, "
+                      "0.6487212707001282, 1.718281828459045]");
+  this->AssertUnaryOp(expm1, 1.3F, 2.6692964926535487F);
+  this->AssertUnaryOp(expm1, this->MakeScalar(1.3F),
+                      this->MakeScalar(2.6692964926535487F));
+  // Arrays with infinites
+  this->AssertUnaryOp(expm1, "[-Inf, Inf]", "[-1, Inf]");
+  // Arrays with NaNs
+  this->SetNansEqual(true);
+  this->AssertUnaryOp(expm1, "[NaN]", "[NaN]");
+  // Min/max
+  this->AssertUnaryOp(expm1, min, -1.0);
+  this->AssertUnaryOp(expm1, max, std::numeric_limits<CType>::infinity());
+}
+
+TEST_F(TestUnaryArithmeticDecimal, Expm1) {
+  auto max128 = Decimal128::GetMaxValue(38);
+  auto max256 = Decimal256::GetMaxValue(76);
+  const auto func = "expm1";
+  for (const auto& ty : PositiveScaleTypes()) {
+    CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(), 
"[]"));
+    CheckScalar(
+        func, {ArrayFromJSON(ty, R"(["-1.00", "0.00", "0.10", "0.01", "10.00", 
null])")},
+        ArrayFromJSON(float64(),
+                      "[-0.6321205588285577, 0.0, "
+                      "0.10517091807564763, 0.010050167084168058, "
+                      "22025.465794806718, null]"));
+  }
+  CheckScalar(func, {std::make_shared<Decimal128Scalar>(max128, decimal128(38, 
0))},
+              ScalarFromJSON(float64(), "Inf"));
+  CheckScalar(func, {std::make_shared<Decimal128Scalar>(-max128, 
decimal128(38, 0))},
+              ScalarFromJSON(float64(), "-1.0"));
+  CheckScalar(func, {std::make_shared<Decimal256Scalar>(max256, decimal256(76, 
0))},
+              ScalarFromJSON(float64(), "Inf"));
+  CheckScalar(func, {std::make_shared<Decimal256Scalar>(-max256, 
decimal256(76, 0))},
+              ScalarFromJSON(float64(), "-1.0"));
+  for (const auto& ty : NegativeScaleTypes()) {
+    CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(), 
"[]"));
+    CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["12E2", "0", "-42E2", 
null])")},
+                ArrayFromJSON(float64(), "[Inf, 0.0, -1.0, null]"));
+  }
+}
+
 TEST_F(TestUnaryArithmeticDecimal, Log) {
   std::vector<std::string> unchecked = {"ln", "log2", "log10", "log1p"};
   std::vector<std::string> checked = {"ln_checked", "log2_checked", 
"log10_checked",
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 093b160d8e..3c264fb476 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -476,6 +476,8 @@ Mixed time resolution temporal inputs will be cast to 
finest input resolution.
 
+------------------+--------+-------------------------+---------------------------+-------+
 | exp              | Unary  | Numeric                 | Float32/Float64        
   |       |
 
+------------------+--------+-------------------------+---------------------------+-------+
+| expm1            | Unary  | Numeric                 | Float32/Float64        
   |       |
++------------------+--------+-------------------------+---------------------------+-------+
 | multiply         | Binary | Numeric/Temporal        | Numeric/Temporal       
   | \(1)  |
 
+------------------+--------+-------------------------+---------------------------+-------+
 | multiply_checked | Binary | Numeric/Temporal        | Numeric/Temporal       
   | \(1)  |

(arrow) branch main updated: GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic function (#44904)

Reply via email to