This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5e476b3d36 GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic
function (#44904)
5e476b3d36 is described below
commit 5e476b3d36591dac7ce24067167bb5efa24fd63d
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Mon Dec 2 18:45:15 2024 -0300
GH-44903: [C++] Add the Expm1(exponent) scalar arithmetic function (#44904)
`Expm1(exponent)` is a more accurate way of computing `Exp(exponent) - 1.0`
for small values of exponent.
### Rationale for this change
`expm1(x)` is specifically designed to compute `exp(x)−1` more accurately,
particularly for small x. It uses numerical techniques and approximations that
minimize the loss of precision.
When x is very small (close to 0), `exp(x)` is approximately `1+x`.
Computing `exp(x)−1` directly, by subtracting 1 from `exp(x)`, can therefore
cancel most of the significant digits in floating-point arithmetic, leading to
a loss of precision.
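For example:
When `x = 10^−8`, `exp(x)` is close to `1 + 10^−8`. Rounding that value to a
double discards the low-order digits of the `10^−8` part, so subtracting 1
leaves a result near `10^−8` that is accurate to only about 8 significant
decimal digits, whereas `expm1(x)` retains full precision.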
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes, and the documentation was updated to list the new function.
* GitHub Issue: #44903
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/compute/api_scalar.cc | 1 +
cpp/src/arrow/compute/api_scalar.h | 12 +++
.../compute/kernels/base_arithmetic_internal.h | 8 ++
cpp/src/arrow/compute/kernels/scalar_arithmetic.cc | 10 ++
.../compute/kernels/scalar_arithmetic_test.cc | 104 ++++++++++++++++++++-
docs/source/cpp/compute.rst | 2 +
6 files changed, 133 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/compute/api_scalar.cc
b/cpp/src/arrow/compute/api_scalar.cc
index 7c3bc46650..f00fad29d7 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -744,6 +744,7 @@ SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked")
SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked")
SCALAR_EAGER_UNARY(Atan, "atan")
SCALAR_EAGER_UNARY(Exp, "exp")
+SCALAR_EAGER_UNARY(Expm1, "expm1")
SCALAR_EAGER_UNARY(Sign, "sign")
Result<Datum> Round(const Datum& arg, RoundOptions options, ExecContext* ctx) {
diff --git a/cpp/src/arrow/compute/api_scalar.h
b/cpp/src/arrow/compute/api_scalar.h
index 947474e596..21daf936fd 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -684,6 +684,18 @@ Result<Datum> Power(const Datum& left, const Datum& right,
ARROW_EXPORT
Result<Datum> Exp(const Datum& arg, ExecContext* ctx = NULLPTR);
+/// \brief More accurately calculate `exp(arg) - 1` for values close to zero.
+/// If the exponent value is null the result will be null.
+///
+/// This function is more accurate than calculating `exp(value) - 1` directly
for values
+/// close to zero.
+///
+/// \param[in] arg the exponent
+/// \param[in] ctx the function execution context, optional
+/// \return the element-wise Euler's number raised to the power of exponent
minus 1
+ARROW_EXPORT
+Result<Datum> Expm1(const Datum& arg, ExecContext* ctx = NULLPTR);
+
/// \brief Left shift the left array by the right array. Array values must be
the
/// same length. If either operand is null, the result will be null.
///
diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
index d59320d270..f045e323b3 100644
--- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
+++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
@@ -532,6 +532,14 @@ struct Exp {
}
};
+struct Expm1 {
+ template <typename T, typename Arg>
+ static T Call(KernelContext*, Arg exp, Status*) {
+ static_assert(std::is_same<T, Arg>::value);
+ return std::expm1(exp);
+ }
+};
+
struct Power {
ARROW_NOINLINE
static uint64_t IntegerPower(uint64_t base, uint64_t exp) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index eb243de4a7..f11449ad57 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -1087,6 +1087,12 @@ const FunctionDoc exp_doc{
("If exponent is null the result will be null."),
{"exponent"}};
+const FunctionDoc expm1_doc{
+ "Compute Euler's number raised to the power of specified exponent, "
+ "then decrement 1, element-wise",
+ ("If exponent is null the result will be null."),
+ {"exponent"}};
+
const FunctionDoc pow_checked_doc{
"Raise arguments to power element-wise",
("An error is returned when integer to negative integer power is
encountered,\n"
@@ -1614,6 +1620,10 @@ void RegisterScalarArithmetic(FunctionRegistry*
registry) {
auto exp = MakeUnaryArithmeticFunctionFloatingPoint<Exp>("exp", exp_doc);
DCHECK_OK(registry->AddFunction(std::move(exp)));
+ // ----------------------------------------------------------------------
+ auto expm1 = MakeUnaryArithmeticFunctionFloatingPoint<Expm1>("expm1",
expm1_doc);
+ DCHECK_OK(registry->AddFunction(std::move(expm1)));
+
// ----------------------------------------------------------------------
auto sqrt = MakeUnaryArithmeticFunctionFloatingPoint<SquareRoot>("sqrt",
sqrt_doc);
DCHECK_OK(registry->AddFunction(std::move(sqrt)));
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 37a1bcbc02..9cabebc3f4 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -43,6 +43,9 @@ namespace arrow {
namespace compute {
namespace {
+// 2.718281828459045090795598298427648842334747314453125
+constexpr double kEuler64 = 0x1.5bf0a8b145769p+1;
+
using IntegralTypes = testing::Types<Int8Type, Int16Type, Int32Type,
Int64Type, UInt8Type,
UInt16Type, UInt32Type, UInt64Type>;
@@ -1485,8 +1488,7 @@ TYPED_TEST(TestUnaryArithmeticUnsigned, Exp) {
this->AssertUnaryOp(
exp, "[null, 1, 10]",
ArrayFromJSON(float64(), "[null, 2.718281828459045,
22026.465794806718]"));
- this->AssertUnaryOp(exp, this->MakeScalar(1),
- arrow::MakeScalar<double>(2.718281828459045F));
+ this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64));
}
TYPED_TEST(TestUnaryArithmeticSigned, Exp) {
@@ -1502,8 +1504,7 @@ TYPED_TEST(TestUnaryArithmeticSigned, Exp) {
ArrayFromJSON(float64(),
"[0.000045399929762484854,
0.36787944117144233, "
"null, 2.718281828459045,
22026.465794806718]"));
- this->AssertUnaryOp(exp, this->MakeScalar(1),
- arrow::MakeScalar<double>(2.718281828459045F));
+ this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64));
}
TYPED_TEST(TestUnaryArithmeticFloating, Exp) {
@@ -1563,6 +1564,101 @@ TEST_F(TestUnaryArithmeticDecimal, Exp) {
}
}
+TYPED_TEST(TestUnaryArithmeticUnsigned, Expm1) {
+ auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+ return Expm1(arg, ctx);
+ };
+ // Empty arrays
+ this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]"));
+ // Array with nulls
+ this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]"));
+ this->AssertUnaryOp(expm1, this->MakeNullScalar(),
arrow::MakeNullScalar(float64()));
+ this->AssertUnaryOp(
+ expm1, "[null, 0, 1, 10]",
+ ArrayFromJSON(float64(), "[null, 0.0, 1.718281828459045,
22025.465794806718]"));
+ this->AssertUnaryOp(expm1, this->MakeScalar(1), arrow::MakeScalar(kEuler64 -
1.0));
+}
+
+TYPED_TEST(TestUnaryArithmeticSigned, Expm1) {
+ auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+ return Expm1(arg, ctx);
+ };
+ // Empty arrays
+ this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]"));
+ // Array with nulls
+ this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]"));
+ this->AssertUnaryOp(expm1, this->MakeNullScalar(),
arrow::MakeNullScalar(float64()));
+ this->AssertUnaryOp(expm1, "[-10, -1, 0, null, 1, 10]",
+ ArrayFromJSON(float64(),
+ "[-0.9999546000702375,
-0.6321205588285577, 0.0, "
+ "null, 1.718281828459045,
22025.465794806718]"));
+ this->AssertUnaryOp(expm1, this->MakeScalar(1), arrow::MakeScalar(kEuler64 -
1.0));
+}
+
+TYPED_TEST(TestUnaryArithmeticFloating, Expm1) {
+ using CType = typename TestFixture::CType;
+
+ auto min = std::numeric_limits<CType>::lowest();
+ auto max = std::numeric_limits<CType>::max();
+
+ auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) {
+ return Expm1(arg, ctx);
+ };
+ // Empty arrays
+ this->AssertUnaryOp(expm1, "[]", "[]");
+ // Array with nulls
+ this->AssertUnaryOp(expm1, "[null]", "[null]");
+ this->AssertUnaryOp(expm1, this->MakeNullScalar(), this->MakeNullScalar());
+ this->AssertUnaryOp(expm1, "[-1.0, 0.0, 0.1, 0.00000001, null, 10.0]",
+ "[-0.6321205588285577, 0.0, "
+ "0.10517091807564763, 0.000000010000000050000001, "
+ "null, 22025.465794806718]");
+ // Ordinary arrays (positive, negative, fractional, and zero inputs)
+ this->AssertUnaryOp(expm1, "[-10.0, 0.0, 0.1, 0.00000001, 0.5, 1.0]",
+ "[-0.9999546000702375, 0.0, "
+ "0.10517091807564763, 0.000000010000000050000001, "
+ "0.6487212707001282, 1.718281828459045]");
+ this->AssertUnaryOp(expm1, 1.3F, 2.6692964926535487F);
+ this->AssertUnaryOp(expm1, this->MakeScalar(1.3F),
+ this->MakeScalar(2.6692964926535487F));
+ // Arrays with infinites
+ this->AssertUnaryOp(expm1, "[-Inf, Inf]", "[-1, Inf]");
+ // Arrays with NaNs
+ this->SetNansEqual(true);
+ this->AssertUnaryOp(expm1, "[NaN]", "[NaN]");
+ // Min/max
+ this->AssertUnaryOp(expm1, min, -1.0);
+ this->AssertUnaryOp(expm1, max, std::numeric_limits<CType>::infinity());
+}
+
+TEST_F(TestUnaryArithmeticDecimal, Expm1) {
+ auto max128 = Decimal128::GetMaxValue(38);
+ auto max256 = Decimal256::GetMaxValue(76);
+ const auto func = "expm1";
+ for (const auto& ty : PositiveScaleTypes()) {
+ CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(),
"[]"));
+ CheckScalar(
+ func, {ArrayFromJSON(ty, R"(["-1.00", "0.00", "0.10", "0.01", "10.00",
null])")},
+ ArrayFromJSON(float64(),
+ "[-0.6321205588285577, 0.0, "
+ "0.10517091807564763, 0.010050167084168058, "
+ "22025.465794806718, null]"));
+ }
+ CheckScalar(func, {std::make_shared<Decimal128Scalar>(max128, decimal128(38,
0))},
+ ScalarFromJSON(float64(), "Inf"));
+ CheckScalar(func, {std::make_shared<Decimal128Scalar>(-max128,
decimal128(38, 0))},
+ ScalarFromJSON(float64(), "-1.0"));
+ CheckScalar(func, {std::make_shared<Decimal256Scalar>(max256, decimal256(76,
0))},
+ ScalarFromJSON(float64(), "Inf"));
+ CheckScalar(func, {std::make_shared<Decimal256Scalar>(-max256,
decimal256(76, 0))},
+ ScalarFromJSON(float64(), "-1.0"));
+ for (const auto& ty : NegativeScaleTypes()) {
+ CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(),
"[]"));
+ CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["12E2", "0", "-42E2",
null])")},
+ ArrayFromJSON(float64(), "[Inf, 0.0, -1.0, null]"));
+ }
+}
+
TEST_F(TestUnaryArithmeticDecimal, Log) {
std::vector<std::string> unchecked = {"ln", "log2", "log10", "log1p"};
std::vector<std::string> checked = {"ln_checked", "log2_checked",
"log10_checked",
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 093b160d8e..3c264fb476 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -476,6 +476,8 @@ Mixed time resolution temporal inputs will be cast to
finest input resolution.
+------------------+--------+-------------------------+---------------------------+-------+
| exp | Unary | Numeric | Float32/Float64
| |
+------------------+--------+-------------------------+---------------------------+-------+
+| expm1 | Unary | Numeric | Float32/Float64
| |
++------------------+--------+-------------------------+---------------------------+-------+
| multiply | Binary | Numeric/Temporal | Numeric/Temporal
| \(1) |
+------------------+--------+-------------------------+---------------------------+-------+
| multiply_checked | Binary | Numeric/Temporal | Numeric/Temporal
| \(1) |