This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6e6e6f0340 GH-36931: [C++] Add cumulative_mean function (#36932)
6e6e6f0340 is described below
commit 6e6e6f0340672ed49fb8e7cddf7bc47f2ca360dd
Author: Jin Shang <[email protected]>
AuthorDate: Wed Aug 9 23:11:15 2023 +0800
GH-36931: [C++] Add cumulative_mean function (#36932)
### Rationale for this change
Add `cumulative_mean` function
### What changes are included in this PR?
Implement the `cumulative_mean` function. The current cumulative_* kernel
generator can only be based on a simple binary arithmetic op, and its state can
only be a single value. I refactored it to use a generic state so that
it can handle more complex operations such as `mean`, `median`, `var`, etc.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* Closes: #36931
Lead-authored-by: Jin Shang <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/api_vector.cc | 5 +
cpp/src/arrow/compute/api_vector.h | 11 ++
.../arrow/compute/kernels/vector_cumulative_ops.cc | 195 +++++++++++++++------
.../compute/kernels/vector_cumulative_ops_test.cc | 100 ++++++++++-
docs/source/cpp/compute.rst | 34 ++--
5 files changed, 268 insertions(+), 77 deletions(-)
diff --git a/cpp/src/arrow/compute/api_vector.cc
b/cpp/src/arrow/compute/api_vector.cc
index f73b10e11e..d47ee42ebf 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -417,5 +417,10 @@ Result<Datum> CumulativeMin(const Datum& values, const
CumulativeOptions& option
return CallFunction("cumulative_min", {Datum(values)}, &options, ctx);
}
+Result<Datum> CumulativeMean(const Datum& values, const CumulativeOptions&
options,
+ ExecContext* ctx) {
+ return CallFunction("cumulative_mean", {Datum(values)}, &options, ctx);  // forwards values, options and ctx to CallFunction under the name "cumulative_mean"
+}
+
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/compute/api_vector.h
b/cpp/src/arrow/compute/api_vector.h
index 4f226ac007..0233090ef6 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -226,6 +226,7 @@ class ARROW_EXPORT CumulativeOptions : public
FunctionOptions {
/// - prod: 1
/// - min: maximum of the input type
/// - max: minimum of the input type
+ /// - mean: start is ignored because it has no meaning for mean
std::optional<std::shared_ptr<Scalar>> start;
/// If true, nulls in the input are ignored and produce a corresponding null
output.
@@ -661,6 +662,16 @@ Result<Datum> CumulativeMin(
const Datum& values, const CumulativeOptions& options =
CumulativeOptions::Defaults(),
ExecContext* ctx = NULLPTR);
+/// \brief Compute the cumulative mean of an array-like object
+///
+/// \param[in] values array-like input
+/// \param[in] options configures cumulative mean behavior, `start` is ignored
+/// \param[in] ctx the function execution context, optional
+ARROW_EXPORT
+Result<Datum> CumulativeMean(
+ const Datum& values, const CumulativeOptions& options =
CumulativeOptions::Defaults(),
+ ExecContext* ctx = NULLPTR);
+
/// \brief Return the first order difference of an array.
///
/// Computes the first order difference of an array, i.e.
diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
index 82caa3bff5..86d2679486 100644
--- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
+++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc
@@ -25,12 +25,11 @@
#include "arrow/compute/kernels/codegen_internal.h"
#include "arrow/compute/kernels/common_internal.h"
#include "arrow/result.h"
+#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/visit_type_inline.h"
-namespace arrow {
-namespace compute {
-namespace internal {
+namespace arrow::compute::internal {
namespace {
@@ -63,19 +62,60 @@ struct CumulativeOptionsWrapper : public
OptionsWrapper<OptionsType> {
}
};
-// The driver kernel for all cumulative compute functions. Op is a compute
kernel
-// representing any binary associative operation with an identity element
(add, product,
-// min, max, etc.), i.e. ones that form a monoid, and OptionsType the options
type
-// corresponding to Op. ArgType and OutType are the input and output types,
which will
+// Running state for cumulative kernels built on a simple arithmetic binary op
+// such as Add, Mul, Min, Max, etc. The state is a single value of the output
+// type, which is the same as the input type.
+template <typename Op, typename ArgType>
+struct CumulativeBinaryOp {
+  using OutType = ArgType;
+  using OutValue = typename GetOutputType<OutType>::T;
+  using ArgValue = typename GetViewType<ArgType>::T;
+
+  OutValue current_value;
+
+  // Without an explicit start, begin from the identity element of Op.
+  CumulativeBinaryOp() : current_value(Identity<Op>::template value<OutValue>) {}
+
+  // Begin from a caller-supplied start value. The shared_ptr is taken by const
+  // reference so that building the state does not copy it.
+  explicit CumulativeBinaryOp(const std::shared_ptr<Scalar>& start)
+      : current_value(UnboxScalar<OutType>::Unbox(*start)) {}
+
+  // Fold `arg` into the running value and return the updated value.
+  // `ctx` and `st` are forwarded unchanged to Op::Call.
+  OutValue Call(KernelContext* ctx, ArgValue arg, Status* st) {
+    current_value =
+        Op::template Call<OutValue, ArgValue, ArgValue>(ctx, arg, current_value, st);
+    return current_value;
+  }
+};
+
+// Running state for cumulative_mean: keeps the sum and the number of values
+// seen so far. The output is always a double, whatever the input type.
+template <typename ArgType>
+struct CumulativeMean {
+  using OutType = DoubleType;
+  using ArgValue = typename GetViewType<ArgType>::T;
+  int64_t count = 0;
+  double sum = 0;
+
+  CumulativeMean() = default;
+
+  // The start value is ignored for CumulativeMean. This constructor exists so
+  // that the kernels can build any state type from `options.start`.
+  explicit CumulativeMean(const std::shared_ptr<Scalar>& /*start*/) {}
+
+  // Add `arg` to the running sum and return the mean of all values seen so far.
+  // `ctx` and `st` are not used.
+  double Call(KernelContext* /*ctx*/, ArgValue arg, Status* /*st*/) {
+    sum += static_cast<double>(arg);
+    ++count;
+    return sum / static_cast<double>(count);
+  }
+};
+
+// The driver kernel for all cumulative compute functions.
+// ArgType and OutType are the input and output types, which will
// normally be the same (e.g. the cumulative sum of an array of Int64Type will
result in
-// an array of Int64Type).
-template <typename OutType, typename ArgType, typename Op, typename
OptionsType>
+// an array of Int64Type) with the exception of CumulativeMean, which will
always return
+// a double.
+template <typename ArgType, typename CumulativeState>
struct Accumulator {
- using OutValue = typename GetOutputType<OutType>::T;
+ using OutType = typename CumulativeState::OutType;
using ArgValue = typename GetViewType<ArgType>::T;
KernelContext* ctx;
- ArgValue current_value;
+ CumulativeState current_state;
bool skip_nulls;
bool encountered_null = false;
NumericBuilder<OutType> builder;
@@ -88,11 +128,7 @@ struct Accumulator {
if (skip_nulls || (input.GetNullCount() == 0 && !encountered_null)) {
VisitArrayValuesInline<ArgType>(
input,
- [&](ArgValue v) {
- current_value = Op::template Call<OutValue, ArgValue, ArgValue>(
- ctx, v, current_value, &st);
- builder.UnsafeAppend(current_value);
- },
+ [&](ArgValue v) { builder.UnsafeAppend(current_state.Call(ctx, v,
&st)); },
[&]() { builder.UnsafeAppendNull(); });
} else {
int64_t nulls_start_idx = 0;
@@ -100,9 +136,7 @@ struct Accumulator {
input,
[&](ArgValue v) {
if (!encountered_null) {
- current_value = Op::template Call<OutValue, ArgValue, ArgValue>(
- ctx, v, current_value, &st);
- builder.UnsafeAppend(current_value);
+ builder.UnsafeAppend(current_state.Call(ctx, v, &st));
++nulls_start_idx;
}
},
@@ -115,16 +149,17 @@ struct Accumulator {
}
};
-template <typename OutType, typename ArgType, typename Op, typename
OptionsType>
+template <typename ArgType, typename CumulativeState, typename OptionsType>
struct CumulativeKernel {
+ using OutType = typename CumulativeState::OutType;
using OutValue = typename GetOutputType<OutType>::T;
static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
const auto& options = CumulativeOptionsWrapper<OptionsType>::Get(ctx);
- Accumulator<OutType, ArgType, Op, OptionsType> accumulator(ctx);
+ Accumulator<ArgType, CumulativeState> accumulator(ctx);
if (options.start.has_value()) {
- accumulator.current_value =
UnboxScalar<OutType>::Unbox(*(options.start.value()));
+ accumulator.current_state = CumulativeState(options.start.value());
} else {
- accumulator.current_value = Identity<Op>::template value<OutValue>;
+ accumulator.current_state = CumulativeState();
}
accumulator.skip_nulls = options.skip_nulls;
@@ -138,16 +173,17 @@ struct CumulativeKernel {
}
};
-template <typename OutType, typename ArgType, typename Op, typename
OptionsType>
+template <typename ArgType, typename CumulativeState, typename OptionsType>
struct CumulativeKernelChunked {
+ using OutType = typename CumulativeState::OutType;
using OutValue = typename GetOutputType<OutType>::T;
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const auto& options = CumulativeOptionsWrapper<OptionsType>::Get(ctx);
- Accumulator<OutType, ArgType, Op, OptionsType> accumulator(ctx);
+ Accumulator<ArgType, CumulativeState> accumulator(ctx);
if (options.start.has_value()) {
- accumulator.current_value =
UnboxScalar<OutType>::Unbox(*(options.start.value()));
+ accumulator.current_state = CumulativeState(options.start.value());
} else {
- accumulator.current_value = Identity<Op>::template value<OutValue>;
+ accumulator.current_state = CumulativeState();
}
accumulator.skip_nulls = options.skip_nulls;
@@ -217,11 +253,52 @@ const FunctionDoc cumulative_min_doc{
"start as the new minimum)."),
{"values"},
"CumulativeOptions"};
-} // namespace
-template <typename Op, typename OptionsType>
-void MakeVectorCumulativeFunction(FunctionRegistry* registry, const
std::string func_name,
- const FunctionDoc doc) {
+const FunctionDoc cumulative_mean_doc{
+    "Compute the cumulative mean over a numeric input",
+    ("`values` must be numeric. Return an array/chunked array which is the\n"
+     "cumulative mean computed over `values`. CumulativeOptions::start is\n"
+     "ignored."),
+    {"values"},
+    "CumulativeOptions"};
+
+// Kernel factory for complex stateful computations; State<Type> is the per-input-type running state.
+template <template <typename ArgType> typename State, typename OptionsType>
+struct CumulativeStatefulKernelFactory {
+ VectorKernel kernel;  // reused across Make() calls; type-independent fields are set in the constructor
+
+ CumulativeStatefulKernelFactory() {
+ kernel.can_execute_chunkwise = false;
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE;
+ kernel.init = CumulativeOptionsWrapper<OptionsType>::Init;
+ }
+
+ template <typename Type>
+ enable_if_number<Type, Status> Visit(const Type& type) {  // overload enabled through enable_if_number<Type, ...>
+ kernel.signature = KernelSignature::Make(
+ {type.GetSharedPtr()},
+ OutputType(TypeTraits<typename
State<Type>::OutType>::type_singleton()));
+ kernel.exec = CumulativeKernel<Type, State<Type>, OptionsType>::Exec;
+ kernel.exec_chunked = CumulativeKernelChunked<Type, State<Type>,
OptionsType>::Exec;
+ return arrow::Status::OK();
+ }
+
+ Status Visit(const DataType& type) {  // fallback for every type the overload above does not accept
+ return Status::NotImplemented("Cumulative kernel not implemented for type
",
+ type.ToString());
+ }
+
+ Result<VectorKernel> Make(const DataType& type) {  // fills signature/exec for `type`, then returns a copy of `kernel`
+ RETURN_NOT_OK(VisitTypeInline(type, this));
+ return kernel;
+ }
+};
+
+template <template <typename ArgType> typename State, typename OptionsType>
+void MakeVectorCumulativeStatefulFunction(FunctionRegistry* registry,
+ const std::string func_name,
+ const FunctionDoc doc) {
static const OptionsType kDefaultOptions = OptionsType::Defaults();
auto func =
std::make_shared<VectorFunction>(func_name, Arity::Unary(), doc,
&kDefaultOptions);
@@ -229,41 +306,49 @@ void MakeVectorCumulativeFunction(FunctionRegistry*
registry, const std::string
std::vector<std::shared_ptr<DataType>> types;
types.insert(types.end(), NumericTypes().begin(), NumericTypes().end());
+ CumulativeStatefulKernelFactory<State, OptionsType> kernel_factory;
for (const auto& ty : types) {
- VectorKernel kernel;
- kernel.can_execute_chunkwise = false;
- kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE;
- kernel.signature = KernelSignature::Make({ty}, OutputType(ty));
- kernel.exec =
- ArithmeticExecFromOp<CumulativeKernel, Op, ArrayKernelExec,
OptionsType>(ty);
- kernel.exec_chunked =
- ArithmeticExecFromOp<CumulativeKernelChunked, Op,
VectorKernel::ChunkedExec,
- OptionsType>(ty);
- kernel.init = CumulativeOptionsWrapper<OptionsType>::Init;
+ auto kernel = kernel_factory.Make(*ty).ValueOrDie();
DCHECK_OK(func->AddKernel(std::move(kernel)));
}
DCHECK_OK(registry->AddFunction(std::move(func)));
}
+// A kernel factory that forwards to CumulativeBinaryOp<Op, ...> for the given
type.
+// Need to use a struct because template-using declarations cannot appear in
+// function scope.
+template <typename Op, typename OptionsType>
+struct MakeVectorCumulativeBinaryOpFunction {
+ template <typename ArgType>
+ using State = CumulativeBinaryOp<Op, ArgType>;  // fixes Op, leaving a one-parameter template usable as the State argument
+
+ static void Call(FunctionRegistry* registry, std::string func_name,
FunctionDoc doc) {
+ MakeVectorCumulativeStatefulFunction<State, OptionsType>(
+ registry, std::move(func_name), std::move(doc));  // name and doc are taken by value and moved on
+ }
+};
+
+} // namespace
+
void RegisterVectorCumulativeSum(FunctionRegistry* registry) {
- MakeVectorCumulativeFunction<Add, CumulativeOptions>(registry,
"cumulative_sum",
- cumulative_sum_doc);
- MakeVectorCumulativeFunction<AddChecked, CumulativeOptions>(
+ MakeVectorCumulativeBinaryOpFunction<Add, CumulativeOptions>::Call(
+ registry, "cumulative_sum", cumulative_sum_doc);
+ MakeVectorCumulativeBinaryOpFunction<AddChecked, CumulativeOptions>::Call(
registry, "cumulative_sum_checked", cumulative_sum_checked_doc);
- MakeVectorCumulativeFunction<Multiply, CumulativeOptions>(registry,
"cumulative_prod",
-
cumulative_prod_doc);
- MakeVectorCumulativeFunction<MultiplyChecked, CumulativeOptions>(
+ MakeVectorCumulativeBinaryOpFunction<Multiply, CumulativeOptions>::Call(
+ registry, "cumulative_prod", cumulative_prod_doc);
+ MakeVectorCumulativeBinaryOpFunction<MultiplyChecked,
CumulativeOptions>::Call(
registry, "cumulative_prod_checked", cumulative_prod_checked_doc);
- MakeVectorCumulativeFunction<Min, CumulativeOptions>(registry,
"cumulative_min",
- cumulative_min_doc);
- MakeVectorCumulativeFunction<Max, CumulativeOptions>(registry,
"cumulative_max",
- cumulative_max_doc);
+ MakeVectorCumulativeBinaryOpFunction<Min, CumulativeOptions>::Call(
+ registry, "cumulative_min", cumulative_min_doc);
+ MakeVectorCumulativeBinaryOpFunction<Max, CumulativeOptions>::Call(
+ registry, "cumulative_max", cumulative_max_doc);
+
+ // cumulative_mean keeps (sum, count) state, so its State is registered directly
+ // and it must carry its own documentation.
+ MakeVectorCumulativeStatefulFunction<CumulativeMean, CumulativeOptions>(
+ registry, "cumulative_mean", cumulative_mean_doc);
}
-} // namespace internal
-} // namespace compute
-} // namespace arrow
+} // namespace arrow::compute::internal
diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
index 4ff46eb4ac..6760be26fc 100644
--- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc
@@ -37,19 +37,23 @@
namespace arrow {
namespace compute {
-constexpr static std::array<const char*, 6> kCumulativeFunctionNames{
+static const std::vector<std::string> kCumulativeFunctionNames{
"cumulative_sum", "cumulative_sum_checked", "cumulative_prod",
- "cumulative_prod_checked", "cumulative_min", "cumulative_max"};
+ "cumulative_prod_checked", "cumulative_min", "cumulative_max",
+ "cumulative_mean"};
TEST(TestCumulative, Empty) {
for (auto function : kCumulativeFunctionNames) {
CumulativeOptions options;
for (auto ty : NumericTypes()) {
+ auto return_ty = std::string(function) == "cumulative_mean" ? float64()
: ty;
auto empty_arr = ArrayFromJSON(ty, "[]");
+ auto expected_arr = ArrayFromJSON(return_ty, "[]");
auto empty_chunked = ChunkedArrayFromJSON(ty, {"[]"});
- CheckVectorUnary(function, empty_arr, empty_arr, &options);
+ auto expected_chunked = ChunkedArrayFromJSON(return_ty, {"[]"});
+ CheckVectorUnary(function, empty_arr, expected_arr, &options);
- CheckVectorUnary(function, empty_chunked, empty_chunked, &options);
+ CheckVectorUnary(function, empty_chunked, expected_chunked, &options);
}
}
}
@@ -58,14 +62,19 @@ TEST(TestCumulative, AllNulls) {
for (auto function : kCumulativeFunctionNames) {
CumulativeOptions options;
for (auto ty : NumericTypes()) {
+ auto return_ty = std::string(function) == "cumulative_mean" ? float64()
: ty;
auto nulls_arr = ArrayFromJSON(ty, "[null, null, null]");
+ auto expected_arr = ArrayFromJSON(return_ty, "[null, null, null]");
auto nulls_one_chunk = ChunkedArrayFromJSON(ty, {"[null, null, null]"});
+ auto expected_one_chunk = ChunkedArrayFromJSON(return_ty, {"[null, null,
null]"});
auto nulls_three_chunks = ChunkedArrayFromJSON(ty, {"[null]", "[null]",
"[null]"});
- CheckVectorUnary(function, nulls_arr, nulls_arr, &options);
+ auto expected_three_chunks =
+ ChunkedArrayFromJSON(return_ty, {"[null]", "[null]", "[null]"});
+ CheckVectorUnary(function, nulls_arr, expected_arr, &options);
- CheckVectorUnary(function, nulls_one_chunk, nulls_one_chunk, &options);
+ CheckVectorUnary(function, nulls_one_chunk, expected_one_chunk,
&options);
- CheckVectorUnary(function, nulls_three_chunks, nulls_one_chunk,
&options);
+ CheckVectorUnary(function, nulls_three_chunks, expected_one_chunk,
&options);
}
}
}
@@ -810,6 +819,72 @@ TEST(TestCumulativeMin, NoStartDoSkip) {
}
}
+TEST(TestCumulativeMean, NoSkip) {  // nulls are not skipped: every output from the first null onwards is null
+ CumulativeOptions options(false);
+ for (auto ty : NumericTypes()) {  // the expected output is float64 for every numeric input type
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[5, 6, 4, 2, 3,
1]"),
+ ArrayFromJSON(float64(), "[5, 5.5, 5, 4.25, 4, 3.5]"),
&options);
+
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[5, 6, null, 2,
null, 1]"),
+ ArrayFromJSON(float64(), "[5, 5.5, null, null, null,
null]"),
+ &options);
+
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[null, 6, null, 2,
null, 1]"),
+ ArrayFromJSON(float64(), "[null, null, null, null, null,
null]"),
+ &options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3,
1]"}),
+ ChunkedArrayFromJSON(float64(), {"[5, 5.5, 5, 4.25, 4, 3.5]"}),
&options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2,
null, 1]"}),
+ ChunkedArrayFromJSON(float64(), {"[5, 5.5, null, null, null, null]"}),
&options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2,
null, 1]"}),
+ ChunkedArrayFromJSON(float64(), {"[null, null, null, null, null,
null]"}),
+ &options);
+ }
+}
+
+TEST(TestCumulativeMean, DoSkip) {  // nulls are skipped: a null input gives a null output and the mean keeps accumulating
+ CumulativeOptions options(true);
+ for (auto ty : NumericTypes()) {  // the expected output is float64 for every numeric input type
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[5, 6, 4, 2, 3,
1]"),
+ ArrayFromJSON(float64(), "[5, 5.5, 5, 4.25, 4, 3.5]"),
&options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ArrayFromJSON(ty, "[5, 6, null, 2, null, 1]"),
+ ArrayFromJSON(float64(), "[5, 5.5, null, 4.333333333333333, null,
3.5]"),
+ &options);
+
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[null, 6, null, 2,
null, 1]"),
+ ArrayFromJSON(float64(), "[null, 6, null, 4, null, 3]"),
&options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[5, 6, 4]", "[2, 3,
1]"}),
+ ChunkedArrayFromJSON(float64(), {"[5, 5.5, 5, 4.25, 4, 3.5]"}),
&options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[5, 6, null]", "[2,
null, 1]"}),
+ ChunkedArrayFromJSON(float64(), {"[5, 5.5, null, 4.333333333333333,
null, 3.5]"}),
+ &options);
+
+ CheckVectorUnary(
+ "cumulative_mean", ChunkedArrayFromJSON(ty, {"[null, 6, null]", "[2,
null, 1]"}),
+ ChunkedArrayFromJSON(float64(), {"[null, 6, null, 4, null, 3]"}),
&options);
+ }
+}
+
+TEST(TestCumulativeMean, StartValue) {
+ CumulativeOptions options(3, true); // start (3) should be ignored: the expected means below are those of the input alone
+ for (auto ty : NumericTypes()) {
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(ty, "[5, 6, 4, 2, 3,
1]"),
+ ArrayFromJSON(float64(), "[5, 5.5, 5, 4.25, 4, 3.5]"),
&options);
+ }
+}
+
TEST(TestCumulativeSum, ConvenienceFunctionCheckOverflow) {
ASSERT_ARRAYS_EQUAL(*CumulativeSum(ArrayFromJSON(int8(), "[127, 1]"),
CumulativeOptions::Defaults(), false)
@@ -846,6 +921,13 @@ TEST(TestCumulativeMin, ConvenienceFunction) {
*ArrayFromJSON(int8(), "[-1, -2, -3]"));
}
+TEST(TestCumulativeMean, ConvenienceFunction) {  // calls the CumulativeMean() API function directly; int8 input, float64 output
+ ASSERT_ARRAYS_EQUAL(*CumulativeMean(ArrayFromJSON(int8(), "[-1, -2, -3]"),
+ CumulativeOptions::Defaults())
+ ->make_array(),
+ *ArrayFromJSON(float64(), "[-1, -1.5, -2]"));
+}
+
TEST(TestCumulative, NaN) {
// addition with NaN is always NaN
CheckVectorUnary("cumulative_sum", ArrayFromJSON(float64(), "[1, 2, NaN, 4,
5]"),
@@ -862,6 +944,10 @@ TEST(TestCumulative, NaN) {
// min with NaN is always ignored because Nan < a always returns false
CheckVectorUnary("cumulative_min", ArrayFromJSON(float64(), "[5, 4, NaN, 2,
1]"),
ArrayFromJSON(float64(), "[5, 4, 4, 2, 1]"));
+
+ // once a NaN has been encountered, the cumulative mean stays NaN
+ CheckVectorUnary("cumulative_mean", ArrayFromJSON(float64(), "[5, 4, NaN, 2,
1]"),
+ ArrayFromJSON(float64(), "[5, 4.5, NaN, NaN, NaN]"));
}
} // namespace compute
} // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 55e2958812..f8e3713146 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -1621,21 +1621,23 @@ do not detect overflow. They are also available in an
overflow-checking variant,
suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when
overflow is detected.
-+------------------------+-------+-------------+-------------+--------------------------------+-------+
-| Function name | Arity | Input types | Output type | Options class
| Notes |
-+=========================+=======+=============+=============+================================+=======+
-| cumulative_sum | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_sum_checked | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_prod | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_prod_checked | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_max | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
-| cumulative_min | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
-+-------------------------+-------+-------------+-------------+--------------------------------+-------+
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| Function name | Arity | Input types | Output type | Options class
| Notes |
++=========================+=======+=============+=============+================================+===========+
+| cumulative_sum | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_sum_checked | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_prod | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_prod_checked | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_max | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_min | Unary | Numeric | Numeric |
:struct:`CumulativeOptions` | \(1) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
+| cumulative_mean | Unary | Numeric | Float64 |
:struct:`CumulativeOptions` | \(1) \(2) |
++-------------------------+-------+-------------+-------------+--------------------------------+-----------+
* \(1) CumulativeOptions has two optional parameters. The first parameter
:member:`CumulativeOptions::start` is a starting value for the running
@@ -1647,6 +1649,8 @@ overflow is detected.
true, each null in the input produces a corresponding null in the output and
doesn't affect the accumulation forward.
+* \(2) :member:`CumulativeOptions::start` is ignored.
+
Associative transforms
~~~~~~~~~~~~~~~~~~~~~~