pitrou commented on code in PR #44184:
URL: https://github.com/apache/arrow/pull/44184#discussion_r2137063623
##########
cpp/src/arrow/compute/kernels/codegen_internal.cc:
##########
@@ -528,6 +533,28 @@ Status CastDecimalArgs(TypeHolder* begin, size_t count) {
return Status::OK();
}
+Result<std::shared_ptr<DataType>> WidenDecimalToMaxPrecision(
+ std::shared_ptr<DataType> type) {
+ DCHECK(is_decimal(type->id()));
Review Comment:
Error checking here is weird. Why are we using asserts if we're also
returning a `Result` in case of error? We should do one or the other.
##########
cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc:
##########
@@ -259,8 +271,11 @@ struct GroupedReducingFactory {
// Sum implementation
template <typename Type>
-struct GroupedSumImpl : public GroupedReducingAggregator<Type,
GroupedSumImpl<Type>> {
- using Base = GroupedReducingAggregator<Type, GroupedSumImpl<Type>>;
+struct GroupedSumImpl
+ : public GroupedReducingAggregator<Type, GroupedSumImpl<Type>,
+ typename
FindAccumulatorType<Type>::Type> {
Review Comment:
I'm curious, why was this change necessary?
##########
docs/source/cpp/compute.rst:
##########
@@ -171,8 +171,8 @@ To avoid exhaustively listing supported types, the tables
below use a number
of general type categories:
* "Numeric": Integer types (Int8, etc.) and Floating-point types (Float32,
- Float64, sometimes Float16). Some functions also accept Decimal128 and
- Decimal256 input.
+ Float64, sometimes Float16). Some functions also accept
+ Decimal128/256 input.
Review Comment:
More and more functions now also accept decimal32/decimal64.
```suggestion
Float64, sometimes Float16). Some functions also accept Decimal input.
```
##########
python/pyarrow/tests/test_compute.py:
##########
@@ -355,10 +355,55 @@ def test_sum_array(arrow_type):
arr = pa.array([], type=arrow_type)
assert arr.sum().as_py() is None # noqa: E711
+ assert pc.sum(arr).as_py() is None # noqa: E711
Review Comment:
You don't have to test both `arr.sum()` and `pc.sum(arr)` every time, since
the former just calls the latter. This will make the tests more readable
and easier to maintain.
##########
cpp/src/arrow/compute/kernels/aggregate_test.cc:
##########
@@ -495,51 +495,67 @@ TEST_F(TestSumKernelRoundOff, Basics) {
}
TEST(TestDecimalSumKernel, SimpleSum) {
- for (const auto& ty : {decimal128(3, 2), decimal256(3, 2)}) {
+ std::vector<std::shared_ptr<DataType>> init_types = {decimal128(3, 2),
+ decimal256(3, 2)};
+ std::vector<std::shared_ptr<DataType>> out_types = {decimal128(38, 2),
+ decimal256(76, 2)};
+
+ for (size_t i = 0; i < init_types.size(); ++i) {
+ auto& ty = init_types[i];
+ auto& out_ty = out_types[i];
Review Comment:
Nit: make those const
```suggestion
const auto& ty = init_types[i];
const auto& out_ty = out_types[i];
```
##########
cpp/src/arrow/compute/kernels/aggregate_test.cc:
##########
@@ -495,51 +495,67 @@ TEST_F(TestSumKernelRoundOff, Basics) {
}
TEST(TestDecimalSumKernel, SimpleSum) {
- for (const auto& ty : {decimal128(3, 2), decimal256(3, 2)}) {
+ std::vector<std::shared_ptr<DataType>> init_types = {decimal128(3, 2),
+ decimal256(3, 2)};
+ std::vector<std::shared_ptr<DataType>> out_types = {decimal128(38, 2),
+ decimal256(76, 2)};
+
+ for (size_t i = 0; i < init_types.size(); ++i) {
+ auto& ty = init_types[i];
+ auto& out_ty = out_types[i];
+
EXPECT_THAT(Sum(ArrayFromJSON(ty, R"([])")),
- ResultWith(ScalarFromJSON(ty, R"(null)")));
+ ResultWith(ScalarFromJSON(out_ty, R"(null)")));
EXPECT_THAT(Sum(ArrayFromJSON(ty, R"([null])")),
- ResultWith(ScalarFromJSON(ty, R"(null)")));
+ ResultWith(ScalarFromJSON(out_ty, R"(null)")));
EXPECT_THAT(
Sum(ArrayFromJSON(ty, R"(["0.00", "1.01", "2.02", "3.03", "4.04",
"5.05"])")),
- ResultWith(ScalarFromJSON(ty, R"("15.15")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("15.15")")));
Datum chunks =
ChunkedArrayFromJSON(ty, {R"(["0.00", "1.01", "2.02", "3.03", "4.04",
"5.05"])"});
- EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(ty, R"("15.15")")));
+ EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(out_ty, R"("15.15")")));
chunks = ChunkedArrayFromJSON(
ty, {R"(["0.00", "1.01", "2.02"])", R"(["3.03", "4.04", "5.05"])"});
- EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(ty, R"("15.15")")));
+ EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(out_ty, R"("15.15")")));
chunks = ChunkedArrayFromJSON(
ty, {R"(["0.00", "1.01", "2.02"])", "[]", R"(["3.03", "4.04",
"5.05"])"});
- EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(ty, R"("15.15")")));
+ EXPECT_THAT(Sum(chunks), ResultWith(ScalarFromJSON(out_ty, R"("15.15")")));
ScalarAggregateOptions options(/*skip_nulls=*/true, /*min_count=*/0);
EXPECT_THAT(Sum(ArrayFromJSON(ty, R"([])"), options),
- ResultWith(ScalarFromJSON(ty, R"("0.00")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("0.00")")));
EXPECT_THAT(Sum(ArrayFromJSON(ty, R"([null])"), options),
- ResultWith(ScalarFromJSON(ty, R"("0.00")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("0.00")")));
chunks = ChunkedArrayFromJSON(ty, {});
- EXPECT_THAT(Sum(chunks, options), ResultWith(ScalarFromJSON(ty,
R"("0.00")")));
+ EXPECT_THAT(Sum(chunks, options), ResultWith(ScalarFromJSON(out_ty,
R"("0.00")")));
EXPECT_THAT(
Sum(ArrayFromJSON(ty, R"(["1.01", null, "3.03", null, "5.05", null,
"7.07"])"),
options),
- ResultWith(ScalarFromJSON(ty, R"("16.16")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("16.16")")));
EXPECT_THAT(Sum(ScalarFromJSON(ty, R"("5.05")")),
- ResultWith(ScalarFromJSON(ty, R"("5.05")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("5.05")")));
EXPECT_THAT(Sum(ScalarFromJSON(ty, R"(null)")),
- ResultWith(ScalarFromJSON(ty, R"(null)")));
+ ResultWith(ScalarFromJSON(out_ty, R"(null)")));
EXPECT_THAT(Sum(ScalarFromJSON(ty, R"(null)"), options),
- ResultWith(ScalarFromJSON(ty, R"("0.00")")));
+ ResultWith(ScalarFromJSON(out_ty, R"("0.00")")));
Review Comment:
Ok, but can we also test the effects of the added precision? i.e. test a
case that would fail with the previous behavior of not increasing precision.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]