pitrou commented on a change in pull request #8637:
URL: https://github.com/apache/arrow/pull/8637#discussion_r521255222
##
File path: docs/source/cpp/compute.rst
##
@@ -140,7 +140,7 @@ Aggregations
+--+++---++
| min_max | Unary | Numeric| Scalar Struct
(1)| :struct:`MinMaxOptions`|
+--+++---++
-| mode | Unary | Numeric| Scalar Struct
(2)||
+| mode | Unary | Numeric| Scalar Struct
(2)| :struct:`ModeOptions` |
Review comment:
Replace "Scalar Struct" with "Struct".
##
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
public:
using ArrowType = T;
using Traits = TypeTraits;
- using c_type = typename ArrowType::c_type;
- using ModeType = typename Traits::ScalarType;
- using CountType = typename TypeTraits::ScalarType;
-
- void AssertModeIs(const Datum& array, c_type expected_mode, int64_t
expected_count) {
-ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
-const StructScalar& value = out.scalar_as();
+ using CType = typename ArrowType::c_type;
+
+ void AssertModesAre(const Datum& array, const int n,
+ const std::vector& expected_modes,
+ const std::vector& expected_counts) {
+ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
+const StructArray out_array(out.array());
+ASSERT_EQ(out_array.length(), expected_modes.size());
+ASSERT_EQ(out_array.num_fields(), 2);
+
+const CType* out_modes = out_array.field(0)->data()->GetValues(1);
+const int64_t* out_counts =
out_array.field(1)->data()->GetValues(1);
+for (int i = 0; i < out_array.length(); ++i) {
+ // equal or nan equal
+ ASSERT_TRUE(
+ (expected_modes[i] == out_modes[i]) ||
+ (expected_modes[i] != expected_modes[i] && out_modes[i] !=
out_modes[i]));
+ ASSERT_EQ(expected_counts[i], out_counts[i]);
+}
+ }
-const auto& out_mode = checked_cast(*value.value[0]);
-ASSERT_EQ(expected_mode, out_mode.value);
+ void AssertModesAre(const std::string& json, const int n,
+ const std::vector& expected_modes,
+ const std::vector& expected_counts) {
+auto array = ArrayFromJSON(type_singleton(), json);
+AssertModesAre(array, n, expected_modes, expected_counts);
+ }
-const auto& out_count = checked_cast(*value.value[1]);
-ASSERT_EQ(expected_count, out_count.value);
+ void AssertModeIs(const Datum& array, CType expected_mode, int64_t
expected_count) {
+AssertModesAre(array, 1, {expected_mode}, {expected_count});
}
- void AssertModeIs(const std::string& json, c_type expected_mode,
+ void AssertModeIs(const std::string& json, CType expected_mode,
int64_t expected_count) {
auto array = ArrayFromJSON(type_singleton(), json);
AssertModeIs(array, expected_mode, expected_count);
}
- void AssertModeIs(const std::vector& json, c_type expected_mode,
+ void AssertModeIs(const std::vector& json, CType expected_mode,
int64_t expected_count) {
auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
AssertModeIs(chunked, expected_mode, expected_count);
}
- void AssertModeIsNull(const Datum& array) {
-ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
-const StructScalar& value = out.scalar_as();
-
-for (const auto& val : value.value) {
- ASSERT_FALSE(val->is_valid);
-}
+ void AssertModeIsNull(const Datum& array, int n) {
Review comment:
This should be renamed, since it's not null anymore. `AssertModesEmpty`?
##
File path: cpp/src/arrow/compute/api_aggregate.h
##
@@ -76,6 +76,18 @@ struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
enum Mode null_handling;
};
+/// \brief Control Mode kernel behavior
+///
+/// Returns top-n common values and counts.
+/// By default, returns the most common value and count.
+struct ARROW_EXPORT ModeOptions : public FunctionOptions {
+ explicit ModeOptions(int n = 1) : n(n) {}
Review comment:
Can we use `int64_t` everywhere for this?
##
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
public:
using ArrowType = T;
using Traits = TypeTraits;
- using c_type = typename ArrowType::c_type;
- using ModeType = typename Traits::ScalarType;
- using CountType = typename TypeTraits::ScalarType;
-
- void