cyb70289 commented on a change in pull request #8637:
URL: https://github.com/apache/arrow/pull/8637#discussion_r521865256
##########
File path: cpp/src/arrow/compute/kernels/aggregate_mode.cc
##########
@@ -205,35 +243,59 @@ struct ModeImpl : public ScalarAggregator {
this->state.MergeFrom(std::move(other.state));
}
- void Finalize(KernelContext*, Datum* out) override {
- using ModeType = typename TypeTraits<ArrowType>::ScalarType;
- using CountType = typename TypeTraits<Int64Type>::ScalarType;
+ static std::shared_ptr<ArrayData> MakeArrayData(
+ const std::shared_ptr<DataType>& data_type, int64_t n) {
+ auto data = ArrayData::Make(data_type, n, 0);
+ data->buffers.resize(2);
+ data->buffers[0] = nullptr;
+ return data;
+ }
- std::vector<std::shared_ptr<Scalar>> values;
- auto mode_count = this->state.Finalize();
- auto mode = mode_count.first;
- auto count = mode_count.second;
- if (count == 0) {
- values = {std::make_shared<ModeType>(), std::make_shared<CountType>()};
- } else {
- values = {std::make_shared<ModeType>(mode),
std::make_shared<CountType>(count)};
+ void Finalize(KernelContext* ctx, Datum* out) override {
+ const auto& mode_type = TypeTraits<ArrowType>::type_singleton();
+ const auto& count_type = int64();
+ const auto& out_type =
+ struct_({field(kModeFieldName, mode_type), field(kCountFieldName,
count_type)});
+
+ int n = this->options.n;
+ if (n > state.DistinctValues()) {
+ n = state.DistinctValues();
+ }
+ if (n <= 0) {
+ *out = Datum(StructArray(out_type, 0, ArrayVector{}).data());
Review comment:
Done
##########
File path: cpp/src/arrow/compute/kernels/aggregate_mode.cc
##########
@@ -205,35 +243,59 @@ struct ModeImpl : public ScalarAggregator {
this->state.MergeFrom(std::move(other.state));
}
- void Finalize(KernelContext*, Datum* out) override {
- using ModeType = typename TypeTraits<ArrowType>::ScalarType;
- using CountType = typename TypeTraits<Int64Type>::ScalarType;
+ static std::shared_ptr<ArrayData> MakeArrayData(
+ const std::shared_ptr<DataType>& data_type, int64_t n) {
+ auto data = ArrayData::Make(data_type, n, 0);
+ data->buffers.resize(2);
+ data->buffers[0] = nullptr;
+ return data;
+ }
- std::vector<std::shared_ptr<Scalar>> values;
- auto mode_count = this->state.Finalize();
- auto mode = mode_count.first;
- auto count = mode_count.second;
- if (count == 0) {
- values = {std::make_shared<ModeType>(), std::make_shared<CountType>()};
- } else {
- values = {std::make_shared<ModeType>(mode),
std::make_shared<CountType>(count)};
+ void Finalize(KernelContext* ctx, Datum* out) override {
+ const auto& mode_type = TypeTraits<ArrowType>::type_singleton();
+ const auto& count_type = int64();
+ const auto& out_type =
+ struct_({field(kModeFieldName, mode_type), field(kCountFieldName,
count_type)});
+
+ int n = this->options.n;
+ if (n > state.DistinctValues()) {
+ n = state.DistinctValues();
+ }
+ if (n <= 0) {
+ *out = Datum(StructArray(out_type, 0, ArrayVector{}).data());
+ return;
}
- out->value = std::make_shared<StructScalar>(std::move(values),
this->out_type);
+
+ auto mode_data = this->MakeArrayData(mode_type, n);
+ auto count_data = this->MakeArrayData(count_type, n);
+ KERNEL_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx, ctx->Allocate(n *
sizeof(CType)));
+ KERNEL_ASSIGN_OR_RAISE(count_data->buffers[1], ctx,
+ ctx->Allocate(n * sizeof(int64_t)));
+
+ CType* mode_buffer = mode_data->template GetMutableValues<CType>(1);
+ int64_t* count_buffer = count_data->template GetMutableValues<int64_t>(1);
+ this->state.Finalize(mode_buffer, count_buffer, n);
+
+ *out = Datum(
+ StructArray(out_type, n, ArrayVector{MakeArray(mode_data),
MakeArray(count_data)})
Review comment:
Done
##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
public:
using ArrowType = T;
using Traits = TypeTraits<ArrowType>;
- using c_type = typename ArrowType::c_type;
- using ModeType = typename Traits::ScalarType;
- using CountType = typename TypeTraits<Int64Type>::ScalarType;
-
- void AssertModeIs(const Datum& array, c_type expected_mode, int64_t
expected_count) {
- ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
- const StructScalar& value = out.scalar_as<StructScalar>();
+ using CType = typename ArrowType::c_type;
+
+ void AssertModesAre(const Datum& array, const int n,
+ const std::vector<CType>& expected_modes,
+ const std::vector<int64_t>& expected_counts) {
+ ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
Review comment:
Done
##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
public:
using ArrowType = T;
using Traits = TypeTraits<ArrowType>;
- using c_type = typename ArrowType::c_type;
- using ModeType = typename Traits::ScalarType;
- using CountType = typename TypeTraits<Int64Type>::ScalarType;
-
- void AssertModeIs(const Datum& array, c_type expected_mode, int64_t
expected_count) {
- ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
- const StructScalar& value = out.scalar_as<StructScalar>();
+ using CType = typename ArrowType::c_type;
+
+ void AssertModesAre(const Datum& array, const int n,
+ const std::vector<CType>& expected_modes,
+ const std::vector<int64_t>& expected_counts) {
+ ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
+ const StructArray out_array(out.array());
+ ASSERT_EQ(out_array.length(), expected_modes.size());
+ ASSERT_EQ(out_array.num_fields(), 2);
+
+ const CType* out_modes = out_array.field(0)->data()->GetValues<CType>(1);
+ const int64_t* out_counts =
out_array.field(1)->data()->GetValues<int64_t>(1);
+ for (int i = 0; i < out_array.length(); ++i) {
+ // equal or nan equal
+ ASSERT_TRUE(
+ (expected_modes[i] == out_modes[i]) ||
+ (expected_modes[i] != expected_modes[i] && out_modes[i] !=
out_modes[i]));
+ ASSERT_EQ(expected_counts[i], out_counts[i]);
+ }
+ }
- const auto& out_mode = checked_cast<const ModeType&>(*value.value[0]);
- ASSERT_EQ(expected_mode, out_mode.value);
+ void AssertModesAre(const std::string& json, const int n,
+ const std::vector<CType>& expected_modes,
+ const std::vector<int64_t>& expected_counts) {
+ auto array = ArrayFromJSON(type_singleton(), json);
+ AssertModesAre(array, n, expected_modes, expected_counts);
+ }
- const auto& out_count = checked_cast<const CountType&>(*value.value[1]);
- ASSERT_EQ(expected_count, out_count.value);
+ void AssertModeIs(const Datum& array, CType expected_mode, int64_t
expected_count) {
+ AssertModesAre(array, 1, {expected_mode}, {expected_count});
}
- void AssertModeIs(const std::string& json, c_type expected_mode,
+ void AssertModeIs(const std::string& json, CType expected_mode,
int64_t expected_count) {
auto array = ArrayFromJSON(type_singleton(), json);
AssertModeIs(array, expected_mode, expected_count);
}
- void AssertModeIs(const std::vector<std::string>& json, c_type expected_mode,
+ void AssertModeIs(const std::vector<std::string>& json, CType expected_mode,
int64_t expected_count) {
auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
AssertModeIs(chunked, expected_mode, expected_count);
}
- void AssertModeIsNull(const Datum& array) {
- ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
- const StructScalar& value = out.scalar_as<StructScalar>();
-
- for (const auto& val : value.value) {
- ASSERT_FALSE(val->is_valid);
- }
+ void AssertModeIsNull(const Datum& array, int n) {
Review comment:
Done
##########
File path: docs/source/cpp/compute.rst
##########
@@ -153,7 +153,7 @@ Notes:
* \(1) Output is a ``{"min": input type, "max": input type}`` Struct
-* \(2) Output is a ``{"mode": input type, "count": Int64}`` Struct
+* \(2) Output is an array of ``{"mode": input type, "count": Int64}`` Struct
Review comment:
Done
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]