felipecrv commented on code in PR #15083:
URL: https://github.com/apache/arrow/pull/15083#discussion_r1072868585
##########
cpp/src/arrow/compute/exec/aggregate.cc:
##########
@@ -121,27 +127,48 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat
ExecSpanIterator argument_iterator;
ExecBatch args_batch;
- if (!arguments.empty()) {
- ARROW_ASSIGN_OR_RAISE(args_batch, ExecBatch::Make(arguments));
+ std::optional<int64_t> inferred_length = ExecBatch::InferLength(arguments);
+ if (!inferred_length.has_value()) {
+ inferred_length = ExecBatch::InferLength(keys);
+ }
+ DCHECK(inferred_length.has_value());
+ const int64_t length = inferred_length.value();
+ if (!aggregates.empty()) {
+ ARROW_ASSIGN_OR_RAISE(args_batch, ExecBatch::Make(arguments, length));
// Construct and initialize HashAggregateKernels
- auto argument_types = args_batch.GetTypes();
+ std::vector<std::vector<TypeHolder>> aggs_argument_types(aggregates.size());
+ {
+ // Contains the flattened list of aggregate arguments. We use the size of
+ // each Aggregate::target to re-group the aggregate argument types.
+ auto argument_types = args_batch.GetTypes();
+ size_t i = 0;
+ for (size_t j = 0; j < aggregates.size(); j++) {
+ const size_t num_agg_args = aggregates[j].target.size();
+ for (size_t k = 0; k < num_agg_args && i < argument_types.size(); k++, i++) {
+ aggs_argument_types[j].push_back(std::move(argument_types[i]));
+ }
+ }
+ DCHECK_EQ(i, argument_types.size())
+ << "argument_types should contain input types for all the aggregates.";
+ }
- ARROW_ASSIGN_OR_RAISE(kernels, GetKernels(ctx, aggregates, argument_types));
+ ARROW_ASSIGN_OR_RAISE(kernels, GetKernels(ctx, aggregates, aggs_argument_types));
Review Comment:
Hmmm yes. I didn't consider changing the type signatures of these functions,
but they are internal and don't have many callsites, so I think I can collapse
these transformation steps.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]