westonpace commented on code in PR #15083:
URL: https://github.com/apache/arrow/pull/15083#discussion_r1069855381
##########
cpp/src/arrow/compute/exec/aggregate_node.cc:
##########
@@ -46,14 +46,26 @@ namespace {
void AggregatesToString(std::stringstream* ss, const Schema& input_schema,
const std::vector<Aggregate>& aggs,
- const std::vector<int>& target_field_ids, int indent =
0) {
+ const std::vector<std::vector<int>>& target_fieldsets,
+ int indent = 0) {
*ss << "aggregates=[" << std::endl;
for (size_t i = 0; i < aggs.size(); i++) {
for (int j = 0; j < indent; ++j) *ss << " ";
- *ss << '\t' << aggs[i].function << '('
- << input_schema.field(target_field_ids[i])->name();
+ *ss << '\t' << aggs[i].function << '(';
+ const auto& target = target_fieldsets[i];
+ if (target.size() == 0) {
+ *ss << "*";
+ } else {
+ *ss << input_schema.field(target[0])->name();
+ for (size_t k = 1; k < target.size(); k++) {
+ *ss << ", " << input_schema.field(target[k])->name();
+ }
+ }
if (aggs[i].options) {
- *ss << ", " << aggs[i].options->ToString();
+ auto* options_type = aggs[i].options->options_type();
+ if (options_type->num_properties() > 0) {
Review Comment:
I kind of like printing `count_all(*, {})`, since it helps distinguish options from arguments.
##########
cpp/src/arrow/compute/kernels/hash_aggregate.cc:
##########
@@ -108,19 +108,31 @@ Result<TypeHolder> ResolveGroupOutputType(KernelContext*
ctx,
return checked_cast<GroupedAggregator*>(ctx->state())->out_type();
}
-HashAggregateKernel MakeKernel(InputType argument_type, KernelInit init) {
+HashAggregateKernel MakeKernel(std::shared_ptr<KernelSignature> signature,
+ KernelInit init) {
HashAggregateKernel kernel;
kernel.init = std::move(init);
- kernel.signature =
- KernelSignature::Make({std::move(argument_type),
InputType(Type::UINT32)},
- OutputType(ResolveGroupOutputType));
+ kernel.signature = std::move(signature);
kernel.resize = HashAggregateResize;
kernel.consume = HashAggregateConsume;
kernel.merge = HashAggregateMerge;
kernel.finalize = HashAggregateFinalize;
return kernel;
}
+HashAggregateKernel MakeKernel(InputType argument_type, KernelInit init) {
+ return MakeKernel(
+ KernelSignature::Make({std::move(argument_type),
InputType(Type::UINT32)},
+ OutputType(ResolveGroupOutputType)),
+ std::move(init));
+}
+
+HashAggregateKernel MakeUnaryKernel(KernelInit init) {
+ return MakeKernel(KernelSignature::Make({InputType(Type::UINT32)},
+ OutputType(ResolveGroupOutputType)),
+ std::move(init));
+}
+
Review Comment:
Ok, I agree. Unary makes sense given the context.
##########
cpp/src/arrow/compute/exec.cc:
##########
@@ -164,13 +171,43 @@ Result<ExecBatch> ExecBatch::Make(std::vector<Datum>
values) {
}
if (length != value.length()) {
- return Status::Invalid(
- "Arrays used to construct an ExecBatch must have equal length");
+ // all the arrays should have the same length
+ return kInvalidValues;
}
}
- if (length == -1) {
- length = 1;
+ return length == -1 ? 1 : length;
+}
+
+} // namespace
+
+std::optional<int64_t> ExecBatch::InferLength(const std::vector<Datum>&
values) {
+ const int64_t length = DoInferLength(values);
+ if (length < 0) {
+ return std::nullopt;
+ }
+ return {length};
+}
+
+Result<ExecBatch> ExecBatch::Make(std::vector<Datum> values, int64_t length) {
+ // Infer the length again and/or validate the given length.
+ const int64_t inferred_length = DoInferLength(values);
+ switch (inferred_length) {
+ case kEmptyInput:
+ if (length < 0) {
+ return Status::Invalid(
+ "Cannot infer ExecBatch length without at least one value");
+ }
+ break;
+
+ case kInvalidValues:
+ return Status::Invalid(
+ "Arrays used to construct an ExecBatch must have equal length");
+
+ default:
+ DCHECK(length < 0 || length == inferred_length);
Review Comment:
I wonder if we should return an invalid status when `length != inferred_length`
instead of failing a DCHECK. I could go either way on this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]