edponce commented on a change in pull request #11152:
URL: https://github.com/apache/arrow/pull/11152#discussion_r709377405



##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic.cc
##########
@@ -782,6 +826,17 @@ void RegisterScalarAggregateBasic(FunctionRegistry* 
registry) {
 
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
+  // Add min/max as convenience functions
+  func = std::make_shared<ScalarAggregateFunction>("min", Arity::Unary(), 
&min_or_max_doc,
+                                                   
&default_scalar_aggregate_options);
+  aggregate::AddMinOrMaxAggKernel</*index=*/0>(func.get());
+  DCHECK_OK(registry->AddFunction(std::move(func)));

Review comment:
       If the comment of using enum is accepted, then use enum as template 
parameter.

##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
##########
@@ -1425,6 +1425,51 @@ TEST(GroupBy, MinMaxDecimal) {
   }
 }
 
+TEST(GroupBy, MinOrMax) {
+  auto table =

Review comment:
       Maybe include tests of only `hash_min` and only `hash_max`.

##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic.cc
##########
@@ -282,6 +282,43 @@ Result<std::unique_ptr<KernelState>> 
MinMaxInit(KernelContext* ctx,
   return visitor.Create();
 }
 
+// For "min" and "max" functions: dispatch to the MinMax kernel.
+Result<std::unique_ptr<KernelState>> MinOrMaxInit(KernelContext* ctx,
+                                                  const KernelInitArgs& args) {
+  std::vector<ValueDescr> inputs = args.inputs;
+
+  ARROW_ASSIGN_OR_RAISE(auto func, 
GetFunctionRegistry()->GetFunction("min_max"));
+  ARROW_ASSIGN_OR_RAISE(auto kernel, func->DispatchBest(&inputs));
+
+  KernelInitArgs new_args{kernel, inputs, args.options};
+  return kernel->init(ctx, new_args);
+}
+
+Result<ValueDescr> MinOrMaxType(KernelContext*, const std::vector<ValueDescr>& 
descrs) {
+  // any[T] -> scalar[T]
+  return ValueDescr::Scalar(descrs.front().type);
+}
+
+template <uint8_t index>
+Status MinOrMaxFinalize(KernelContext* ctx, Datum* out) {
+  Datum temp;
+  RETURN_NOT_OK(checked_cast<ScalarAggregator*>(ctx->state())->Finalize(ctx, 
&temp));
+  *out = temp.scalar_as<StructScalar>().value[index];
+  return Status::OK();

Review comment:
       It feels like the `index` value should not come from a template but an 
internal option, but given that we currently do not have a mechanism to extend 
options, I would suggest to use an enum instead of an explicit integer to make 
the code a bit more expressive.
   ```c++
   enum class MinMaxIndices : int8_t {
      MinIndex = 0,
      MaxIndex = 1,
   };
   ```

##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -1864,6 +1860,40 @@ Result<std::unique_ptr<KernelState>> 
MinMaxInit(KernelContext* ctx,
   return std::move(impl);
 }
 
+template <uint8_t index>
+Result<std::unique_ptr<KernelState>> MinOrMaxInit(KernelContext* ctx,
+                                                  const KernelInitArgs& args) {
+  std::vector<ValueDescr> inputs = args.inputs;
+
+  ARROW_ASSIGN_OR_RAISE(auto func, 
GetFunctionRegistry()->GetFunction("hash_min_max"));
+  ARROW_ASSIGN_OR_RAISE(auto kernel, func->DispatchBest(&inputs));
+
+  KernelInitArgs new_args{kernel, inputs, args.options};
+  return kernel->init(ctx, new_args);
+}
+
+template <uint8_t index>
+HashAggregateKernel MakeMinOrMaxKernel() {
+  HashAggregateKernel kernel;
+  kernel.init = MinOrMaxInit<index>;
+  kernel.signature = KernelSignature::Make(
+      {InputType(ValueDescr::ANY), InputType::Array(Type::UINT32)},
+      OutputType([](KernelContext* ctx,
+                    const std::vector<ValueDescr>& descrs) -> 
Result<ValueDescr> {
+        return ValueDescr::Array(descrs[0].type);
+      }));

Review comment:
       Nit: Here `OutputType` is expressed as a lambda function, so should 
[`MinMaxType`](https://github.com/apache/arrow/pull/11152/files#diff-fead2cb629ddc5a706a9383d4e64e24064cb3a2103e5e078bde7a11b57fd9654R297)
 be also a lambda.

##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -1864,6 +1860,40 @@ Result<std::unique_ptr<KernelState>> 
MinMaxInit(KernelContext* ctx,
   return std::move(impl);
 }
 
+template <uint8_t index>
+Result<std::unique_ptr<KernelState>> MinOrMaxInit(KernelContext* ctx,
+                                                  const KernelInitArgs& args) {
+  std::vector<ValueDescr> inputs = args.inputs;
+
+  ARROW_ASSIGN_OR_RAISE(auto func, 
GetFunctionRegistry()->GetFunction("hash_min_max"));
+  ARROW_ASSIGN_OR_RAISE(auto kernel, func->DispatchBest(&inputs));
+
+  KernelInitArgs new_args{kernel, inputs, args.options};
+  return kernel->init(ctx, new_args);
+}
+
+template <uint8_t index>
+HashAggregateKernel MakeMinOrMaxKernel() {
+  HashAggregateKernel kernel;
+  kernel.init = MinOrMaxInit<index>;
+  kernel.signature = KernelSignature::Make(
+      {InputType(ValueDescr::ANY), InputType::Array(Type::UINT32)},
+      OutputType([](KernelContext* ctx,
+                    const std::vector<ValueDescr>& descrs) -> 
Result<ValueDescr> {
+        return ValueDescr::Array(descrs[0].type);
+      }));
+  kernel.resize = HashAggregateResize;
+  kernel.consume = HashAggregateConsume;
+  kernel.merge = HashAggregateMerge;
+  kernel.finalize = [](KernelContext* ctx, Datum* out) {
+    ARROW_ASSIGN_OR_RAISE(Datum temp,
+                          
checked_cast<GroupedAggregator*>(ctx->state())->Finalize());
+    *out = temp.array_as<StructArray>()->field(index);

Review comment:
       Similar comment as above on lambda vs `MinOrMaxFinalize`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to