dhruv9vats commented on a change in pull request #12484:
URL: https://github.com/apache/arrow/pull/12484#discussion_r824093460



##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -2939,26 +3287,26 @@ Result<Datum> GroupBy(const std::vector<Datum>& 
arguments, const std::vector<Dat
     ARROW_ASSIGN_OR_RAISE(Datum transposition, 
groupers[0]->Consume(other_keys));
     groupers[thread_index].reset();
 
-    for (size_t i = 0; i < kernels.size(); ++i) {
+    for (size_t idx = 0; idx < kernels.size(); ++idx) {

Review comment:
       Just a nit: but my clang-tidy kept showing me that this `Declaration 
shadows a local variable`. Will revert it if not needed.

##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -2758,6 +2771,343 @@ struct GroupedOneFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// List implementation
+
+template <typename Type, typename Enable = void>
+struct GroupedListImpl final : public GroupedAggregator {
+  using CType = typename TypeTraits<Type>::CType;
+  using GetSet = GroupedValueTraits<Type>;
+
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
+    ctx_ = ctx;
+    has_nulls_ = false;
+    // out_type_ initialized by GroupedListInit
+    values_ = TypedBufferBuilder<CType>(ctx_->memory_pool());
+    groups_ = TypedBufferBuilder<uint32_t>(ctx_->memory_pool());
+    values_bitmap_ = TypedBufferBuilder<bool>(ctx_->memory_pool());
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    num_groups_ = new_num_groups;
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    int64_t num_values = batch[0].array()->length;
+    int64_t offset = batch[0].array()->offset;
+
+    const auto* groups = batch[1].array()->GetValues<uint32_t>(1, 0);
+    RETURN_NOT_OK(groups_.Append(groups, num_values));

Review comment:
       That `groups_.Append` is working correctly without passing the offset. 
Even though we use `GetValues<uint32_t>(1, 0)`?

##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -621,6 +622,12 @@ struct GroupedValueTraits {
 
   static CType Get(const CType* values, uint32_t g) { return values[g]; }
   static void Set(CType* values, uint32_t g, CType v) { values[g] = v; }
+  static Status AppendBuffers(TypedBufferBuilder<CType>* destination,
+                              const uint8_t* values, int64_t offset, int64_t 
num_values) {
+    RETURN_NOT_OK(
+        destination->Append(reinterpret_cast<const CType*>(values) + offset, 
num_values));

Review comment:
       Currently accounting for the type size with this cast. Will have to see 
how to extend to (fixed)-binary-types.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to