dhruv9vats commented on a change in pull request #12484:
URL: https://github.com/apache/arrow/pull/12484#discussion_r822585993



##########
File path: cpp/src/arrow/compute/kernels/hash_aggregate.cc
##########
@@ -2758,6 +2758,317 @@ struct GroupedOneFactory {
   InputType argument_type;
 };
 
+// ----------------------------------------------------------------------
+// List implementation
+
+template <typename Type, typename Enable = void>
+struct GroupedListImpl final : public GroupedAggregator {
+  using CType = typename TypeTraits<Type>::CType;
+  using GetSet = GroupedValueTraits<Type>;
+
+  Status Init(ExecContext* ctx, const std::vector<ValueDescr>&,
+              const FunctionOptions* options) override {
+    ctx_ = ctx;
+    // out_type_ initialized by GroupedListInit
+    values_ = TypedBufferBuilder<CType>(ctx_->memory_pool());
+    groups_ = TypedBufferBuilder<uint32_t>(ctx_->memory_pool());
+    values_bitmap_ = TypedBufferBuilder<bool>(ctx_->memory_pool());
+    return Status::OK();
+  }
+
+  Status Resize(int64_t new_num_groups) override {
+    num_groups_ = new_num_groups;
+    return Status::OK();
+  }
+
+  Status Consume(const ExecBatch& batch) override {
+    const auto* groups = batch[1].array()->GetValues<uint32_t>(1);

Review comment:
       Is this somehow related to this?
   For columns 1 and 4 (zero-indexed) in the table above, with `use_exec_plan = true`, I get an incorrect `values` array in the `Finalize` step:
   ```cpp
   // col 1 - boolean
   [
     true,
     true,
     false,
     false,
     null,
     null,
     false,
     false,
     true,
     false
   ]
   
   // col 4 - fixed size binary
   [
     null,
     616161,
     626163,
     null,
     null,
     null,
     null,
     626162,
     313233,
     null
   ]
   ```
   
   While it appears to be correct with `use_exec_plan = false`:
   ```cpp
   // col 1 - boolean
   [
     true,
     true,
     false,
     false,
     null,
     true,
     false,
     false,
     true,
     true
   ]
   
   // col 4 - fixed size binary
   [
     null,
     616161,
     626163,
     null,
     323334,
     646464,
     626364,
     626162,
     313233,
     null
   ]
   ```
   
   Am I handling these types incorrectly in the implementation? (These results are without the `DCHECK_EQ`.)
   What am I missing?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to