jorgecarleitao commented on a change in pull request #8172:
URL: https://github.com/apache/arrow/pull/8172#discussion_r487314133



##########
File path: rust/datafusion/src/physical_plan/hash_aggregate.rs
##########
@@ -276,117 +272,91 @@ impl RecordBatchReader for GroupedHashAggregateIterator {
                 })
                 .collect::<ArrowResult<Vec<_>>>()?;
 
-            // evaluate the inputs to the aggregate expressions for this batch
-            let aggr_input_values = self
-                .aggr_expr
-                .iter()
-                .map(|expr| {
-                    expr.evaluate_input(&batch)
-                        .map_err(ExecutionError::into_arrow_external_error)
-                })
-                .collect::<ArrowResult<Vec<_>>>()?;
+            // evaluate the aggregation expressions. We could evaluate them 
after the `take`, but since
+            // we need to evaluate all of them anyways, it is more performant 
to do it while they are together.
+            let aggr_input_values = evaluate(&expressions, &batch)
+                .map_err(ExecutionError::into_arrow_external_error)?;
 
             // create vector large enough to hold the grouping key
+            // this is an optimization to avoid allocating `key` on every row.
+            // it will be overwritten on the loop below
             let mut key = Vec::with_capacity(group_values.len());
             for _ in 0..group_values.len() {
                 key.push(GroupByScalar::UInt32(0));
             }
 
-            // iterate over each row in the batch and create the accumulators 
for each grouping key
-            let mut accums: Vec<Rc<AccumulatorSet>> =
-                Vec::with_capacity(batch.num_rows());
-
+            // 1.1 construct the key from the group values
+            // 1.2 construct/update the mapping key -> indexes (on the batch) 
used to `take` values from the batch in a single operation

Review comment:
       wrong comment, 1.2 and 1.3 are together.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to