eejbyfeldt commented on code in PR #12571:
URL: https://github.com/apache/datafusion/pull/12571#discussion_r1775739407


##########
datafusion/physical-plan/src/aggregates/mod.rs:
##########
@@ -210,13 +221,114 @@ impl PhysicalGroupBy {
             .collect()
     }
 
+    /// The number of expressions in the output schema.
+    fn num_output_exprs(&self, mode: &AggregateMode) -> usize {
+        let mut num_exprs = self.expr.len();
+        if !self.is_single() {
+            num_exprs += self.num_internal_exprs;
+        }
+        if *mode != AggregateMode::Partial {
+            num_exprs -= self.num_internal_exprs;
+        }
+        num_exprs
+    }
+
     /// Return grouping expressions as they occur in the output schema.
-    pub fn output_exprs(&self) -> Vec<Arc<dyn PhysicalExpr>> {
-        self.expr
-            .iter()
-            .enumerate()
-            .map(|(index, (_, name))| Arc::new(Column::new(name, index)) as _)
-            .collect()
+    pub fn output_exprs(&self, mode: &AggregateMode) -> Vec<Arc<dyn 
PhysicalExpr>> {
+        let num_output_exprs = self.num_output_exprs(mode);
+        let mut output_exprs = Vec::with_capacity(num_output_exprs);
+        output_exprs.extend(
+            self.expr
+                .iter()
+                .enumerate()
+                .take(num_output_exprs)
+                .map(|(index, (_, name))| Arc::new(Column::new(name, index)) 
as _),
+        );
+        if !self.is_single() && *mode == AggregateMode::Partial {
+            output_exprs
+                .push(Arc::new(Column::new(INTERNAL_GROUPING_ID, 
self.expr.len())) as _);
+        }
+        output_exprs
+    }
+
+    /// Returns the number expression as grouping keys.
+    fn num_group_exprs(&self) -> usize {
+        if self.is_single() {
+            self.expr.len()
+        } else {
+            self.expr.len() + self.num_internal_exprs
+        }
+    }
+
+    /// Returns the data type of the grouping id.

Review Comment:
   Added your comment.
   
   The only reason to implement as a bitmask is if we plan to follow up by 
implementing the grouping function 
(https://github.com/apache/datafusion/issues/5647) on top of that column. 
Otherwise it might be better to make the id just be a sequential number. (It 
would actually be better in some ways it would also fix 
https://github.com/apache/datafusion/issues/5672)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to