eejbyfeldt commented on code in PR #12571: URL: https://github.com/apache/datafusion/pull/12571#discussion_r1775739407
########## datafusion/physical-plan/src/aggregates/mod.rs: ########## @@ -210,13 +221,114 @@ impl PhysicalGroupBy { .collect() } + /// The number of expressions in the output schema. + fn num_output_exprs(&self, mode: &AggregateMode) -> usize { + let mut num_exprs = self.expr.len(); + if !self.is_single() { + num_exprs += self.num_internal_exprs; + } + if *mode != AggregateMode::Partial { + num_exprs -= self.num_internal_exprs; + } + num_exprs + } + /// Return grouping expressions as they occur in the output schema. - pub fn output_exprs(&self) -> Vec<Arc<dyn PhysicalExpr>> { - self.expr - .iter() - .enumerate() - .map(|(index, (_, name))| Arc::new(Column::new(name, index)) as _) - .collect() + pub fn output_exprs(&self, mode: &AggregateMode) -> Vec<Arc<dyn PhysicalExpr>> { + let num_output_exprs = self.num_output_exprs(mode); + let mut output_exprs = Vec::with_capacity(num_output_exprs); + output_exprs.extend( + self.expr + .iter() + .enumerate() + .take(num_output_exprs) + .map(|(index, (_, name))| Arc::new(Column::new(name, index)) as _), + ); + if !self.is_single() && *mode == AggregateMode::Partial { + output_exprs + .push(Arc::new(Column::new(INTERNAL_GROUPING_ID, self.expr.len())) as _); + } + output_exprs + } + + /// Returns the number expression as grouping keys. + fn num_group_exprs(&self) -> usize { + if self.is_single() { + self.expr.len() + } else { + self.expr.len() + self.num_internal_exprs + } + } + + /// Returns the data type of the grouping id. Review Comment: Added your comment. The only reason to implement as a bitmask is if we plan to follow up by implementing the grouping function (https://github.com/apache/datafusion/issues/5647) on top of that column. Otherwise it might be better to make the id just be a sequential number. (It would actually be better in some ways it would also fix https://github.com/apache/datafusion/issues/5672) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org