rluvaton commented on code in PR #15022: URL: https://github.com/apache/datafusion/pull/15022#discussion_r2049446143
########## datafusion/expr-common/src/groups_accumulator.rs: ########## @@ -106,6 +107,44 @@ impl EmitTo { /// [`Accumulator`]: crate::accumulator::Accumulator /// [Aggregating Millions of Groups Fast blog]: https://arrow.apache.org/blog/2023/08/05/datafusion_fast_grouping/ pub trait GroupsAccumulator: Send { + /// Whether [`Self::with_group_indices_order_mode`] should be called. + /// + /// this is when the accumulator would benefit from knowing the order of the group indices. + /// + fn group_order_sensitivity(&self) -> bool { + false + } + + /// Called with the order mode for the group indices. + /// + /// This will only be called if [`Self::group_order_sensitivity`] is true and can be called either right after initialization + /// or after [`Self::state`], [`Self::evaluate`] consumed all the groups. + /// + /// For example if `group_indices_order_mode` equals to [`InputOrderMode::Sorted`] it means that if you get the following group indices in [`Self::update_batch`]/[`Self::merge_batch`] + /// ```text + /// [1, 1, 1, 1, 1, 2, 2, 3] + /// ``` + /// + /// You can be sure that you will never get another group with index 1 or 2 (until call to [`Self::state`]/[`Self::evaluate`] which will shift the group indices). + /// However, you might get another group with index 3 in the future. + /// + /// Possible optimization you can do in your implementation when the input is sorted is: + /// 1. Only track the current group state + /// 2. Have a builder that is ready to be built by call to [`Self::state`]/[`Self::evaluate`] + /// + fn with_group_indices_order_mode( + self: Box<Self>, Review Comment: Taking `self: Box<Self>` here makes it possible to return a specialized accumulator based on the order mode. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org