icexelloss commented on code in PR #34311: URL: https://github.com/apache/arrow/pull/34311#discussion_r1123686937
########## cpp/src/arrow/compute/exec/options.h: ########## @@ -199,21 +199,32 @@ class ARROW_EXPORT ProjectNodeOptions : public ExecNodeOptions { std::vector<std::string> names; }; -/// \brief Make a node which aggregates input batches, optionally grouped by keys. +/// \brief Make a node which aggregates input batches, optionally grouped by keys and +/// optionally segmented by segment-keys. Both keys and segment-keys determine the group. +/// However segment-keys are also used for determining grouping segments, which should be +/// large, and allow streaming a partial aggregation result after processing each segment. +/// One common use-case for segment-keys is ordered aggregation, in which the segment-key +/// attribute specifies a column with non-decreasing values or a lexicographically-ordered +/// set of such columns. /// /// If the keys attribute is a non-empty vector, then each aggregate in `aggregates` is /// expected to be a HashAggregate function. If the keys attribute is an empty vector, /// then each aggregate is assumed to be a ScalarAggregate function. class ARROW_EXPORT AggregateNodeOptions : public ExecNodeOptions { public: explicit AggregateNodeOptions(std::vector<Aggregate> aggregates, - std::vector<FieldRef> keys = {}) - : aggregates(std::move(aggregates)), keys(std::move(keys)) {} + std::vector<FieldRef> keys = {}, + std::vector<FieldRef> segment_keys = {}) + : aggregates(std::move(aggregates)), + keys(std::move(keys)), + segment_keys(std::move(segment_keys)) {} // aggregations which will be applied to the targeted fields std::vector<Aggregate> aggregates; // keys by which aggregations will be grouped std::vector<FieldRef> keys; + // keys by which aggregations will be segmented + std::vector<FieldRef> segment_keys; Review Comment: Can we document this in AggregateNodeOptions and make it super clear? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org