This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 63888e853b Improve AggregateUDFImpl::state_fields documentation (#9919)
63888e853b is described below
commit 63888e853b7b094f2f47f53192a94f38327f5f5a
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Apr 4 10:43:15 2024 -0400
Improve AggregateUDFImpl::state_fields documentation (#9919)
---
datafusion/expr/src/udaf.rs | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index ba80f39dde..14e5195116 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -171,9 +171,11 @@ impl AggregateUDF {
self.inner.accumulator(acc_args)
}
- /// Return the fields of the intermediate state used by this aggregator,
given
- /// its state name, value type and ordering fields. See
[`AggregateUDFImpl::state_fields`]
- /// for more details. Supports multi-phase aggregations
+ /// Return the fields used to store the intermediate state for this
aggregator, given
+ /// the name of the aggregate, value type and ordering fields. See
[`AggregateUDFImpl::state_fields`]
+ /// for more details.
+ ///
+ /// This is used to support multi-phase aggregations.
pub fn state_fields(
&self,
name: &str,
@@ -283,13 +285,28 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
/// `acc_args`: the arguments to the accumulator. See [`AccumulatorArgs`]
for more details.
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn
Accumulator>>;
- /// Return the fields of the intermediate state.
+ /// Return the fields used to store the intermediate state of this
accumulator.
+ ///
+ /// # Arguments:
+ /// 1. `name`: the name of the expression (e.g. `AVG`, `SUM`, etc.)
+ /// 2. `value_type`: the type of the aggregate's output (returned by
[`Self::return_type`])
+ /// 3. `ordering_fields`: the fields used to order the input arguments, if
any.
+ /// Empty if no ordering expression is provided.
+ ///
+ /// # Notes:
///
- /// name: the name of the state
+ /// The default implementation returns a single state field named `name`
+ /// with the same type as `value_type`. This is suitable for aggregates
such
+ /// as `SUM` or `MIN` where partial state can be combined by applying the
+ /// same aggregate.
///
- /// value_type: the type of the value, it should be the result of the
`return_type`
+ /// For aggregates such as `AVG` where the partial state is more complex
+ /// (e.g. a COUNT and a SUM), this method is used to define the additional
+ /// fields.
///
- /// ordering_fields: the fields used for ordering, empty if no ordering
expression is provided
+ /// The names of the fields must be unique within the query and thus should
+ /// be derived from `name`. See [`format_state_name`] for a utility
function
+ /// to generate a unique name.
fn state_fields(
&self,
name: &str,