alamb commented on code in PR #15504:
URL: https://github.com/apache/datafusion/pull/15504#discussion_r2023660049
##########
datafusion/physical-expr/src/aggregate.rs:
##########
@@ -97,6 +97,165 @@ impl AggregateExprBuilder {
/// Constructs an `AggregateFunctionExpr` from the builder
///
/// Note that an [`Self::alias`] must be provided before calling this
method.
+ ///
+ /// # Example: Create an [`AggregateUDF`]
+ ///
+ /// In the following example, [`AggregateFunctionExpr`] will be built
using [`AggregateExprBuilder`]
+ /// which provides a build function. Full example could be accessed from
the source file.
+ ///
+ /// ```
+ /// # use std::any::Any;
+ /// # use std::sync::OnceLock;
+ /// # use std::sync::Arc;
+ /// # use arrow::datatypes::DataType;
+ /// # use datafusion_common::{DataFusionError, plan_err, Result,
ScalarValue};
+ /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility,
Expr, Documentation};
+ /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator,
function::{AccumulatorArgs, StateFieldsArgs}};
+ /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE;
+ /// # use arrow::datatypes::Schema;
+ /// # use arrow::datatypes::Field;
+ /// # use arrow::array::Array;
+ /// #
+ /// # #[derive(Debug)]
+ /// # struct FirstValueAccumulator {
+ /// # value: Option<ScalarValue>,
+ /// # data_type: DataType,
+ /// # }
+ /// #
+ /// # impl Accumulator for FirstValueAccumulator {
+ /// # fn update_batch(&mut self, values: &[Arc<dyn Array>]) ->
Result<()> {
Review Comment:
I think we can significantly shorten the example by just using
`unimplemented!()` instead of adding an actual implementation for the
accumulator and its methods.
##########
datafusion/physical-expr/src/aggregate.rs:
##########
@@ -97,6 +97,165 @@ impl AggregateExprBuilder {
/// Constructs an `AggregateFunctionExpr` from the builder
///
/// Note that an [`Self::alias`] must be provided before calling this
method.
+ ///
+ /// # Example: Create an [`AggregateUDF`]
+ ///
+ /// In the following example, [`AggregateFunctionExpr`] will be built
using [`AggregateExprBuilder`]
+ /// which provides a build function. Full example could be accessed from
the source file.
+ ///
+ /// ```
+ /// # use std::any::Any;
+ /// # use std::sync::OnceLock;
+ /// # use std::sync::Arc;
+ /// # use arrow::datatypes::DataType;
+ /// # use datafusion_common::{DataFusionError, plan_err, Result,
ScalarValue};
+ /// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility,
Expr, Documentation};
+ /// # use datafusion_expr::{AggregateUDFImpl, AggregateUDF, Accumulator,
function::{AccumulatorArgs, StateFieldsArgs}};
+ /// # use datafusion_expr::window_doc_sections::DOC_SECTION_AGGREGATE;
+ /// # use arrow::datatypes::Schema;
+ /// # use arrow::datatypes::Field;
+ /// # use arrow::array::Array;
+ /// #
+ /// # #[derive(Debug)]
+ /// # struct FirstValueAccumulator {
+ /// # value: Option<ScalarValue>,
+ /// # data_type: DataType,
+ /// # }
+ /// #
+ /// # impl Accumulator for FirstValueAccumulator {
+ /// # fn update_batch(&mut self, values: &[Arc<dyn Array>]) ->
Result<()> {
+ /// # if self.value.is_none() && !values.is_empty() {
+ /// # let first_array = &values[0];
+ /// # for i in 0..first_array.len() {
+ /// # if !first_array.is_null(i) {
+ /// # self.value =
Some(ScalarValue::try_from_array(first_array, i)?);
+ /// # break;
+ /// # }
+ /// # }
+ /// # }
+ /// # Ok(())
+ /// # }
+ /// #
+ /// # fn merge_batch(&mut self, states: &[Arc<dyn Array>]) ->
Result<()> {
+ /// # if self.value.is_none() && !states.is_empty() {
+ /// # let first_array = &states[0];
+ /// # for i in 0..first_array.len() {
+ /// # if !first_array.is_null(i) {
+ /// # self.value =
Some(ScalarValue::try_from_array(first_array, i)?);
+ /// # break;
+ /// # }
+ /// # }
+ /// # }
+ /// # Ok(())
+ /// # }
+ /// #
+ /// # fn evaluate(&mut self) -> Result<ScalarValue> {
+ /// # match &self.value {
+ /// # Some(value) => Ok(value.clone()),
+ /// # None => ScalarValue::try_from(&self.data_type),
+ /// # }
+ /// # }
+ /// #
+ /// # fn size(&self) -> usize {
+ /// # std::mem::size_of_val(self)
+ /// # }
+ /// #
+ /// # fn state(&mut self) -> Result<Vec<ScalarValue>> {
+ /// # match &self.value {
+ /// # Some(value) => Ok(vec![value.clone()]),
+ /// # None => ScalarValue::try_from(&self.data_type).map(|v|
vec![v]),
+ /// # }
+ /// # }
+ /// # }
+ /// #
+ /// # #[derive(Debug, Clone)]
+ /// # struct FirstValueUdf {
+ /// # signature: Signature,
+ /// # }
+ /// #
+ /// # impl FirstValueUdf {
+ /// # fn new() -> Self {
+ /// # Self {
+ /// # signature: Signature::any(1, Volatility::Immutable),
+ /// # }
+ /// # }
+ /// # }
+ /// #
+ /// # static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+ /// #
+ /// # fn get_doc() -> &'static Documentation {
+ /// # DOCUMENTATION.get_or_init(|| {
+ /// # Documentation::builder(
+ /// # DOC_SECTION_AGGREGATE,
+ /// # "returns the first value in a set of values",
+ /// # "first_value(column)"
+ /// # )
+ /// # .with_argument("arg1", "The column to get the first value
from")
+ /// # .build()
+ /// # })
+ /// # }
+ /// #
+ /// # impl AggregateUDFImpl for FirstValueUdf {
+ /// # fn as_any(&self) -> &dyn Any { self }
+ /// # fn name(&self) -> &str { "first_value" }
+ /// # fn signature(&self) -> &Signature { &self.signature }
+ /// # fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+ /// # Ok(args[0].clone())
+ /// # }
+ /// #
+ /// # fn accumulator(&self, acc_args: AccumulatorArgs) ->
Result<Box<dyn Accumulator>> {
+ /// # let input_type =
acc_args.schema.field(0).data_type().clone();
+ /// #
+ /// # Ok(Box::new(FirstValueAccumulator {
+ /// # value: None,
+ /// # data_type: input_type,
+ /// # }))
+ /// # }
Review Comment:
If you do this (and make similar changes in the rest of the PR), you can avoid
most of the remaining copy/paste in the example above
```suggestion
/// # fn accumulator(&self, acc_args: AccumulatorArgs) ->
Result<Box<dyn Accumulator>> {
/// # unimplemented!()
/// # }
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]