This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 03d17e8473 Improve documentation for `AggregateUdfImpl::simplify` and 
`WindowUDFImpl::simplify` (#20712)
03d17e8473 is described below

commit 03d17e84737625df743f5a4755bd6583f4d75119
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Mar 5 16:14:15 2026 -0500

    Improve documentation for `AggregateUdfImpl::simplify` and 
`WindowUDFImpl::simplify` (#20712)
    
    ## Rationale for this change
    
    While working on https://github.com/apache/datafusion/pull/20665 I was
    somewhat confused about how the AggregateUDF simplify worked. So I added
    some more clarifying comments
    
    ## What changes are included in this PR?
    
    1. Improve documentation strings
    
    ## Are these changes tested?
    
    Yes by CI
    
    ## Are there any user-facing changes?
    Better API docs
    
    ---------
    
    Co-authored-by: Yongting You <[email protected]>
---
 datafusion/expr/src/expr.rs     |  4 ++--
 datafusion/expr/src/function.rs | 40 ++++++++++++++++++----------------------
 datafusion/expr/src/udaf.rs     | 36 ++++++++++++++++++++++--------------
 datafusion/expr/src/udf.rs      |  2 +-
 datafusion/expr/src/udwf.rs     | 27 +++++++++++++++------------
 5 files changed, 58 insertions(+), 51 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index ef0187a878..5c6acd480e 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1010,7 +1010,7 @@ impl WindowFunctionDefinition {
         }
     }
 
-    /// Return the inner window simplification function, if any
+    /// Returns this window function's simplification hook, if any.
     ///
     /// See [`WindowFunctionSimplification`] for more information
     pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -1097,7 +1097,7 @@ impl WindowFunction {
         }
     }
 
-    /// Return the inner window simplification function, if any
+    /// Returns this window function's simplification hook, if any.
     ///
     /// See [`WindowFunctionSimplification`] for more information
     pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index 68d2c90732..68865cbe1c 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -27,6 +27,8 @@ pub use datafusion_functions_aggregate_common::accumulator::{
     AccumulatorArgs, AccumulatorFactoryFunction, StateFieldsArgs,
 };
 
+use crate::expr::{AggregateFunction, WindowFunction};
+use crate::simplify::SimplifyContext;
 pub use datafusion_functions_window_common::expr::ExpressionArgs;
 pub use datafusion_functions_window_common::field::WindowUDFFieldArgs;
 pub use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
@@ -64,28 +66,22 @@ pub type PartitionEvaluatorFactory =
 pub type StateTypeFunction =
     Arc<dyn Fn(&DataType) -> Result<Arc<Vec<DataType>>> + Send + Sync>;
 
-/// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure
-/// A closure with two arguments:
-/// * 'aggregate_function': [crate::expr::AggregateFunction] for which 
simplified has been invoked
-/// * 'info': [crate::simplify::SimplifyContext]
+/// Type alias for [crate::udaf::AggregateUDFImpl::simplify].
 ///
-/// Closure returns simplified [Expr] or an error.
-pub type AggregateFunctionSimplification = Box<
-    dyn Fn(
-        crate::expr::AggregateFunction,
-        &crate::simplify::SimplifyContext,
-    ) -> Result<Expr>,
->;
+/// This closure is invoked with:
+/// * `aggregate_function`: [AggregateFunction] with already simplified 
arguments
+/// * `info`: [SimplifyContext]
+///
+/// It returns a simplified [Expr] or an error.
+pub type AggregateFunctionSimplification =
+    Box<dyn Fn(AggregateFunction, &SimplifyContext) -> Result<Expr>>;
 
-/// [crate::udwf::WindowUDFImpl::simplify] simplifier closure
-/// A closure with two arguments:
-/// * 'window_function': [crate::expr::WindowFunction] for which simplified 
has been invoked
-/// * 'info': [crate::simplify::SimplifyContext]
+/// Type alias for [crate::udwf::WindowUDFImpl::simplify].
+///
+/// This closure is invoked with:
+/// * `window_function`: [WindowFunction] with already simplified arguments
+/// * `info`: [SimplifyContext]
 ///
-/// Closure returns simplified [Expr] or an error.
-pub type WindowFunctionSimplification = Box<
-    dyn Fn(
-        crate::expr::WindowFunction,
-        &crate::simplify::SimplifyContext,
-    ) -> Result<Expr>,
->;
+/// It returns a simplified [Expr] or an error.
+pub type WindowFunctionSimplification =
+    Box<dyn Fn(WindowFunction, &SimplifyContext) -> Result<Expr>>;
diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index ee38077dbf..f2e2c53cdb 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -294,7 +294,7 @@ impl AggregateUDF {
         self.inner.reverse_expr()
     }
 
-    /// Do the function rewrite
+    /// Returns this aggregate function's simplification hook, if any.
     ///
     /// See [`AggregateUDFImpl::simplify`] for more details.
     pub fn simplify(&self) -> Option<AggregateFunctionSimplification> {
@@ -651,26 +651,34 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + 
Send + Sync {
         AggregateOrderSensitivity::HardRequirement
     }
 
-    /// Optionally apply per-UDaF simplification / rewrite rules.
+    /// Returns an optional hook for simplifying this user-defined aggregate.
     ///
-    /// This can be used to apply function specific simplification rules during
-    /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
-    /// implementation does nothing.
+    /// Use this hook to apply function-specific rewrites during optimization.
+    /// The default implementation returns `None`.
     ///
-    /// Note that DataFusion handles simplifying arguments and  "constant
-    /// folding" (replacing a function call with constant arguments such as
-    /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
-    /// optimizations manually for specific UDFs.
+    /// For example, `percentile_cont(x, 0.0)` and `percentile_cont(x, 1.0)` 
can
+    /// be rewritten to `MIN(x)` or `MAX(x)` depending on the `ORDER BY`
+    /// direction.
+    ///
+    /// DataFusion already simplifies arguments and performs constant folding
+    /// (for example, `my_add(1, 2) -> 3`). For nested expressions, the 
optimizer
+    /// runs simplification in multiple passes, so arguments are typically
+    /// simplified before this hook is invoked. As a result, UDF 
implementations
+    /// usually do not need to handle argument simplification themselves.
+    ///
+    /// See configuration `datafusion.optimizer.max_passes` for details on how 
many
+    /// optimization passes may be applied.
     ///
     /// # Returns
     ///
-    /// [None] if simplify is not defined or,
+    /// `None` if simplify is not defined.
     ///
-    /// Or, a closure with two arguments:
-    /// * 'aggregate_function': [AggregateFunction] for which simplified has 
been invoked
-    /// * 'info': [crate::simplify::SimplifyContext]
+    /// Or, a closure ([`AggregateFunctionSimplification`]) invoked with:
+    /// * `aggregate_function`: [AggregateFunction] with already simplified
+    ///   arguments
+    /// * `info`: [crate::simplify::SimplifyContext]
     ///
-    /// closure returns simplified [Expr] or an error.
+    /// The closure returns a simplified [Expr] or an error.
     ///
     /// # Notes
     ///
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 9705006e41..9286dd30b1 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -217,7 +217,7 @@ impl ScalarUDF {
         self.inner.return_field_from_args(args)
     }
 
-    /// Do the function rewrite
+    /// Returns this scalar function's simplification result.
     ///
     /// See [`ScalarUDFImpl::simplify`] for more details.
     pub fn simplify(
diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs
index 8f2b8a0d9b..7f86a69f87 100644
--- a/datafusion/expr/src/udwf.rs
+++ b/datafusion/expr/src/udwf.rs
@@ -157,7 +157,7 @@ impl WindowUDF {
         self.inner.signature()
     }
 
-    /// Do the function rewrite
+    /// Returns this window function's simplification hook, if any.
     ///
     /// See [`WindowUDFImpl::simplify`] for more details.
     pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -344,25 +344,28 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + 
Sync {
         partition_evaluator_args: PartitionEvaluatorArgs,
     ) -> Result<Box<dyn PartitionEvaluator>>;
 
-    /// Optionally apply per-UDWF simplification / rewrite rules.
+    /// Returns an optional hook for simplifying this user-defined window
+    /// function.
     ///
-    /// This can be used to apply function specific simplification rules during
-    /// optimization. The default implementation does nothing.
+    /// Use this hook to apply function-specific rewrites during optimization.
+    /// The default implementation returns `None`.
     ///
-    /// Note that DataFusion handles simplifying arguments and  "constant
-    /// folding" (replacing a function call with constant arguments such as
-    /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
-    /// optimizations manually for specific UDFs.
+    /// DataFusion already simplifies arguments and performs constant folding
+    /// (for example, `my_add(1, 2) -> 3`), so there is usually no need to
+    /// implement those optimizations manually for specific UDFs.
     ///
     /// Example:
     /// `advanced_udwf.rs`: 
<https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/udf/advanced_udwf.rs>
     ///
     /// # Returns
-    /// [None] if simplify is not defined or,
+    /// `None` if simplify is not defined.
     ///
-    /// Or, a closure with two arguments:
-    /// * 'window_function': [crate::expr::WindowFunction] for which 
simplified has been invoked
-    /// * 'info': [crate::simplify::SimplifyContext]
+    /// Or, a closure ([`WindowFunctionSimplification`]) invoked with:
+    /// * `window_function`: [WindowFunction] with already simplified
+    ///   arguments
+    /// * `info`: [crate::simplify::SimplifyContext]
+    ///
+    /// The closure returns a simplified [Expr] or an error.
     ///
     /// # Notes
     /// The returned expression must have the same schema as the original


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to