This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 23d7fff0f8 Improve documentation for AggregateUDFImpl::value_from_stats (#12689)
23d7fff0f8 is described below

commit 23d7fff0f88409f3183151b7a4f8441e9bcc08fd
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Oct 1 15:01:21 2024 -0400

    Improve documentation for AggregateUDFImpl::value_from_stats (#12689)
    
    * Improve documentation for AggregateUDFImpl::value_from_stats
    
    * Update datafusion/expr/src/udaf.rs
    
    Co-authored-by: Oleks V <[email protected]>
    
    ---------
    
    Co-authored-by: Oleks V <[email protected]>
---
 datafusion/expr/src/udaf.rs | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index d8592bce60..780ea36910 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -95,8 +95,11 @@ impl fmt::Display for AggregateUDF {
     }
 }
 
+/// Arguments passed to [`AggregateUDFImpl::value_from_stats`]
 pub struct StatisticsArgs<'a> {
+    /// The statistics of the aggregate input
     pub statistics: &'a Statistics,
+    /// The resolved return type of the aggregate function
     pub return_type: &'a DataType,
     /// Whether the aggregate function is distinct.
     ///
@@ -251,13 +254,16 @@ impl AggregateUDF {
     }
 
     /// Returns true if the function is max, false if the function is min
-    /// None in all other cases, used in certain optimizations or
+    /// None in all other cases, used in certain aggregate
     /// optimizations
-    ///
     pub fn is_descending(&self) -> Option<bool> {
         self.inner.is_descending()
     }
 
+    /// Return the value of this aggregate function if it can be determined
+    /// entirely from statistics and arguments.
+    ///
+    /// See [`AggregateUDFImpl::value_from_stats`] for more details.
     pub fn value_from_stats(
         &self,
         statistics_args: &StatisticsArgs,
@@ -577,7 +583,15 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
     fn is_descending(&self) -> Option<bool> {
         None
     }
-    // Return the value of the current UDF from the statistics
+
+    /// Return the value of this aggregate function if it can be determined
+    /// entirely from statistics and arguments.
+    ///
+    /// Using a [`ScalarValue`] rather than a runtime computation can significantly
+    /// improve query performance.
+    ///
+    /// For example, if the minimum value of column `x` is known to be `42` from
+    /// statistics, then the aggregate `MIN(x)` should return `Some(ScalarValue(42))`.
     fn value_from_stats(&self, _statistics_args: &StatisticsArgs) -> 
Option<ScalarValue> {
         None
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to