(datafusion) branch main updated: Migrate documentation for Aggregate Functions to code (#12861)

alamb Fri, 11 Oct 2024 06:58:53 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new a8d3fae21d Migrate documentation for Aggregate Functions to code 
(#12861)
a8d3fae21d is described below

commit a8d3fae21d12a9caafad77a11b8a22bcfe9555fd
Author: Jonathan Chen <[email protected]>
AuthorDate: Fri Oct 11 09:58:41 2024 -0400

    Migrate documentation for Aggregate Functions to code (#12861)
    
    * aggregate function migration
    
    * fmt fix
---
 .../functions-aggregate/src/approx_distinct.rs     |  35 +-
 .../functions-aggregate/src/approx_median.rs       |  35 +-
 .../src/approx_percentile_cont.rs                  |  37 +-
 .../src/approx_percentile_cont_with_weight.rs      |  38 +-
 datafusion/functions-aggregate/src/array_agg.rs    |  34 +-
 datafusion/functions-aggregate/src/average.rs      |  36 +-
 datafusion/functions-aggregate/src/bool_and_or.rs  |  59 +-
 datafusion/functions-aggregate/src/correlation.rs  |  34 +-
 datafusion/functions-aggregate/src/count.rs        |  41 +-
 datafusion/functions-aggregate/src/covariance.rs   |  62 ++-
 datafusion/functions-aggregate/src/first_last.rs   |  63 ++-
 datafusion/functions-aggregate/src/grouping.rs     |  39 +-
 datafusion/functions-aggregate/src/median.rs       |  36 +-
 datafusion/functions-aggregate/src/min_max.rs      |  62 ++-
 datafusion/functions-aggregate/src/nth_value.rs    |  41 +-
 datafusion/functions-aggregate/src/stddev.rs       |  62 ++-
 datafusion/functions-aggregate/src/string_agg.rs   |  35 +-
 datafusion/functions-aggregate/src/sum.rs          |  34 +-
 docs/source/user-guide/sql/aggregate_functions.md  | 329 -----------
 .../user-guide/sql/aggregate_functions_new.md      | 612 +++++++++++++++++++++
 20 files changed, 1363 insertions(+), 361 deletions(-)

diff --git a/datafusion/functions-aggregate/src/approx_distinct.rs 
b/datafusion/functions-aggregate/src/approx_distinct.rs
index cf8217fe98..efa9a6d8da 100644
--- a/datafusion/functions-aggregate/src/approx_distinct.rs
+++ b/datafusion/functions-aggregate/src/approx_distinct.rs
@@ -31,13 +31,17 @@ use datafusion_common::ScalarValue;
 use datafusion_common::{
     downcast_value, internal_err, not_impl_err, DataFusionError, Result,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
 use std::hash::Hash;
 use std::marker::PhantomData;
+use std::sync::OnceLock;
 make_udaf_expr_and_func!(
     ApproxDistinct,
     approx_distinct,
@@ -303,4 +307,33 @@ impl AggregateUDFImpl for ApproxDistinct {
         };
         Ok(accumulator)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_distinct_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_distinct_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate number of distinct input values 
calculated using the HyperLogLog algorithm.",
+            )
+            .with_syntax_example("approx_distinct(expression)")
+            .with_sql_example(r#"```sql
+> SELECT approx_distinct(column_name) FROM table_name;
++-----------------------------------+
+| approx_distinct(column_name)      |
++-----------------------------------+
+| 42                                |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/approx_median.rs 
b/datafusion/functions-aggregate/src/approx_median.rs
index 7a7b124325..dd5bb8d441 100644
--- a/datafusion/functions-aggregate/src/approx_median.rs
+++ b/datafusion/functions-aggregate/src/approx_median.rs
@@ -19,15 +19,19 @@
 
 use std::any::Any;
 use std::fmt::Debug;
+use std::sync::OnceLock;
 
 use arrow::{datatypes::DataType, datatypes::Field};
 use arrow_schema::DataType::{Float64, UInt64};
 
 use datafusion_common::{not_impl_err, plan_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::NUMERICS;
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 
 use crate::approx_percentile_cont::ApproxPercentileAccumulator;
 
@@ -116,4 +120,33 @@ impl AggregateUDFImpl for ApproxMedian {
             acc_args.exprs[0].data_type(acc_args.schema)?,
         )))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_median_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_median_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate median (50th percentile) of input 
values. It is an alias of `approx_percentile_cont(x, 0.5)`.",
+            )
+            .with_syntax_example("approx_median(expression)")
+            .with_sql_example(r#"```sql
+> SELECT approx_median(column_name) FROM table_name;
++-----------------------------------+
+| approx_median(column_name)        |
++-----------------------------------+
+| 23.5                              |
++-----------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs 
b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
index 5578aebbf4..b4488d6d9e 100644
--- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs
+++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs
@@ -17,7 +17,7 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{Array, RecordBatch};
 use arrow::compute::{filter, is_not_null};
@@ -34,12 +34,13 @@ use datafusion_common::{
     downcast_value, internal_err, not_impl_datafusion_err, not_impl_err, 
plan_err,
     DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, ColumnarValue, Expr, Signature, 
TypeSignature,
-    Volatility,
+    Accumulator, AggregateUDFImpl, ColumnarValue, Documentation, Expr, 
Signature,
+    TypeSignature, Volatility,
 };
 use datafusion_functions_aggregate_common::tdigest::{
     TDigest, TryIntoF64, DEFAULT_MAX_SIZE,
@@ -268,6 +269,36 @@ impl AggregateUDFImpl for ApproxPercentileCont {
         }
         Ok(arg_types[0].clone())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_percentile_cont_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_percentile_cont_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the approximate percentile of input values using the 
t-digest algorithm.",
+            )
+            .with_syntax_example("approx_percentile_cont(expression, 
percentile, centroids)")
+            .with_sql_example(r#"```sql
+> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
++-------------------------------------------------+
+| approx_percentile_cont(column_name, 0.75, 100)  |
++-------------------------------------------------+
+| 65.0                                            |
++-------------------------------------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("percentile", "Percentile to compute. Must be a 
float value between 0 and 1 (inclusive).")
+            .with_argument("centroids", "Number of centroids to use in the 
t-digest algorithm. _Default is 100_. A higher number results in more accurate 
approximation but requires more memory.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git 
a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs 
b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
index fee67ba162..8cbf9587a7 100644
--- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
+++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs
@@ -17,7 +17,7 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::{
     array::ArrayRef,
@@ -26,10 +26,13 @@ use arrow::{
 
 use datafusion_common::ScalarValue;
 use datafusion_common::{not_impl_err, plan_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_APPROXIMATE;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::NUMERICS;
 use datafusion_expr::Volatility::Immutable;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, TypeSignature};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, TypeSignature,
+};
 use datafusion_functions_aggregate_common::tdigest::{
     Centroid, TDigest, DEFAULT_MAX_SIZE,
 };
@@ -151,6 +154,37 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight {
     fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
         self.approx_percentile_cont.state_fields(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_approx_percentile_cont_with_weight_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_approx_percentile_cont_with_weight_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_APPROXIMATE)
+            .with_description(
+                "Returns the weighted approximate percentile of input values 
using the t-digest algorithm.",
+            )
+            
.with_syntax_example("approx_percentile_cont_with_weight(expression, weight, 
percentile)")
+            .with_sql_example(r#"```sql
+> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) 
FROM table_name;
++----------------------------------------------------------------------+
+| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
++----------------------------------------------------------------------+
+| 78.5                                                                 |
++----------------------------------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("weight", "Expression to use as weight. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("percentile", "Percentile to compute. Must be a 
float value between 0 and 1 (inclusive).")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/array_agg.rs 
b/datafusion/functions-aggregate/src/array_agg.rs
index 15146fc4a2..b44c4e6874 100644
--- a/datafusion/functions-aggregate/src/array_agg.rs
+++ b/datafusion/functions-aggregate/src/array_agg.rs
@@ -25,15 +25,16 @@ use datafusion_common::cast::as_list_array;
 use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
 use datafusion_common::{exec_err, ScalarValue};
 use datafusion_common::{internal_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::AggregateUDFImpl;
 use datafusion_expr::{Accumulator, Signature, Volatility};
+use datafusion_expr::{AggregateUDFImpl, Documentation};
 use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays;
 use datafusion_functions_aggregate_common::utils::ordering_fields;
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, 
PhysicalSortExpr};
 use std::collections::{HashSet, VecDeque};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 make_udaf_expr_and_func!(
     ArrayAgg,
@@ -142,6 +143,35 @@ impl AggregateUDFImpl for ArrayAgg {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(array_agg_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_array_agg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_array_agg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns an array created from the expression elements. If 
ordering is required, elements are inserted in the specified order.",
+            )
+            .with_syntax_example("array_agg(expression [ORDER BY expression])")
+            .with_sql_example(r#"```sql
+> SELECT array_agg(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| array_agg(column_name ORDER BY other_column)  |
++-----------------------------------------------+
+| [element1, element2, element3]                |
++-----------------------------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/average.rs 
b/datafusion/functions-aggregate/src/average.rs
index ddad76a873..ad58eecdf9 100644
--- a/datafusion/functions-aggregate/src/average.rs
+++ b/datafusion/functions-aggregate/src/average.rs
@@ -28,12 +28,14 @@ use arrow::datatypes::{
     Float64Type, UInt64Type,
 };
 use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::type_coercion::aggregates::{avg_return_type, 
coerce_avg_type};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::Volatility::Immutable;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF, 
Signature,
+    Accumulator, AggregateUDFImpl, Documentation, EmitTo, GroupsAccumulator,
+    ReversedUDAF, Signature,
 };
 
 use 
datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::NullState;
@@ -45,7 +47,7 @@ use 
datafusion_functions_aggregate_common::utils::DecimalAverager;
 use log::debug;
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 make_udaf_expr_and_func!(
     Avg,
@@ -235,6 +237,36 @@ impl AggregateUDFImpl for Avg {
         }
         coerce_avg_type(self.name(), arg_types)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_avg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_avg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the average of numeric values in the specified 
column.",
+            )
+            .with_syntax_example("avg(expression)")
+            .with_sql_example(r#"```sql
+> SELECT avg(column_name) FROM table_name;
++---------------------------+
+| avg(column_name)           |
++---------------------------+
+| 42.75                      |
++---------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .with_argument("Aliases: ", "`mean`")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute the average
diff --git a/datafusion/functions-aggregate/src/bool_and_or.rs 
b/datafusion/functions-aggregate/src/bool_and_or.rs
index 7cc7d9ff7f..e212ba8d61 100644
--- a/datafusion/functions-aggregate/src/bool_and_or.rs
+++ b/datafusion/functions-aggregate/src/bool_and_or.rs
@@ -18,6 +18,7 @@
 //! Defines physical expressions that can evaluated at runtime during query 
execution
 
 use std::any::Any;
+use std::sync::OnceLock;
 
 use arrow::array::ArrayRef;
 use arrow::array::BooleanArray;
@@ -29,10 +30,12 @@ use arrow::datatypes::Field;
 use datafusion_common::internal_err;
 use datafusion_common::{downcast_value, not_impl_err};
 use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, 
Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, 
ReversedUDAF,
+    Signature, Volatility,
 };
 
 use 
datafusion_functions_aggregate_common::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
@@ -172,6 +175,34 @@ impl AggregateUDFImpl for BoolAnd {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_and_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_bool_and_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if all non-null input values are true, otherwise 
false.",
+            )
+            .with_syntax_example("bool_and(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug, Default)]
@@ -293,6 +324,32 @@ impl AggregateUDFImpl for BoolOr {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_bool_or_doc())
+    }
+}
+
+fn get_bool_or_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns true if any non-null input value is true, otherwise 
false.",
+            )
+            .with_syntax_example("bool_or(expression)")
+            .with_sql_example(r#"```sql
+> SELECT bool_or(column_name) FROM table_name;
++----------------------------+
+| bool_or(column_name)        |
++----------------------------+
+| true                        |
++----------------------------+
+```"#)
+            .with_standard_argument("expression", "Expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug, Default)]
diff --git a/datafusion/functions-aggregate/src/correlation.rs 
b/datafusion/functions-aggregate/src/correlation.rs
index 88f01b06d2..60be3608e9 100644
--- a/datafusion/functions-aggregate/src/correlation.rs
+++ b/datafusion/functions-aggregate/src/correlation.rs
@@ -19,7 +19,7 @@
 
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::compute::{and, filter, is_not_null};
 use arrow::{
@@ -30,11 +30,12 @@ use arrow::{
 use crate::covariance::CovarianceAccumulator;
 use crate::stddev::StddevAccumulator;
 use datafusion_common::{plan_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::{
     function::{AccumulatorArgs, StateFieldsArgs},
     type_coercion::aggregates::NUMERICS,
     utils::format_state_name,
-    Accumulator, AggregateUDFImpl, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -107,6 +108,35 @@ impl AggregateUDFImpl for Correlation {
             ),
         ])
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_corr_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_corr_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the coefficient of correlation between two numeric 
values.",
+            )
+            .with_syntax_example("corr(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT corr(column1, column2) FROM table_name;
++--------------------------------+
+| corr(column1, column2)         |
++--------------------------------+
+| 0.85                           |
++--------------------------------+
+```"#)
+            .with_argument("expression1", "First expression to operate on. Can 
be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .with_argument("expression2", "Second expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute correlation
diff --git a/datafusion/functions-aggregate/src/count.rs 
b/datafusion/functions-aggregate/src/count.rs
index cc245b3572..23dd5b65bf 100644
--- a/datafusion/functions-aggregate/src/count.rs
+++ b/datafusion/functions-aggregate/src/count.rs
@@ -20,8 +20,9 @@ use datafusion_common::stats::Precision;
 use 
datafusion_functions_aggregate_common::aggregate::count_distinct::BytesViewDistinctCountAccumulator;
 use datafusion_physical_expr::expressions;
 use std::collections::HashSet;
+use std::fmt::Debug;
 use std::ops::BitAnd;
-use std::{fmt::Debug, sync::Arc};
+use std::sync::{Arc, OnceLock};
 
 use arrow::{
     array::{ArrayRef, AsArray},
@@ -43,10 +44,11 @@ use arrow::{
 use datafusion_common::{
     downcast_value, internal_err, not_impl_err, DataFusionError, Result, 
ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::{
     function::AccumulatorArgs, utils::format_state_name, Accumulator, 
AggregateUDFImpl,
-    EmitTo, GroupsAccumulator, Signature, Volatility,
+    Documentation, EmitTo, GroupsAccumulator, Signature, Volatility,
 };
 use datafusion_expr::{Expr, ReversedUDAF, StatisticsArgs, TypeSignature};
 use datafusion_functions_aggregate_common::aggregate::count_distinct::{
@@ -324,6 +326,41 @@ impl AggregateUDFImpl for Count {
         }
         None
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_count_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_count_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the number of non-null values in the specified 
column. To include null values in the total count, use `count(*)`.",
+            )
+            .with_syntax_example("count(expression)")
+            .with_sql_example(r#"```sql
+> SELECT count(column_name) FROM table_name;
++-----------------------+
+| count(column_name)     |
++-----------------------+
+| 100                   |
++-----------------------+
+
+> SELECT count(*) FROM table_name;
++------------------+
+| count(*)         |
++------------------+
+| 120              |
++------------------+
+```"#)
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/covariance.rs 
b/datafusion/functions-aggregate/src/covariance.rs
index d0abb079ef..c599b58ed2 100644
--- a/datafusion/functions-aggregate/src/covariance.rs
+++ b/datafusion/functions-aggregate/src/covariance.rs
@@ -18,6 +18,7 @@
 //! [`CovarianceSample`]: covariance sample aggregations.
 
 use std::fmt::Debug;
+use std::sync::OnceLock;
 
 use arrow::{
     array::{ArrayRef, Float64Array, UInt64Array},
@@ -29,11 +30,12 @@ use datafusion_common::{
     downcast_value, plan_err, unwrap_or_internal_err, DataFusionError, Result,
     ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::{
     function::{AccumulatorArgs, StateFieldsArgs},
     type_coercion::aggregates::NUMERICS,
     utils::format_state_name,
-    Accumulator, AggregateUDFImpl, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -124,6 +126,36 @@ impl AggregateUDFImpl for CovarianceSample {
     fn aliases(&self) -> &[String] {
         &self.aliases
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_covar_samp_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_covar_samp_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the sample covariance of a set of number pairs.",
+            )
+            .with_syntax_example("covar_samp(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression1", "First expression to operate on. Can 
be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .with_argument("expression2", "Second expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 pub struct CovariancePopulation {
@@ -193,6 +225,34 @@ impl AggregateUDFImpl for CovariancePopulation {
             StatsType::Population,
         )?))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_covar_pop_doc())
+    }
+}
+
+fn get_covar_pop_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the population covariance of a set of number pairs.",
+            )
+            .with_syntax_example("covar_pop(expression1, expression2)")
+            .with_sql_example(r#"```sql
+> SELECT covar_pop(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_pop(column1, column2)       |
++-----------------------------------+
+| 7.63                              |
++-----------------------------------+
+```"#, 
+            )
+            .with_argument("expression1", "First expression to operate on. Can 
be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .with_argument("expression2", "Second expression to operate on. 
Can be a constant, column, or function, and any combination of arithmetic 
operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute covariance
diff --git a/datafusion/functions-aggregate/src/first_last.rs 
b/datafusion/functions-aggregate/src/first_last.rs
index 41ac787579..02b8c52282 100644
--- a/datafusion/functions-aggregate/src/first_last.rs
+++ b/datafusion/functions-aggregate/src/first_last.rs
@@ -19,7 +19,7 @@
 
 use std::any::Any;
 use std::fmt::Debug;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{ArrayRef, AsArray, BooleanArray};
 use arrow::compute::{self, lexsort_to_indices, SortColumn};
@@ -28,11 +28,12 @@ use datafusion_common::utils::{compare_rows, 
get_row_at_idx, take_arrays};
 use datafusion_common::{
     arrow_datafusion_err, internal_err, DataFusionError, Result, ScalarValue,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Expr, 
ExprFunctionExt,
-    Signature, SortExpr, TypeSignature, Volatility,
+    Accumulator, AggregateUDFImpl, ArrayFunctionSignature, Documentation, Expr,
+    ExprFunctionExt, Signature, SortExpr, TypeSignature, Volatility,
 };
 use datafusion_functions_aggregate_common::utils::get_sort_options;
 use datafusion_physical_expr_common::sort_expr::{LexOrdering, 
PhysicalSortExpr};
@@ -165,6 +166,35 @@ impl AggregateUDFImpl for FirstValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(last_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_first_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_first_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the first element in an aggregation group according 
to the requested ordering. If no ordering is given, returns an arbitrary 
element from the group.",
+            )
+            .with_syntax_example("first_value(expression [ORDER BY 
expression])")
+            .with_sql_example(r#"```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
@@ -466,6 +496,33 @@ impl AggregateUDFImpl for LastValue {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Reversed(first_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_last_value_doc())
+    }
+}
+
+fn get_last_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the last element in an aggregation group according to 
the requested ordering. If no ordering is given, returns an arbitrary element 
from the group.",
+            )
+            .with_syntax_example("last_value(expression [ORDER BY 
expression])")
+            .with_sql_example(r#"```sql
+> SELECT last_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| last_value(column_name ORDER BY other_column) |
++-----------------------------------------------+
+| last_element                                  |
++-----------------------------------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/grouping.rs 
b/datafusion/functions-aggregate/src/grouping.rs
index 6fb7c3800f..09e9b90b2e 100644
--- a/datafusion/functions-aggregate/src/grouping.rs
+++ b/datafusion/functions-aggregate/src/grouping.rs
@@ -19,14 +19,18 @@
 
 use std::any::Any;
 use std::fmt;
+use std::sync::OnceLock;
 
 use arrow::datatypes::DataType;
 use arrow::datatypes::Field;
 use datafusion_common::{not_impl_err, Result};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::utils::format_state_name;
-use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr::{
+    Accumulator, AggregateUDFImpl, Documentation, Signature, Volatility,
+};
 
 make_udaf_expr_and_func!(
     Grouping,
@@ -94,4 +98,37 @@ impl AggregateUDFImpl for Grouping {
             "physical plan is not yet implemented for GROUPING aggregate 
function"
         )
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_grouping_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_grouping_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns 1 if the data is aggregated across the specified 
column, or 0 if it is not aggregated in the result set.",
+            )
+            .with_syntax_example("grouping(expression)")
+            .with_sql_example(r#"```sql
+> SELECT column_name, GROUPING(column_name) AS group_column
+  FROM table_name
+  GROUP BY GROUPING SETS ((column_name), ());
++-------------+-------------+
+| column_name | group_column |
++-------------+-------------+
+| value1      | 0           |
+| value2      | 0           |
+| NULL        | 1           |
++-------------+-------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to evaluate whether data 
is aggregated across the specified column. Can be a constant, column, or 
function.")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions-aggregate/src/median.rs 
b/datafusion/functions-aggregate/src/median.rs
index 7dd0de14c3..8eb17db1ec 100644
--- a/datafusion/functions-aggregate/src/median.rs
+++ b/datafusion/functions-aggregate/src/median.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use std::collections::HashSet;
-use std::fmt::Formatter;
-use std::{fmt::Debug, sync::Arc};
+use std::fmt::{Debug, Formatter};
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{downcast_integer, ArrowNumericType};
 use arrow::{
@@ -33,10 +33,11 @@ use arrow::array::ArrowNativeTypeOp;
 use arrow::datatypes::ArrowNativeType;
 
 use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::{
     function::AccumulatorArgs, utils::format_state_name, Accumulator, 
AggregateUDFImpl,
-    Signature, Volatility,
+    Documentation, Signature, Volatility,
 };
 use datafusion_functions_aggregate_common::utils::Hashable;
 
@@ -152,6 +153,35 @@ impl AggregateUDFImpl for Median {
     fn aliases(&self) -> &[String] {
         &[]
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_median_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_median_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the median value in the specified column.",
+            )
+            .with_syntax_example("median(expression)")
+            .with_sql_example(r#"```sql
+> SELECT median(column_name) FROM table_name;
++----------------------+
+| median(column_name)   |
++----------------------+
+| 45.5                 |
++----------------------+
+```"#,
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// The median accumulator accumulates the raw input values
diff --git a/datafusion/functions-aggregate/src/min_max.rs 
b/datafusion/functions-aggregate/src/min_max.rs
index 7d2e8e66e2..3d2915fd09 100644
--- a/datafusion/functions-aggregate/src/min_max.rs
+++ b/datafusion/functions-aggregate/src/min_max.rs
@@ -38,6 +38,7 @@ use datafusion_common::stats::Precision;
 use datafusion_common::{
     downcast_value, exec_err, internal_err, ColumnStatistics, DataFusionError, 
Result,
 };
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use 
datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use datafusion_physical_expr::expressions;
 use std::fmt::Debug;
@@ -51,11 +52,13 @@ use arrow::datatypes::{
 
 use datafusion_common::ScalarValue;
 use datafusion_expr::{
-    function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Signature, 
Volatility,
+    function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Documentation, 
Signature,
+    Volatility,
 };
 use datafusion_expr::{GroupsAccumulator, StatisticsArgs};
 use half::f16;
 use std::ops::Deref;
+use std::sync::OnceLock;
 
 fn get_min_max_result_type(input_types: &[DataType]) -> Result<Vec<DataType>> {
     // make sure that the input types only has one element.
@@ -321,6 +324,35 @@ impl AggregateUDFImpl for Max {
     fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> 
Option<ScalarValue> {
         self.value_from_statistics(statistics_args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_max_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_max_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the maximum value in the specified column.",
+            )
+            .with_syntax_example("max(expression)")
+            .with_sql_example(r#"```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 // Statically-typed version of min/max(array) -> ScalarValue for string types
@@ -1119,7 +1151,35 @@ impl AggregateUDFImpl for Min {
     fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
         datafusion_expr::ReversedUDAF::Identical
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_min_doc())
+    }
+}
+
+fn get_min_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the minimum value in the specified column.",
+            )
+            .with_syntax_example("min(expression)")
+            .with_sql_example(r#"```sql
+> SELECT min(column_name) FROM table_name;
++----------------------+
+| min(column_name)      |
++----------------------+
+| 12                   |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
+
 /// An accumulator to compute the minimum value
 #[derive(Debug)]
 pub struct MinAccumulator {
diff --git a/datafusion/functions-aggregate/src/nth_value.rs 
b/datafusion/functions-aggregate/src/nth_value.rs
index bbfe56914c..6d8cea8f05 100644
--- a/datafusion/functions-aggregate/src/nth_value.rs
+++ b/datafusion/functions-aggregate/src/nth_value.rs
@@ -20,18 +20,19 @@
 
 use std::any::Any;
 use std::collections::VecDeque;
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::{new_empty_array, ArrayRef, AsArray, StructArray};
 use arrow_schema::{DataType, Field, Fields};
 
 use datafusion_common::utils::{array_into_list_array_nullable, get_row_at_idx};
 use datafusion_common::{exec_err, internal_err, not_impl_err, Result, 
ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    lit, Accumulator, AggregateUDFImpl, ExprFunctionExt, ReversedUDAF, 
Signature,
-    SortExpr, Volatility,
+    lit, Accumulator, AggregateUDFImpl, Documentation, ExprFunctionExt, 
ReversedUDAF,
+    Signature, SortExpr, Volatility,
 };
 use datafusion_functions_aggregate_common::merge_arrays::merge_ordered_arrays;
 use datafusion_functions_aggregate_common::utils::ordering_fields;
@@ -161,6 +162,40 @@ impl AggregateUDFImpl for NthValueAgg {
     fn reverse_expr(&self) -> ReversedUDAF {
         ReversedUDAF::Reversed(nth_value_udaf())
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_nth_value_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_nth_value_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the nth value in a group of values.",
+            )
+            .with_syntax_example("nth_value(expression, n ORDER BY 
expression)")
+            .with_sql_example(r#"```sql
+> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id 
ORDER BY salary ASC) AS second_salary_by_dept
+  FROM employee;
++---------+--------+-------------------------+
+| dept_id | salary | second_salary_by_dept   |
++---------+--------+-------------------------+
+| 1       | 30000  | NULL                    |
+| 1       | 40000  | 40000                   |
+| 1       | 50000  | 40000                   |
+| 2       | 35000  | NULL                    |
+| 2       | 45000  | 45000                   |
++---------+--------+-------------------------+
+```"#)
+            .with_standard_argument("expression", "The column or expression to 
retrieve the nth value from.")
+            .with_argument("n", "The position (nth) of the value to retrieve, 
based on the ordering.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/stddev.rs 
b/datafusion/functions-aggregate/src/stddev.rs
index a25ab5e319..9f9da0c585 100644
--- a/datafusion/functions-aggregate/src/stddev.rs
+++ b/datafusion/functions-aggregate/src/stddev.rs
@@ -19,17 +19,19 @@
 
 use std::any::Any;
 use std::fmt::{Debug, Formatter};
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use arrow::array::Float64Array;
 use arrow::{array::ArrayRef, datatypes::DataType, datatypes::Field};
 
 use datafusion_common::{internal_err, not_impl_err, Result};
 use datafusion_common::{plan_err, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_STATISTICAL;
 use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, Signature,
+    Volatility,
 };
 use datafusion_functions_aggregate_common::stats::StatsType;
 
@@ -132,6 +134,35 @@ impl AggregateUDFImpl for Stddev {
     ) -> Result<Box<dyn GroupsAccumulator>> {
         Ok(Box::new(StddevGroupsAccumulator::new(StatsType::Sample)))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_stddev_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the standard deviation of a set of numbers.",
+            )
+            .with_syntax_example("stddev(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 make_udaf_expr_and_func!(
@@ -228,6 +259,33 @@ impl AggregateUDFImpl for StddevPop {
             StatsType::Population,
         )))
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_stddev_pop_doc())
+    }
+}
+
+fn get_stddev_pop_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_STATISTICAL)
+            .with_description(
+                "Returns the population standard deviation of a set of 
numbers.",
+            )
+            .with_syntax_example("stddev_pop(expression)")
+            .with_sql_example(r#"```sql
+> SELECT stddev_pop(column_name) FROM table_name;
++--------------------------+
+| stddev_pop(column_name)   |
++--------------------------+
+| 10.56                    |
++--------------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// An accumulator to compute the average
diff --git a/datafusion/functions-aggregate/src/string_agg.rs 
b/datafusion/functions-aggregate/src/string_agg.rs
index a7e9a37e23..66fc199106 100644
--- a/datafusion/functions-aggregate/src/string_agg.rs
+++ b/datafusion/functions-aggregate/src/string_agg.rs
@@ -22,12 +22,14 @@ use arrow_schema::DataType;
 use datafusion_common::cast::as_generic_string_array;
 use datafusion_common::Result;
 use datafusion_common::{not_impl_err, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, Signature, TypeSignature, Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, Signature, TypeSignature, 
Volatility,
 };
 use datafusion_physical_expr::expressions::Literal;
 use std::any::Any;
+use std::sync::OnceLock;
 
 make_udaf_expr_and_func!(
     StringAgg,
@@ -98,6 +100,37 @@ impl AggregateUDFImpl for StringAgg {
 
         not_impl_err!("expect literal")
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_string_agg_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_string_agg_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Concatenates the values of string expressions and places 
separator values between them."
+            )
+            .with_syntax_example("string_agg(expression, delimiter)")
+            .with_sql_example(r#"```sql
+> SELECT string_agg(name, ', ') AS names_list
+  FROM employee;
++--------------------------+
+| names_list               |
++--------------------------+
+| Alice, Bob, Charlie      |
++--------------------------+
+```"#, 
+            )
+            .with_argument("expression", "The string expression to 
concatenate. Can be a column or any valid string expression.")
+            .with_argument("delimiter", "A literal string used as a separator 
between the concatenated values.")
+            .build()
+            .unwrap()
+    })
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions-aggregate/src/sum.rs 
b/datafusion/functions-aggregate/src/sum.rs
index 7e40c1bd17..91e777dd2a 100644
--- a/datafusion/functions-aggregate/src/sum.rs
+++ b/datafusion/functions-aggregate/src/sum.rs
@@ -21,6 +21,7 @@ use ahash::RandomState;
 use datafusion_expr::utils::AggregateOrderSensitivity;
 use std::any::Any;
 use std::collections::HashSet;
+use std::sync::OnceLock;
 
 use arrow::array::Array;
 use arrow::array::ArrowNativeTypeOp;
@@ -33,11 +34,13 @@ use arrow::datatypes::{
 };
 use arrow::{array::ArrayRef, datatypes::Field};
 use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
+use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
 use datafusion_expr::function::AccumulatorArgs;
 use datafusion_expr::function::StateFieldsArgs;
 use datafusion_expr::utils::format_state_name;
 use datafusion_expr::{
-    Accumulator, AggregateUDFImpl, GroupsAccumulator, ReversedUDAF, Signature, 
Volatility,
+    Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, 
ReversedUDAF,
+    Signature, Volatility,
 };
 use 
datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use datafusion_functions_aggregate_common::utils::Hashable;
@@ -233,6 +236,35 @@ impl AggregateUDFImpl for Sum {
     fn order_sensitivity(&self) -> AggregateOrderSensitivity {
         AggregateOrderSensitivity::Insensitive
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sum_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+fn get_sum_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_GENERAL)
+            .with_description(
+                "Returns the sum of all values in the specified column.",
+            )
+            .with_syntax_example("sum(expression)")
+            .with_sql_example(r#"```sql
+> SELECT sum(column_name) FROM table_name;
++-----------------------+
+| sum(column_name)       |
++-----------------------+
+| 12345                 |
++-----------------------+
+```"#, 
+            )
+            .with_argument("expression", "Expression to operate on. Can be a 
constant, column, or function, and any combination of arithmetic operators.")
+            .build()
+            .unwrap()
+    })
 }
 
 /// This accumulator computes SUM incrementally
diff --git a/docs/source/user-guide/sql/aggregate_functions.md 
b/docs/source/user-guide/sql/aggregate_functions.md
index 1c25874c08..4f774fe6d0 100644
--- a/docs/source/user-guide/sql/aggregate_functions.md
+++ b/docs/source/user-guide/sql/aggregate_functions.md
@@ -27,183 +27,9 @@ the rest of the documentation.
 
 [automatically created from the codebase]: 
https://github.com/apache/datafusion/issues/12740
 
-## General
-
-- [avg](#avg)
-- [bool_and](#bool_and)
-- [bool_or](#bool_or)
-- [count](#count)
-- [max](#max)
-- [mean](#mean)
-- [median](#median)
-- [min](#min)
-- [sum](#sum)
-- [array_agg](#array_agg)
-- [first_value](#first_value)
-- [last_value](#last_value)
-
-### `avg`
-
-Returns the average of numeric values in the specified column.
-
-```
-avg(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-#### Aliases
-
-- `mean`
-
-### `bool_and`
-
-Returns true if all non-null input values are true, otherwise false.
-
-```
-bool_and(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `bool_or`
-
-Returns true if any non-null input value is true, otherwise false.
-
-```
-bool_or(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `count`
-
-Returns the number of non-null values in the specified column.
-
-To include _null_ values in the total count, use `count(*)`.
-
-```
-count(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `max`
-
-Returns the maximum value in the specified column.
-
-```
-max(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `mean`
-
-_Alias of [avg](#avg)._
-
-### `median`
-
-Returns the median value in the specified column.
-
-```
-median(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `min`
-
-Returns the minimum value in the specified column.
-
-```
-min(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `sum`
-
-Returns the sum of all values in the specified column.
-
-```
-sum(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `array_agg`
-
-Returns an array created from the expression elements. If ordering requirement 
is given, elements are inserted in the order of required ordering.
-
-```
-array_agg(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `first_value`
-
-Returns the first element in an aggregation group according to the requested 
ordering. If no ordering is given, returns an arbitrary element from the group.
-
-```
-first_value(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `last_value`
-
-Returns the last element in an aggregation group according to the requested 
ordering. If no ordering is given, returns an arbitrary element from the group.
-
-```
-last_value(expression [ORDER BY expression])
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
 ## Statistical
 
-- [corr](#corr)
 - [covar](#covar)
-- [covar_pop](#covar_pop)
-- [covar_samp](#covar_samp)
-- [stddev](#stddev)
-- [stddev_pop](#stddev_pop)
-- [stddev_samp](#stddev_samp)
 - [regr_avgx](#regr_avgx)
 - [regr_avgy](#regr_avgy)
 - [regr_count](#regr_count)
@@ -214,21 +40,6 @@ last_value(expression [ORDER BY expression])
 - [regr_syy](#regr_syy)
 - [regr_sxy](#regr_sxy)
 
-### `corr`
-
-Returns the coefficient of correlation between two numeric values.
-
-```
-corr(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
 ### `covar`
 
 Returns the covariance of a set of number pairs.
@@ -244,75 +55,6 @@ covar(expression1, expression2)
 - **expression2**: Second expression to operate on.
   Can be a constant, column, or function, and any combination of arithmetic 
operators.
 
-### `covar_pop`
-
-Returns the population covariance of a set of number pairs.
-
-```
-covar_pop(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `covar_samp`
-
-Returns the sample covariance of a set of number pairs.
-
-```
-covar_samp(expression1, expression2)
-```
-
-#### Arguments
-
-- **expression1**: First expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **expression2**: Second expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `stddev`
-
-Returns the standard deviation of a set of numbers.
-
-```
-stddev(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `stddev_pop`
-
-Returns the population standard deviation of a set of numbers.
-
-```
-stddev_pop(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `stddev_samp`
-
-Returns the sample standard deviation of a set of numbers.
-
-```
-stddev_samp(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
 ### `regr_slope`
 
 Returns the slope of the linear regression line for non-null pairs in 
aggregate columns.
@@ -448,74 +190,3 @@ regr_sxy(expression_y, expression_x)
   Can be a constant, column, or function, and any combination of arithmetic 
operators.
 - **expression_x**: Independent variable.
   Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-## Approximate
-
-- [approx_distinct](#approx_distinct)
-- [approx_median](#approx_median)
-- [approx_percentile_cont](#approx_percentile_cont)
-- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight)
-
-### `approx_distinct`
-
-Returns the approximate number of distinct input values calculated using the
-HyperLogLog algorithm.
-
-```
-approx_distinct(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `approx_median`
-
-Returns the approximate median (50th percentile) of input values.
-It is an alias of `approx_percentile_cont(x, 0.5)`.
-
-```
-approx_median(expression)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-
-### `approx_percentile_cont`
-
-Returns the approximate percentile of input values using the t-digest 
algorithm.
-
-```
-approx_percentile_cont(expression, percentile, centroids)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **percentile**: Percentile to compute. Must be a float value between 0 and 1 
(inclusive).
-- **centroids**: Number of centroids to use in the t-digest algorithm. 
_Default is 100_.
-
-  If there are this number or fewer unique values, you can expect an exact 
result.
-  A higher number of centroids results in a more accurate approximation, but
-  requires more memory to compute.
-
-### `approx_percentile_cont_with_weight`
-
-Returns the weighted approximate percentile of input values using the
-t-digest algorithm.
-
-```
-approx_percentile_cont_with_weight(expression, weight, percentile)
-```
-
-#### Arguments
-
-- **expression**: Expression to operate on.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **weight**: Expression to use as weight.
-  Can be a constant, column, or function, and any combination of arithmetic 
operators.
-- **percentile**: Percentile to compute. Must be a float value between 0 and 1 
(inclusive).
diff --git a/docs/source/user-guide/sql/aggregate_functions_new.md 
b/docs/source/user-guide/sql/aggregate_functions_new.md
index 08cdb0a986..fc918c3b15 100644
--- a/docs/source/user-guide/sql/aggregate_functions_new.md
+++ b/docs/source/user-guide/sql/aggregate_functions_new.md
@@ -37,15 +37,80 @@ Aggregate functions operate on a set of values to compute a 
single result.
 
 ## General Functions
 
+- [array_agg](#array_agg)
+- [avg](#avg)
 - [bit_and](#bit_and)
 - [bit_or](#bit_or)
 - [bit_xor](#bit_xor)
+- [bool_and](#bool_and)
+- [bool_or](#bool_or)
+- [count](#count)
+- [first_value](#first_value)
+- [grouping](#grouping)
+- [last_value](#last_value)
+- [max](#max)
+- [mean](#mean)
+- [median](#median)
+- [min](#min)
+- [string_agg](#string_agg)
+- [sum](#sum)
 - [var](#var)
 - [var_pop](#var_pop)
 - [var_population](#var_population)
 - [var_samp](#var_samp)
 - [var_sample](#var_sample)
 
+### `array_agg`
+
+Returns an array created from the expression elements. If ordering is 
required, elements are inserted in the specified order.
+
+```
+array_agg(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT array_agg(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| array_agg(column_name ORDER BY other_column)  |
++-----------------------------------------------+
+| [element1, element2, element3]                |
++-----------------------------------------------+
+```
+
+### `avg`
+
+Returns the average of numeric values in the specified column.
+
+```
+avg(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+- **Aliases: **: `mean`
+
+#### Example
+
+```sql
+> SELECT avg(column_name) FROM table_name;
++---------------------------+
+| avg(column_name)           |
++---------------------------+
+| 42.75                      |
++---------------------------+
+```
+
+#### Aliases
+
+- mean
+
 ### `bit_and`
 
 Computes the bitwise AND of all non-null input values.
@@ -82,6 +147,276 @@ bit_xor(expression)
 
 - **expression**: Integer expression to operate on. Can be a constant, column, 
or function, and any combination of operators.
 
+### `bool_and`
+
+Returns true if all non-null input values are true, otherwise false.
+
+```
+bool_and(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```
+
+### `bool_or`
+
+Returns true if all non-null input values are true, otherwise false.
+
+```
+bool_and(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT bool_and(column_name) FROM table_name;
++----------------------------+
+| bool_and(column_name)       |
++----------------------------+
+| true                        |
++----------------------------+
+```
+
+### `count`
+
+Returns the number of non-null values in the specified column. To include null 
values in the total count, use `count(*)`.
+
+```
+count(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT count(column_name) FROM table_name;
++-----------------------+
+| count(column_name)     |
++-----------------------+
+| 100                   |
++-----------------------+
+
+> SELECT count(*) FROM table_name;
++------------------+
+| count(*)         |
++------------------+
+| 120              |
++------------------+
+```
+
+### `first_value`
+
+Returns the first element in an aggregation group according to the requested 
ordering. If no ordering is given, returns an arbitrary element from the group.
+
+```
+first_value(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```
+
+### `grouping`
+
+Returns 1 if the data is aggregated across the specified column, or 0 if it is 
not aggregated in the result set.
+
+```
+grouping(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to evaluate whether data is aggregated across the 
specified column. Can be a constant, column, or function.
+
+#### Example
+
+```sql
+> SELECT column_name, GROUPING(column_name) AS group_column
+  FROM table_name
+  GROUP BY GROUPING SETS ((column_name), ());
++-------------+-------------+
+| column_name | group_column |
++-------------+-------------+
+| value1      | 0           |
+| value2      | 0           |
+| NULL        | 1           |
++-------------+-------------+
+```
+
+### `last_value`
+
+Returns the first element in an aggregation group according to the requested 
ordering. If no ordering is given, returns an arbitrary element from the group.
+
+```
+first_value(expression [ORDER BY expression])
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT first_value(column_name ORDER BY other_column) FROM table_name;
++-----------------------------------------------+
+| first_value(column_name ORDER BY other_column)|
++-----------------------------------------------+
+| first_element                                 |
++-----------------------------------------------+
+```
+
+### `max`
+
+Returns the maximum value in the specified column.
+
+```
+max(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```
+
+### `mean`
+
+_Alias of [avg](#avg)._
+
+### `median`
+
+Returns the median value in the specified column.
+
+```
+median(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT median(column_name) FROM table_name;
++----------------------+
+| median(column_name)   |
++----------------------+
+| 45.5                 |
++----------------------+
+```
+
+### `min`
+
+Returns the maximum value in the specified column.
+
+```
+max(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT max(column_name) FROM table_name;
++----------------------+
+| max(column_name)      |
++----------------------+
+| 150                  |
++----------------------+
+```
+
+### `string_agg`
+
+Concatenates the values of string expressions and places separator values 
between them.
+
+```
+string_agg(expression, delimiter)
+```
+
+#### Arguments
+
+- **expression**: The string expression to concatenate. Can be a column or any 
valid string expression.
+- **delimiter**: A literal string used as a separator between the concatenated 
values.
+
+#### Example
+
+```sql
+> SELECT string_agg(name, ', ') AS names_list
+  FROM employee;
++--------------------------+
+| names_list               |
++--------------------------+
+| Alice, Bob, Charlie      |
++--------------------------+
+```
+
+### `sum`
+
+Returns the sum of all values in the specified column.
+
+```
+sum(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT sum(column_name) FROM table_name;
++-----------------------+
+| sum(column_name)       |
++-----------------------+
+| 12345                 |
++-----------------------+
+```
+
 ### `var`
 
 Returns the statistical sample variance of a set of numbers.
@@ -126,3 +461,280 @@ _Alias of [var](#var)._
 ### `var_sample`
 
 _Alias of [var](#var)._
+
+## Statistical Functions
+
+- [corr](#corr)
+- [covar](#covar)
+- [covar_pop](#covar_pop)
+- [covar_samp](#covar_samp)
+- [nth_value](#nth_value)
+- [stddev](#stddev)
+- [stddev_pop](#stddev_pop)
+- [stddev_samp](#stddev_samp)
+
+### `corr`
+
+Returns the coefficient of correlation between two numeric values.
+
+```
+corr(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT corr(column1, column2) FROM table_name;
++--------------------------------+
+| corr(column1, column2)         |
++--------------------------------+
+| 0.85                           |
++--------------------------------+
+```
+
+### `covar`
+
+_Alias of [covar_samp](#covar_samp)._
+
+### `covar_pop`
+
+Returns the sample covariance of a set of number pairs.
+
+```
+covar_samp(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```
+
+### `covar_samp`
+
+Returns the sample covariance of a set of number pairs.
+
+```
+covar_samp(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: First expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+- **expression2**: Second expression to operate on. Can be a constant, column, 
or function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT covar_samp(column1, column2) FROM table_name;
++-----------------------------------+
+| covar_samp(column1, column2)      |
++-----------------------------------+
+| 8.25                              |
++-----------------------------------+
+```
+
+#### Aliases
+
+- covar
+
+### `nth_value`
+
+Returns the nth value in a group of values.
+
+```
+nth_value(expression, n ORDER BY expression)
+```
+
+#### Arguments
+
+- **expression**: The column or expression to retrieve the nth value from. 
expression to operate on. Can be a constant, column, or function, and any 
combination of operators.
+- **n**: The position (nth) of the value to retrieve, based on the ordering.
+
+#### Example
+
+```sql
+> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id 
ORDER BY salary ASC) AS second_salary_by_dept
+  FROM employee;
++---------+--------+-------------------------+
+| dept_id | salary | second_salary_by_dept   |
++---------+--------+-------------------------+
+| 1       | 30000  | NULL                    |
+| 1       | 40000  | 40000                   |
+| 1       | 50000  | 40000                   |
+| 2       | 35000  | NULL                    |
+| 2       | 45000  | 45000                   |
++---------+--------+-------------------------+
+```
+
+### `stddev`
+
+Returns the standard deviation of a set of numbers.
+
+```
+stddev(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```
+
+#### Aliases
+
+- stddev_samp
+
+### `stddev_pop`
+
+Returns the standard deviation of a set of numbers.
+
+```
+stddev(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT stddev(column_name) FROM table_name;
++----------------------+
+| stddev(column_name)   |
++----------------------+
+| 12.34                |
++----------------------+
+```
+
+### `stddev_samp`
+
+_Alias of [stddev](#stddev)._
+
+## Approximate Functions
+
+- [approx_distinct](#approx_distinct)
+- [approx_median](#approx_median)
+- [approx_percentile_cont](#approx_percentile_cont)
+- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight)
+
+### `approx_distinct`
+
+Returns the approximate number of distinct input values calculated using the 
HyperLogLog algorithm.
+
+```
+approx_distinct(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT approx_distinct(column_name) FROM table_name;
++-----------------------------------+
+| approx_distinct(column_name)      |
++-----------------------------------+
+| 42                                |
++-----------------------------------+
+```
+
+### `approx_median`
+
+Returns the approximate median (50th percentile) of input values. It is an 
alias of `approx_percentile_cont(x, 0.5)`.
+
+```
+approx_median(expression)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+
+#### Example
+
+```sql
+> SELECT approx_median(column_name) FROM table_name;
++-----------------------------------+
+| approx_median(column_name)        |
++-----------------------------------+
+| 23.5                              |
++-----------------------------------+
+```
+
+### `approx_percentile_cont`
+
+Returns the approximate percentile of input values using the t-digest 
algorithm.
+
+```
+approx_percentile_cont(expression, percentile, centroids)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+- **percentile**: Percentile to compute. Must be a float value between 0 and 1 
(inclusive).
+- **centroids**: Number of centroids to use in the t-digest algorithm. 
_Default is 100_. A higher number results in more accurate approximation but 
requires more memory.
+
+#### Example
+
+```sql
+> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
++-------------------------------------------------+
+| approx_percentile_cont(column_name, 0.75, 100)  |
++-------------------------------------------------+
+| 65.0                                            |
++-------------------------------------------------+
+```
+
+### `approx_percentile_cont_with_weight`
+
+Returns the weighted approximate percentile of input values using the t-digest 
algorithm.
+
+```
+approx_percentile_cont_with_weight(expression, weight, percentile)
+```
+
+#### Arguments
+
+- **expression**: Expression to operate on. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+- **weight**: Expression to use as weight. Can be a constant, column, or 
function, and any combination of arithmetic operators.
+- **percentile**: Percentile to compute. Must be a float value between 0 and 1 
(inclusive).
+
+#### Example
+
+```sql
+> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) 
FROM table_name;
++----------------------------------------------------------------------+
+| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
++----------------------------------------------------------------------+
+| 78.5                                                                 |
++----------------------------------------------------------------------+
+```


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Migrate documentation for Aggregate Functions to code (#12861)

Reply via email to