This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 024c86b1 docs: Clarify first_value usage in select vs aggregate (#1348)
024c86b1 is described below

commit 024c86b16f4da4f2ec957b00f7ea37d00bdc759a
Author: Adisa Mubarak (AdMub) <[email protected]>
AuthorDate: Thu Feb 12 20:28:03 2026 +0100

    docs: Clarify first_value usage in select vs aggregate (#1348)
    
    * docs: Add warning to first_value about usage in select vs aggregate
    
    Clarifies that aggregate functions like first_value must be used within .aggregate() and not .select(). Closes #1300.
    
    * chore: remove temporary reproduction script
    
    * Update all aggregate functions to have an example usage that is correct
    
    ---------
    
    Co-authored-by: Tim Saucer <[email protected]>
---
 python/datafusion/functions.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 7ae59c00..2aed9dd3 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -1779,7 +1779,7 @@ def array_agg(
 
     For example::
 
-        df.select(array_agg(col("a"), order_by="b"))
+        df.aggregate([], array_agg(col("a"), order_by="b"))
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -1941,7 +1941,7 @@ def median(
 
 
 def min(expression: Expr, filter: Expr | None = None) -> Expr:
-    """Returns the minimum value of the argument.
+    """Aggregate function that returns the minimum value of the argument.
 
     If using the builder functions described in ref:`_aggregation` this function ignores
     the options ``order_by``, ``null_treatment``, and ``distinct``.
@@ -2282,7 +2282,7 @@ def first_value(
 
     For example::
 
-        df.select(first_value(col("a"), order_by="ts"))
+        df.aggregate([], first_value(col("a"), order_by="ts"))
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -2319,7 +2319,7 @@ def last_value(
 
     For example::
 
-        df.select(last_value(col("a"), order_by="ts"))
+        df.aggregate([], last_value(col("a"), order_by="ts"))
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -2358,7 +2358,7 @@ def nth_value(
 
     For example::
 
-        df.select(nth_value(col("a"), 2, order_by="ts"))
+        df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None
@@ -2843,7 +2843,7 @@ def string_agg(
 
     For example::
 
-        df.select(string_agg(col("a"), ",", order_by="b"))
+        df.aggregate([], string_agg(col("a"), ",", order_by="b"))
     """
     order_by_raw = sort_list_to_raw_sort_list(order_by)
     filter_raw = filter.expr if filter is not None else None


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to