This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3ca57ae7a9b [SPARK-45038][PYTHON][DOCS] Refine docstring of `max` 3ca57ae7a9b is described below commit 3ca57ae7a9bc2053807e0d0f04c59104037137e4 Author: allisonwang-db <allison.w...@databricks.com> AuthorDate: Mon Sep 4 09:43:17 2023 +0900 [SPARK-45038][PYTHON][DOCS] Refine docstring of `max` ### What changes were proposed in this pull request? This PR refines the docstring for function `max` by adding more examples. ### Why are the changes needed? To improve the PySpark documentation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? doctest ### Was this patch authored or co-authored using generative AI tooling? No Closes #42758 from allisonwang-db/spark-45038-refine-max. Authored-by: allisonwang-db <allison.w...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/functions.py | 78 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fb02cb0cc98..47d928fe59a 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1217,22 +1217,94 @@ def max(col: "ColumnOrName") -> Column: Parameters ---------- col : :class:`~pyspark.sql.Column` or str - target column to compute on. + The target column on which the maximum value is computed. Returns ------- :class:`~pyspark.sql.Column` - column for computed results. + A column that contains the maximum value computed. + + See Also + -------- + :meth:`pyspark.sql.functions.min` + :meth:`pyspark.sql.functions.avg` + :meth:`pyspark.sql.functions.sum` + + Notes + ----- + - Null values are ignored during the computation. + - NaN values are larger than any other numeric value. 
Examples -------- + Example 1: Compute the maximum value of a numeric column + + >>> import pyspark.sql.functions as sf >>> df = spark.range(10) - >>> df.select(max(col("id"))).show() + >>> df.select(sf.max(df.id)).show() +-------+ |max(id)| +-------+ | 9| +-------+ + + Example 2: Compute the maximum value of a string column + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([("A",), ("B",), ("C",)], ["value"]) + >>> df.select(sf.max(df.value)).show() + +----------+ + |max(value)| + +----------+ + | C| + +----------+ + + Example 3: Compute the maximum value of a column in a grouped DataFrame + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([("A", 1), ("A", 2), ("B", 3), ("B", 4)], ["key", "value"]) + >>> df.groupBy("key").agg(sf.max(df.value)).show() + +---+----------+ + |key|max(value)| + +---+----------+ + | A| 2| + | B| 4| + +---+----------+ + + Example 4: Compute the maximum value of multiple columns in a grouped DataFrame + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... 
[("A", 1, 2), ("A", 2, 3), ("B", 3, 4), ("B", 4, 5)], ["key", "value1", "value2"]) + >>> df.groupBy("key").agg(sf.max("value1"), sf.max("value2")).show() + +---+-----------+-----------+ + |key|max(value1)|max(value2)| + +---+-----------+-----------+ + | A| 2| 3| + | B| 4| 5| + +---+-----------+-----------+ + + Example 5: Compute the maximum value of a column with null values + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1,), (2,), (None,)], ["value"]) + >>> df.select(sf.max(df.value)).show() + +----------+ + |max(value)| + +----------+ + | 2| + +----------+ + + Example 6: Compute the maximum value of a column with "NaN" values + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(1.1,), (float("nan"),), (3.3,)], ["value"]) + >>> df.select(sf.max(df.value)).show() + +----------+ + |max(value)| + +----------+ + | NaN| + +----------+ """ return _invoke_function_over_columns("max", col) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org