This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 563b3cab749f [SPARK-45259][PYTHON][DOCS] Refine docstring of `count`
563b3cab749f is described below
commit 563b3cab749f0104ef399730fe69fa4efd14be84
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Nov 7 09:52:51 2023 -0800
[SPARK-45259][PYTHON][DOCS] Refine docstring of `count`
### What changes were proposed in this pull request?
This PR proposes to improve the docstring of `count`.
### Why are the changes needed?
For end users, and better usability of PySpark.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes the user-facing documentation.
### How was this patch tested?
Manually tested.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43685 from HyukjinKwon/SPARK-45259.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/functions.py | 47 +++++++++++++++++++++++++++++++++++------
1 file changed, 40 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index a32f04164f31..81d120e2ff49 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1162,15 +1162,48 @@ def count(col: "ColumnOrName") -> Column:
Examples
--------
- Count by all columns (start), and by a column that does not count ``None``.
+ Example 1: Count all rows in a DataFrame
+ >>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([(None,), ("a",), ("b",), ("c",)],
schema=["alphabets"])
- >>> df.select(count(expr("*")), count(df.alphabets)).show()
- +--------+----------------+
- |count(1)|count(alphabets)|
- +--------+----------------+
- | 4| 3|
- +--------+----------------+
+ >>> df.select(sf.count(sf.expr("*"))).show()
+ +--------+
+ |count(1)|
+ +--------+
+ | 4|
+ +--------+
+
+ Example 2: Count non-null values in a specific column
+
+ >>> from pyspark.sql import functions as sf
+ >>> df.select(sf.count(df.alphabets)).show()
+ +----------------+
+ |count(alphabets)|
+ +----------------+
+ | 3|
+ +----------------+
+
+ Example 3: Count all rows in a DataFrame with multiple columns
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame(
+ ... [(1, "apple"), (2, "banana"), (3, None)], schema=["id", "fruit"])
+ >>> df.select(sf.count(sf.expr("*"))).show()
+ +--------+
+ |count(1)|
+ +--------+
+ | 3|
+ +--------+
+
+ Example 4: Count non-null values in multiple columns
+
+ >>> from pyspark.sql import functions as sf
+ >>> df.select(sf.count(df.id), sf.count(df.fruit)).show()
+ +---------+------------+
+ |count(id)|count(fruit)|
+ +---------+------------+
+ | 3| 2|
+ +---------+------------+
"""
return _invoke_function_over_columns("count", col)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]