This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 45956f72d864 [SPARK-47005][PYTHON][DOCS] Refine docstring of
`asc_nulls_first/asc_nulls_last/desc_nulls_first/desc_nulls_last`
45956f72d864 is described below
commit 45956f72d864701cd84635e9cac0a29592c08b1c
Author: yangjie01 <[email protected]>
AuthorDate: Thu Feb 8 14:09:06 2024 +0800
[SPARK-47005][PYTHON][DOCS] Refine docstring of
`asc_nulls_first/asc_nulls_last/desc_nulls_first/desc_nulls_last`
### What changes were proposed in this pull request?
This PR refines the docstrings of
`asc_nulls_first/asc_nulls_last/desc_nulls_first/desc_nulls_last` and adds some
new examples.
### Why are the changes needed?
To improve PySpark documentation
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45066 from LuciferYang/sort-funcs.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
python/pyspark/sql/functions/builtin.py | 148 +++++++++++++++++++++++++++-----
1 file changed, 128 insertions(+), 20 deletions(-)
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index 110006df4317..6320f9b922ee 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -2889,7 +2889,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") ->
Column:
@_try_remote_functions
def asc_nulls_first(col: "ColumnOrName") -> Column:
"""
- Returns a sort expression based on the ascending order of the given
+ Sort Function: Returns a sort expression based on the ascending order of
the given
column name, and null values return before non-null values.
.. versionadded:: 2.4.0
@@ -2909,10 +2909,11 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:
Examples
--------
- >>> df1 = spark.createDataFrame([(1, "Bob"),
- ... (0, None),
- ... (2, "Alice")], ["age", "name"])
- >>> df1.sort(asc_nulls_first(df1.name)).show()
+ Example 1: Sorting a DataFrame with null values in ascending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.asc_nulls_first(df.name)).show()
+---+-----+
|age| name|
+---+-----+
@@ -2921,6 +2922,32 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:
| 1| Bob|
+---+-----+
+ Example 2: Sorting a DataFrame with multiple columns, null values in
ascending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame(
+ ... [(1, "Bob", None), (0, None, "Z"), (2, "Alice", "Y")], ["age",
"name", "grade"])
+ >>> df.sort(sf.asc_nulls_first(df.name),
sf.asc_nulls_first(df.grade)).show()
+ +---+-----+-----+
+ |age| name|grade|
+ +---+-----+-----+
+ | 0| NULL| Z|
+ | 2|Alice| Y|
+ | 1| Bob| NULL|
+ +---+-----+-----+
+
+ Example 3: Sorting a DataFrame with null values in ascending order using
column name string
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.asc_nulls_first("name")).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 0| NULL|
+ | 2|Alice|
+ | 1| Bob|
+ +---+-----+
"""
return (
col.asc_nulls_first()
@@ -2932,7 +2959,7 @@ def asc_nulls_first(col: "ColumnOrName") -> Column:
@_try_remote_functions
def asc_nulls_last(col: "ColumnOrName") -> Column:
"""
- Returns a sort expression based on the ascending order of the given
+ Sort Function: Returns a sort expression based on the ascending order of
the given
column name, and null values appear after non-null values.
.. versionadded:: 2.4.0
@@ -2952,10 +2979,11 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
Examples
--------
- >>> df1 = spark.createDataFrame([(0, None),
- ... (1, "Bob"),
- ... (2, "Alice")], ["age", "name"])
- >>> df1.sort(asc_nulls_last(df1.name)).show()
+ Example 1: Sorting a DataFrame with null values in ascending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.asc_nulls_last(df.name)).show()
+---+-----+
|age| name|
+---+-----+
@@ -2964,6 +2992,32 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
| 0| NULL|
+---+-----+
+ Example 2: Sorting a DataFrame with multiple columns, null values in
ascending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame(
+ ... [(0, None, "Z"), (1, "Bob", None), (2, "Alice", "Y")], ["age",
"name", "grade"])
+ >>> df.sort(sf.asc_nulls_last(df.name), sf.asc_nulls_last(df.grade)).show()
+ +---+-----+-----+
+ |age| name|grade|
+ +---+-----+-----+
+ | 2|Alice| Y|
+ | 1| Bob| NULL|
+ | 0| NULL| Z|
+ +---+-----+-----+
+
+ Example 3: Sorting a DataFrame with null values in ascending order using
column name string
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.asc_nulls_last("name")).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 2|Alice|
+ | 1| Bob|
+ | 0| NULL|
+ +---+-----+
"""
return (
col.asc_nulls_last() if isinstance(col, Column) else
_invoke_function("asc_nulls_last", col)
@@ -2973,7 +3027,7 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
@_try_remote_functions
def desc_nulls_first(col: "ColumnOrName") -> Column:
"""
- Returns a sort expression based on the descending order of the given
+ Sort Function: Returns a sort expression based on the descending order of
the given
column name, and null values appear before non-null values.
.. versionadded:: 2.4.0
@@ -2993,10 +3047,11 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:
Examples
--------
- >>> df1 = spark.createDataFrame([(0, None),
- ... (1, "Bob"),
- ... (2, "Alice")], ["age", "name"])
- >>> df1.sort(desc_nulls_first(df1.name)).show()
+ Example 1: Sorting a DataFrame with null values in descending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.desc_nulls_first(df.name)).show()
+---+-----+
|age| name|
+---+-----+
@@ -3005,6 +3060,32 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:
| 2|Alice|
+---+-----+
+ Example 2: Sorting a DataFrame with multiple columns, null values in
descending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame(
+ ... [(1, "Bob", None), (0, None, "Z"), (2, "Alice", "Y")], ["age",
"name", "grade"])
+ >>> df.sort(sf.desc_nulls_first(df.name),
sf.desc_nulls_first(df.grade)).show()
+ +---+-----+-----+
+ |age| name|grade|
+ +---+-----+-----+
+ | 0| NULL| Z|
+ | 1| Bob| NULL|
+ | 2|Alice| Y|
+ +---+-----+-----+
+
+ Example 3: Sorting a DataFrame with null values in descending order using
column name string
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(1, "Bob"), (0, None), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.desc_nulls_first("name")).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 0| NULL|
+ | 1| Bob|
+ | 2|Alice|
+ +---+-----+
"""
return (
col.desc_nulls_first()
@@ -3016,7 +3097,7 @@ def desc_nulls_first(col: "ColumnOrName") -> Column:
@_try_remote_functions
def desc_nulls_last(col: "ColumnOrName") -> Column:
"""
- Returns a sort expression based on the descending order of the given
+ Sort Function: Returns a sort expression based on the descending order of
the given
column name, and null values appear after non-null values.
.. versionadded:: 2.4.0
@@ -3036,10 +3117,11 @@ def desc_nulls_last(col: "ColumnOrName") -> Column:
Examples
--------
- >>> df1 = spark.createDataFrame([(0, None),
- ... (1, "Bob"),
- ... (2, "Alice")], ["age", "name"])
- >>> df1.sort(desc_nulls_last(df1.name)).show()
+ Example 1: Sorting a DataFrame with null values in descending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.desc_nulls_last(df.name)).show()
+---+-----+
|age| name|
+---+-----+
@@ -3048,6 +3130,32 @@ def desc_nulls_last(col: "ColumnOrName") -> Column:
| 0| NULL|
+---+-----+
+ Example 2: Sorting a DataFrame with multiple columns, null values in
descending order
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame(
+ ... [(0, None, "Z"), (1, "Bob", None), (2, "Alice", "Y")], ["age",
"name", "grade"])
+ >>> df.sort(sf.desc_nulls_last(df.name),
sf.desc_nulls_last(df.grade)).show()
+ +---+-----+-----+
+ |age| name|grade|
+ +---+-----+-----+
+ | 1| Bob| NULL|
+ | 2|Alice| Y|
+ | 0| NULL| Z|
+ +---+-----+-----+
+
+ Example 3: Sorting a DataFrame with null values in descending order using
column name string
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([(0, None), (1, "Bob"), (2, "Alice")],
["age", "name"])
+ >>> df.sort(sf.desc_nulls_last("name")).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 1| Bob|
+ | 2|Alice|
+ | 0| NULL|
+ +---+-----+
"""
return (
col.desc_nulls_last()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]