This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4c8ee88 [SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark
ordering functions (asc*, desc*)
4c8ee88 is described below
commit 4c8ee8856cb9714d433456fb0ce44dfebb00d83f
Author: zero323 <[email protected]>
AuthorDate: Tue Nov 3 22:50:59 2020 +0900
[SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark ordering
functions (asc*, desc*)
### What changes were proposed in this pull request?
This PR adds support for passing `Column`s as input to PySpark sorting
functions.
### Why are the changes needed?
According to SPARK-26979, PySpark functions should support both Column and
str arguments, when possible.
### Does this PR introduce _any_ user-facing change?
Yes. PySpark users can now pass either a `Column` or a `str` as the argument
to the `asc*` and `desc*` functions.
### How was this patch tested?
New unit tests.
Closes #30227 from zero323/SPARK-33257.
Authored-by: zero323 <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
---
python/pyspark/sql/functions.py | 30 ++++++++++++++++++++-----
python/pyspark/sql/functions.pyi | 12 +++++-----
python/pyspark/sql/tests/test_functions.py | 35 ++++++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 87b999d..86a88a5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -119,7 +119,10 @@ def asc(col):
"""
Returns a sort expression based on the ascending order of the given column
name.
"""
- return _invoke_function("asc", col)
+ return (
+ col.asc() if isinstance(col, Column)
+ else _invoke_function("asc", col)
+ )
@since(1.3)
@@ -127,7 +130,10 @@ def desc(col):
"""
Returns a sort expression based on the descending order of the given
column name.
"""
- return _invoke_function("desc", col)
+ return (
+ col.desc() if isinstance(col, Column)
+ else _invoke_function("desc", col)
+ )
@since(1.3)
@@ -457,7 +463,10 @@ def asc_nulls_first(col):
Returns a sort expression based on the ascending order of the given
column name, and null values return before non-null values.
"""
- return _invoke_function("asc_nulls_first", col)
+ return (
+ col.asc_nulls_first() if isinstance(col, Column)
+ else _invoke_function("asc_nulls_first", col)
+ )
@since(2.4)
@@ -466,7 +475,10 @@ def asc_nulls_last(col):
Returns a sort expression based on the ascending order of the given
column name, and null values appear after non-null values.
"""
- return _invoke_function("asc_nulls_last", col)
+ return (
+ col.asc_nulls_last() if isinstance(col, Column)
+ else _invoke_function("asc_nulls_last", col)
+ )
@since(2.4)
@@ -475,7 +487,10 @@ def desc_nulls_first(col):
Returns a sort expression based on the descending order of the given
column name, and null values appear before non-null values.
"""
- return _invoke_function("desc_nulls_first", col)
+ return (
+ col.desc_nulls_first() if isinstance(col, Column)
+ else _invoke_function("desc_nulls_first", col)
+ )
@since(2.4)
@@ -484,7 +499,10 @@ def desc_nulls_last(col):
Returns a sort expression based on the descending order of the given
column name, and null values appear after non-null values.
"""
- return _invoke_function("desc_nulls_last", col)
+ return (
+ col.desc_nulls_last() if isinstance(col, Column)
+ else _invoke_function("desc_nulls_last", col)
+ )
@since(1.6)
diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi
index e395f57..281c1d7 100644
--- a/python/pyspark/sql/functions.pyi
+++ b/python/pyspark/sql/functions.pyi
@@ -258,9 +258,9 @@ def map_zip_with(
) -> Column: ...
def abs(col: ColumnOrName) -> Column: ...
def acos(col: ColumnOrName) -> Column: ...
-def asc(col: str) -> Column: ...
-def asc_nulls_first(col: str) -> Column: ...
-def asc_nulls_last(col: str) -> Column: ...
+def asc(col: ColumnOrName) -> Column: ...
+def asc_nulls_first(col: ColumnOrName) -> Column: ...
+def asc_nulls_last(col: ColumnOrName) -> Column: ...
def ascii(col: ColumnOrName) -> Column: ...
def asin(col: ColumnOrName) -> Column: ...
def atan(col: ColumnOrName) -> Column: ...
@@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ...
def cume_dist() -> Column: ...
def degrees(col: ColumnOrName) -> Column: ...
def dense_rank() -> Column: ...
-def desc(col: str) -> Column: ...
-def desc_nulls_first(col: str) -> Column: ...
-def desc_nulls_last(col: str) -> Column: ...
+def desc(col: ColumnOrName) -> Column: ...
+def desc_nulls_first(col: ColumnOrName) -> Column: ...
+def desc_nulls_last(col: ColumnOrName) -> Column: ...
def exp(col: ColumnOrName) -> Column: ...
def expm1(col: ColumnOrName) -> Column: ...
def floor(col: ColumnOrName) -> Column: ...
diff --git a/python/pyspark/sql/tests/test_functions.py
b/python/pyspark/sql/tests/test_functions.py
index cc77b8d..3254934 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -265,6 +265,41 @@ class FunctionsTests(ReusedSQLTestCase):
self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a",
123), [0.1, 0.9], 0.1))
self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a",
123], [0.1, 0.9], 0.1))
+ def test_sorting_functions_with_column(self):
+ from pyspark.sql import functions
+ from pyspark.sql.column import Column
+
+ funs = [
+ functions.asc_nulls_first, functions.asc_nulls_last,
+ functions.desc_nulls_first, functions.desc_nulls_last
+ ]
+ exprs = [col("x"), "x"]
+
+ for fun in funs:
+ for expr in exprs:
+ res = fun(expr)
+ self.assertIsInstance(res, Column)
+ self.assertIn(
+ f"""'x {fun.__name__.replace("_", " ").upper()}'""",
+ str(res)
+ )
+
+ for expr in exprs:
+ res = functions.asc(expr)
+ self.assertIsInstance(res, Column)
+ self.assertIn(
+ """'x ASC NULLS FIRST'""",
+ str(res)
+ )
+
+ for expr in exprs:
+ res = functions.desc(expr)
+ self.assertIsInstance(res, Column)
+ self.assertIn(
+ """'x DESC NULLS LAST'""",
+ str(res)
+ )
+
def test_sort_with_nulls_order(self):
from pyspark.sql import functions
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]