This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4c8ee88 [SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark ordering functions (asc*, desc*) 4c8ee88 is described below commit 4c8ee8856cb9714d433456fb0ce44dfebb00d83f Author: zero323 <mszymkiew...@gmail.com> AuthorDate: Tue Nov 3 22:50:59 2020 +0900 [SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark ordering functions (asc*, desc*) ### What changes were proposed in this pull request? This PR adds support for passing `Column`s as input to PySpark sorting functions. ### Why are the changes needed? According to SPARK-26979, PySpark functions should support both Column and str arguments, when possible. ### Does this PR introduce _any_ user-facing change? PySpark users can now provide both `Column` and `str` as an argument for `asc*` and `desc*` functions. ### How was this patch tested? New unit tests. Closes #30227 from zero323/SPARK-33257. Authored-by: zero323 <mszymkiew...@gmail.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- python/pyspark/sql/functions.py | 30 ++++++++++++++++++++----- python/pyspark/sql/functions.pyi | 12 +++++----- python/pyspark/sql/tests/test_functions.py | 35 ++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 87b999d..86a88a5 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -119,7 +119,10 @@ def asc(col): """ Returns a sort expression based on the ascending order of the given column name. """ - return _invoke_function("asc", col) + return ( + col.asc() if isinstance(col, Column) + else _invoke_function("asc", col) + ) @since(1.3) @@ -127,7 +130,10 @@ def desc(col): """ Returns a sort expression based on the descending order of the given column name. """ - return _invoke_function("desc", col) + return ( + col.desc() if isinstance(col, Column) + else _invoke_function("desc", col) + ) @since(1.3) @@ -457,7 +463,10 @@ def asc_nulls_first(col): Returns a sort expression based on the ascending order of the given column name, and null values return before non-null values. """ - return _invoke_function("asc_nulls_first", col) + return ( + col.asc_nulls_first() if isinstance(col, Column) + else _invoke_function("asc_nulls_first", col) + ) @since(2.4) @@ -466,7 +475,10 @@ def asc_nulls_last(col): Returns a sort expression based on the ascending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("asc_nulls_last", col) + return ( + col.asc_nulls_last() if isinstance(col, Column) + else _invoke_function("asc_nulls_last", col) + ) @since(2.4) @@ -475,7 +487,10 @@ def desc_nulls_first(col): Returns a sort expression based on the descending order of the given column name, and null values appear before non-null values. """ - return _invoke_function("desc_nulls_first", col) + return ( + col.desc_nulls_first() if isinstance(col, Column) + else _invoke_function("desc_nulls_first", col) + ) @since(2.4) @@ -484,7 +499,10 @@ def desc_nulls_last(col): Returns a sort expression based on the descending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("desc_nulls_last", col) + return ( + col.desc_nulls_last() if isinstance(col, Column) + else _invoke_function("desc_nulls_last", col) + ) @since(1.6) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index e395f57..281c1d7 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -258,9 +258,9 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... -def asc(col: str) -> Column: ... -def asc_nulls_first(col: str) -> Column: ... -def asc_nulls_last(col: str) -> Column: ... +def asc(col: ColumnOrName) -> Column: ... +def asc_nulls_first(col: ColumnOrName) -> Column: ... +def asc_nulls_last(col: ColumnOrName) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... @@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... -def desc(col: str) -> Column: ... -def desc_nulls_first(col: str) -> Column: ... -def desc_nulls_last(col: str) -> Column: ... +def desc(col: ColumnOrName) -> Column: ... +def desc_nulls_first(col: ColumnOrName) -> Column: ... +def desc_nulls_last(col: ColumnOrName) -> Column: ... def exp(col: ColumnOrName) -> Column: ... def expm1(col: ColumnOrName) -> Column: ... def floor(col: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index cc77b8d..3254934 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -265,6 +265,41 @@ class FunctionsTests(ReusedSQLTestCase): self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1)) self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1)) + def test_sorting_functions_with_column(self): + from pyspark.sql import functions + from pyspark.sql.column import Column + + funs = [ + functions.asc_nulls_first, functions.asc_nulls_last, + functions.desc_nulls_first, functions.desc_nulls_last + ] + exprs = [col("x"), "x"] + + for fun in funs: + for expr in exprs: + res = fun(expr) + self.assertIsInstance(res, Column) + self.assertIn( + f"""'x {fun.__name__.replace("_", " ").upper()}'""", + str(res) + ) + + for expr in exprs: + res = functions.asc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x ASC NULLS FIRST'""", + str(res) + ) + + for expr in exprs: + res = functions.desc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x DESC NULLS LAST'""", + str(res) + ) + def test_sort_with_nulls_order(self): from pyspark.sql import functions --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org