amaliujia commented on code in PR #38858:
URL: https://github.com/apache/spark/pull/38858#discussion_r1036698430
##########
python/pyspark/sql/connect/functions.py:
##########
@@ -16,14 +16,247 @@
#
from pyspark.sql.connect.column import Column, LiteralExpression,
ColumnReference
-from typing import Any
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from pyspark.sql.connect._typing import ColumnOrName
# TODO(SPARK-40538) Add support for the missing PySpark functions.
+def _to_col(col: "ColumnOrName") -> Column:
+ return col if isinstance(col, Column) else column(col)
+
+
def col(x: str) -> Column:
return Column(ColumnReference(x))
+column = col
+
+
def lit(x: Any) -> Column:
return Column(LiteralExpression(x))
+
+
+def asc(col: "ColumnOrName") -> Column:
+ """
+ Returns a sort expression based on the ascending order of the given column
name.
+
+ .. versionadded:: 3.4.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to sort by in the ascending order.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the column specifying the order.
+
+ Examples
+ --------
+ Sort by the column 'id' in the descending order.
+
+ >>> df = spark.range(5)
+ >>> df = df.sort(desc("id"))
+ >>> df.show()
+ +---+
+ | id|
+ +---+
+ | 4|
+ | 3|
+ | 2|
+ | 1|
+ | 0|
+ +---+
+
+ Sort by the column 'id' in the ascending order.
+
+ >>> df.orderBy(asc("id")).show()
+ +---+
+ | id|
+ +---+
+ | 0|
+ | 1|
+ | 2|
+ | 3|
+ | 4|
+ +---+
+ """
+ return _to_col(col).asc()
+
+
+def asc_nulls_first(col: "ColumnOrName") -> Column:
+ """
+ Returns a sort expression based on the ascending order of the given
+ column name, and null values return before non-null values.
+
+ .. versionadded:: 2.4.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to sort by in the ascending order.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the column specifying the order.
+
+ Examples
+ --------
+ >>> df1 = spark.createDataFrame([(1, "Bob"),
+ ... (0, None),
+ ... (2, "Alice")], ["age", "name"])
+ >>> df1.sort(asc_nulls_first(df1.name)).show()
+ +---+-----+
+ |age| name|
+ +---+-----+
+ | 0| null|
+ | 2|Alice|
+ | 1| Bob|
+ +---+-----+
+
+ """
+ return _to_col(col).asc_nulls_first()
+
+
+def asc_nulls_last(col: "ColumnOrName") -> Column:
+ """
+ Returns a sort expression based on the ascending order of the given
+ column name, and null values appear after non-null values.
+
+ .. versionadded:: 2.4.0
Review Comment:
nit: wrong `versionadded`. BTW, do we always need to add `versionadded 3.4.0`,
given that the entire package is newly added in that version? Maybe we only
need a top-level `versionadded`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]