This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new e683932 [SPARK-38487][PYTHON][DOC] Fix docstrings of nlargest/nsmallest of DataFrame e683932 is described below commit e683932495fae444b2c17a755d9a660a6c2d63ef Author: Xinrong Meng <xinrong.m...@databricks.com> AuthorDate: Thu Mar 10 15:32:48 2022 +0900 [SPARK-38487][PYTHON][DOC] Fix docstrings of nlargest/nsmallest of DataFrame Fix docstrings of nlargest/nsmallest of DataFrame To make docstring less confusing. No. Manual test. Closes #35793 from xinrong-databricks/frame.ntop. Authored-by: Xinrong Meng <xinrong.m...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit c483e2977cbc6ae33d999c9c9d1dbacd9c53d85a) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index e576789..efc677b 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -7198,7 +7198,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})] ) return internal - # TODO: add keep = First + # TODO: add keep = First def nlargest(self, n: int, columns: Union[Name, List[Name]]) -> "DataFrame": """ Return the first `n` rows ordered by `columns` in descending order. @@ -7255,7 +7255,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})] 6 NaN 12 In the following example, we will use ``nlargest`` to select the three - rows having the largest values in column "population". + rows having the largest values in column "X". >>> df.nlargest(n=3, columns='X') X Y @@ -7263,12 +7263,14 @@ defaultdict(<class 'list'>, {'col..., 'col...})] 4 6.0 10 3 5.0 9 + To order by the largest values in column "Y" and then "X", we can + specify multiple columns like in the next example. + >>> df.nlargest(n=3, columns=['Y', 'X']) X Y 6 NaN 12 5 7.0 11 4 6.0 10 - """ return self.sort_values(by=columns, ascending=False).head(n=n) @@ -7318,7 +7320,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})] 6 NaN 12 In the following example, we will use ``nsmallest`` to select the - three rows having the smallest values in column "a". + three rows having the smallest values in column "X". >>> df.nsmallest(n=3, columns='X') # doctest: +NORMALIZE_WHITESPACE X Y @@ -7326,7 +7328,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})] 1 2.0 7 2 3.0 8 - To order by the largest values in column "a" and then "c", we can + To order by the smallest values in column "Y" and then "X", we can specify multiple columns like in the next example. >>> df.nsmallest(n=3, columns=['Y', 'X']) # doctest: +NORMALIZE_WHITESPACE --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org