HyukjinKwon commented on a change in pull request #33625:
URL: https://github.com/apache/spark/pull/33625#discussion_r683907441
##########
File path: python/pyspark/pandas/frame.py
##########
@@ -3459,6 +3458,111 @@ def mask(
cond_inversed = cond._apply_series_op(lambda psser: ~psser)
return self.where(cond_inversed, other)
+ # TODO: Support axis as 1 or 'columns'
+ def mode(
+ self, axis: Union[int, str] = 0, numeric_only: bool = False, dropna: bool = True
+ ) -> "DataFrame":
+ """
+ Get the mode(s) of each element along the selected axis.
+
+ The mode of a set of values is the value that appears most often.
+ It can be multiple values.
+
+ .. note:: the current implementation of mode requires joining
+ multiple times (column count - 1 times when axis is 0 or 'index'),
+ which is potentially expensive.
+
+ .. note:: the order of multiple modes (within a column when axis is 0 or 'index')
+ is not determined.
Review comment:
Maybe we can move these two notes into a numpydoc `Notes` section:
```
Notes
-----
...
```
placed further below in the docstring. (https://numpydoc.readthedocs.io/en/latest/format.html#notes)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]