This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new bb98d4d70dce [SPARK-46165][PS] Add support for DataFrame.all axis=None
bb98d4d70dce is described below
commit bb98d4d70dce0aad687dc6fe8e6df8fb3a35a2cd
Author: Devin Petersohn <[email protected]>
AuthorDate: Thu Feb 5 09:55:08 2026 +0900
[SPARK-46165][PS] Add support for DataFrame.all axis=None
### What changes were proposed in this pull request?
Add support for passing `axis=None` to `DataFrame.all`.
### Why are the changes needed?
`DataFrame.all` previously raised `NotImplementedError` for `axis=None`; this change implements that missing option.
### Does this PR introduce _any_ user-facing change?
Yes. `DataFrame.all` now accepts `axis=None`, which reduces all dimensions and returns a single boolean value.
### How was this patch tested?
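CI, with new `axis=None` test cases added to `python/pyspark/pandas/tests/computation/test_any_all.py`.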
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53981 from devin-petersohn/devin/all_axis_none.
Authored-by: Devin Petersohn <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/frame.py | 16 ++++++++++------
python/pyspark/pandas/tests/computation/test_any_all.py | 10 ++++++++++
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 64ee6fb812b3..056b1e8ce284 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -11050,10 +11050,12 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
)
)
- # TODO(SPARK-46165): axis and **kwargs should be implemented.
def all(
- self, axis: Axis = 0, bool_only: Optional[bool] = None, skipna: bool = True
- ) -> "Series":
+ self,
+ axis: Optional[Axis] = 0,
+ bool_only: Optional[bool] = None,
+ skipna: bool = True,
+ ) -> Union["Series", bool]:
"""
Return whether all elements are True.
@@ -11062,13 +11064,14 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Parameters
----------
- axis : {0, 'index', 1, or 'columns'}, default 0
+ axis : {0, 'index', 1, 'columns', or None}, default 0
Indicate which axis or axes should be reduced.
* 0 / 'index' : reduce the index, return a Series whose index is
the
original column labels.
* 1 / 'columns' : reduce the columns, return a Series whose index
is the
original index.
+ * None : reduce all dimensions, return a single boolean value.
bool_only : bool, default None
Include only boolean columns. If None, will attempt to use
everything,
@@ -11123,7 +11126,8 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
col2 False
dtype: bool
"""
- axis = validate_axis(axis)
+ if axis is not None:
+ axis = validate_axis(axis)
column_labels = self._internal.column_labels
if bool_only:
column_labels = self._bool_column_labels(column_labels)
@@ -11175,7 +11179,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
)
else:
# axis=None case - return single boolean value
- raise NotImplementedError('axis should be 0, 1, "index", or "columns" currently.')
+ return self.all(axis=1, bool_only=bool_only, skipna=skipna).all()
def any(
self,
diff --git a/python/pyspark/pandas/tests/computation/test_any_all.py
b/python/pyspark/pandas/tests/computation/test_any_all.py
index 9b35f0b612d5..418d78c0d313 100644
--- a/python/pyspark/pandas/tests/computation/test_any_all.py
+++ b/python/pyspark/pandas/tests/computation/test_any_all.py
@@ -85,6 +85,11 @@ class FrameAnyAllMixin:
psdf.all(axis="columns", bool_only=False), pdf.all(axis="columns",
bool_only=False)
)
+ # Test axis=None
+ self.assert_eq(psdf.all(axis=None), pdf.all(axis=None))
+ self.assert_eq(psdf.all(axis=None, bool_only=True), pdf.all(axis=None, bool_only=True))
+ self.assert_eq(psdf.all(axis=None, bool_only=False), pdf.all(axis=None, bool_only=False))
+
columns.names = ["X", "Y"]
pdf.columns = columns
psdf.columns = columns
@@ -98,6 +103,11 @@ class FrameAnyAllMixin:
self.assert_eq(psdf.all(axis=1, bool_only=True), pdf.all(axis=1,
bool_only=True))
self.assert_eq(psdf.all(axis=1, bool_only=False), pdf.all(axis=1,
bool_only=False))
+ # Test axis=None
+ self.assert_eq(psdf.all(axis=None), pdf.all(axis=None))
+ self.assert_eq(psdf.all(axis=None, bool_only=True), pdf.all(axis=None, bool_only=True))
+ self.assert_eq(psdf.all(axis=None, bool_only=False), pdf.all(axis=None, bool_only=False))
+
# Test skipna
pdf = pd.DataFrame({"A": [True, True], "B": [1, np.nan], "C": [True,
None]})
pdf.name = "x"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]