This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e549c6fd22a [SPARK-38890][PYTHON] Implement `ignore_index` of `DataFrame.sort_index`
e549c6fd22a is described below
commit e549c6fd22ac0d5a6df0d817212637c532b9a681
Author: Xinrong Meng <[email protected]>
AuthorDate: Thu Apr 14 09:34:13 2022 +0900
[SPARK-38890][PYTHON] Implement `ignore_index` of `DataFrame.sort_index`
### What changes were proposed in this pull request?
Implement `ignore_index` of `DataFrame.sort_index`.
### Why are the changes needed?
To reach parity with the pandas API.
### Does this PR introduce _any_ user-facing change?
Yes. The `ignore_index` parameter of `DataFrame.sort_index` is now supported, as shown below:
```py
>>> df = ps.DataFrame({'A': [2, 1, np.nan]}, index=['b', 'a', np.nan])
>>> df
       A
b    2.0
a    1.0
NaN  NaN
>>> df.sort_index(ignore_index=True)
     A
0  1.0
1  2.0
2  NaN
```
### How was this patch tested?
Unit tests.
Closes #36184 from xinrong-databricks/frame.sort_index.ignore_index.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/frame.py | 22 +++++++++++++++++++++-
python/pyspark/pandas/tests/test_dataframe.py | 12 ++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 36e992fef93..a78aaa66f08 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -7014,6 +7014,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
inplace: bool = False,
kind: str = None,
na_position: str = "last",
+ ignore_index: bool = False,
) -> Optional["DataFrame"]:
"""
Sort object by labels (along an axis)
@@ -7033,6 +7034,10 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
na_position : {‘first’, ‘last’}, default ‘last’
first puts NaNs at the beginning, last puts NaNs at the end. Not
implemented for
MultiIndex.
+ ignore_index : bool, default False
+ If True, the resulting axis will be labeled 0, 1, …, n - 1.
+
+ .. versionadded:: 3.4.0
Returns
-------
@@ -7060,6 +7065,12 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
a 1.0
b 2.0
+ >>> df.sort_index(ignore_index=True)
+ A
+ 0 1.0
+ 1 2.0
+ 2 NaN
+
>>> df.sort_index(inplace=True)
>>> df
A
@@ -7091,6 +7102,13 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
b 0 1 2
a 1 2 1
b 1 0 3
+
+ >>> df.sort_index(ignore_index=True)
+ A B
+ 0 3 0
+ 1 2 1
+ 2 1 2
+ 3 0 3
"""
inplace = validate_bool_kwarg(inplace, "inplace")
axis = validate_axis(axis)
@@ -7112,10 +7130,12 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
psdf = self._sort(by=by, ascending=ascending, na_position=na_position)
if inplace:
+ if ignore_index:
+ psdf.reset_index(drop=True, inplace=inplace)
self._update_internal_frame(psdf._internal)
return None
else:
- return psdf
+ return psdf.reset_index(drop=True) if ignore_index else psdf
def swaplevel(
self, i: Union[int, Name] = -2, j: Union[int, Name] = -1, axis: Axis = 0
diff --git a/python/pyspark/pandas/tests/test_dataframe.py
b/python/pyspark/pandas/tests/test_dataframe.py
index fa32b38d3c9..b99a9a2e807 100644
--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@@ -1678,6 +1678,8 @@ class DataFrameTest(ComparisonTestBase, SQLTestUtils):
# Assert default behavior without parameters
self.assert_eq(psdf.sort_index(), pdf.sort_index())
+ # Assert ignoring index
+ self.assert_eq(psdf.sort_index(ignore_index=True),
pdf.sort_index(ignore_index=True))
# Assert sorting descending
self.assert_eq(psdf.sort_index(ascending=False),
pdf.sort_index(ascending=False))
# Assert sorting NA indices first
@@ -1694,6 +1696,14 @@ class DataFrameTest(ComparisonTestBase, SQLTestUtils):
self.assertEqual(psdf.sort_index(inplace=True),
pdf.sort_index(inplace=True))
self.assert_eq(psdf, pdf)
self.assert_eq(psserA, pserA)
+ pserA = pdf.A
+ psserA = psdf.A
+ self.assertEqual(
+ psdf.sort_index(inplace=True, ascending=False, ignore_index=True),
+ pdf.sort_index(inplace=True, ascending=False, ignore_index=True),
+ )
+ self.assert_eq(psdf, pdf)
+ self.assert_eq(psserA, pserA)
# Assert multi-indices
pdf = pd.DataFrame(
@@ -1703,6 +1713,8 @@ class DataFrameTest(ComparisonTestBase, SQLTestUtils):
self.assert_eq(psdf.sort_index(), pdf.sort_index())
self.assert_eq(psdf.sort_index(level=[1, 0]), pdf.sort_index(level=[1,
0]))
self.assert_eq(psdf.reset_index().sort_index(),
pdf.reset_index().sort_index())
+ # Assert ignoring index
+ self.assert_eq(psdf.sort_index(ignore_index=True),
pdf.sort_index(ignore_index=True))
# Assert with multi-index columns
columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]