This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 14c8552eb61 [SPARK-38765][PYTHON] Implement `inplace` parameter of
`Series.clip`
14c8552eb61 is described below
commit 14c8552eb6169c73755aa2fbd15a0b6ffc93fe2d
Author: Xinrong Meng <[email protected]>
AuthorDate: Fri Apr 8 09:54:01 2022 +0900
[SPARK-38765][PYTHON] Implement `inplace` parameter of `Series.clip`
### What changes were proposed in this pull request?
Implement the `inplace` parameter of `Series.clip`, so that a series can be clipped in place instead of returning a new series.
### Why are the changes needed?
To reach parity with pandas.
### Does this PR introduce _any_ user-facing change?
Yes.
The `inplace` parameter of `Series.clip` is now supported, as shown below:
```py
>>> psser = ps.Series([0, 2, 4])
>>> psser
0 0
1 2
2 4
dtype: int64
>>> psser.clip(2, 3, inplace=True)
>>> psser
0 2
1 2
2 3
dtype: int64
```
### How was this patch tested?
Unit tests.
Closes #36041 from xinrong-databricks/series.clip.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/series.py | 43 +++++++++++++++++++++++++-----
python/pyspark/pandas/tests/test_series.py | 9 ++++++-
2 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 5c6fe3b78a3..9856b59947a 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -2219,7 +2219,12 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
else:
return first_series(psdf)
- def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None) -> "Series":
+ def clip(
+ self,
+ lower: Union[float, int] = None,
+ upper: Union[float, int] = None,
+ inplace: bool = False,
+ ) -> "Series":
"""
Trim values at input threshold(s).
@@ -2231,6 +2236,8 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
Minimum threshold value. All values below this threshold will be
set to it.
upper : float or int, default None
Maximum threshold value. All values above this threshold will be
set to it.
+ inplace : bool, default False
+ if True, perform operation in-place
Returns
-------
@@ -2239,12 +2246,28 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
Examples
--------
- >>> ps.Series([0, 2, 4]).clip(1, 3)
+ >>> psser = ps.Series([0, 2, 4])
+ >>> psser
+ 0 0
+ 1 2
+ 2 4
+ dtype: int64
+
+ >>> psser.clip(1, 3)
0 1
1 2
2 3
dtype: int64
+ Clip can be performed in-place.
+
+ >>> psser.clip(2, 3, inplace=True)
+ >>> psser
+ 0 2
+ 1 2
+ 2 3
+ dtype: int64
+
Notes
-----
One difference between this implementation and pandas is that running
@@ -2266,10 +2289,18 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
scol = F.when(scol < lower, lower).otherwise(scol)
if upper is not None:
scol = F.when(scol > upper, upper).otherwise(scol)
- return self._with_new_scol(
- scol.alias(self._internal.data_spark_column_names[0]),
- field=self._internal.data_fields[0],
- )
+ if inplace:
+ internal = self._internal.copy(
+ data_spark_columns=[scol.alias(self._internal.data_spark_column_names[0])],
+ data_fields=[self._internal.data_fields[0]],
+ )
+ self._psdf._update_internal_frame(internal, requires_same_anchor=False)
+ return None
+ else:
+ return self._with_new_scol(
+ scol.alias(self._internal.data_spark_column_names[0]),
+ field=self._internal.data_fields[0],
+ )
else:
return self
diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py
index 6cb52267d1d..0fac8ac6515 100644
--- a/python/pyspark/pandas/tests/test_series.py
+++ b/python/pyspark/pandas/tests/test_series.py
@@ -1106,7 +1106,7 @@ class SeriesTest(PandasOnSparkTestCase, SQLTestUtils):
getattr(psser, name)
def test_clip(self):
- pser = pd.Series([0, 2, 4], index=np.random.rand(3))
+ pser = pd.Series([0, 2, 4], index=np.random.rand(3), name="x")
psser = ps.from_pandas(pser)
# Assert list-like values are not accepted for 'lower' and 'upper'
@@ -1124,6 +1124,13 @@ class SeriesTest(PandasOnSparkTestCase, SQLTestUtils):
self.assert_eq(psser.clip(upper=3), pser.clip(upper=3))
# Assert lower and upper
self.assert_eq(psser.clip(1, 3), pser.clip(1, 3))
+ self.assert_eq((psser + 1).clip(1, 3), (pser + 1).clip(1, 3))
+
+ # Assert inplace is True
+ psser = ps.from_pandas(pser)
+ pser.clip(1, 3, inplace=True)
+ psser.clip(1, 3, inplace=True)
+ self.assert_eq(psser, pser)
# Assert behavior on string values
str_psser = ps.Series(["a", "b", "c"])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]