[spark] branch master updated: [SPARK-38704][PYTHON] Support string `inclusive` parameter of `Series.between`

gurwls223 Thu, 31 Mar 2022 21:39:29 -0700

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 1d09e7b  [SPARK-38704][PYTHON] Support string `inclusive` parameter of `Series.between`
1d09e7b is described below

commit 1d09e7be3aedd43a0b8beb44f17b7e79b9e9d402
Author: Xinrong Meng <[email protected]>
AuthorDate: Fri Apr 1 13:38:15 2022 +0900

    [SPARK-38704][PYTHON] Support string `inclusive` parameter of `Series.between`
    
    ### What changes were proposed in this pull request?
    Support string `inclusive` parameter of `Series.between`
    
    ### Why are the changes needed?
    To reach parity with Pandas.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. String `inclusive` is supported now as below.
    ```py
            >>> s = ps.Series([2, 0, 4, 8, np.nan])
    
            With `inclusive` set to "both" boundary values are included:
    
            >>> s.between(0, 4, inclusive="both")
            0     True
            1     True
            2     True
            3    False
            4    False
            dtype: bool
    
            With `inclusive` set to "neither" boundary values are excluded:
    
            >>> s.between(0, 4, inclusive="neither")
            0     True
            1    False
            2    False
            3    False
            4    False
            dtype: bool
    
            With `inclusive` set to "right" only right boundary value is included:
    
            >>> s.between(0, 4, inclusive="right")
            0     True
            1    False
            2     True
            3    False
            4    False
            dtype: bool
    
            With `inclusive` set to "left" only left boundary value is included:
    
            >>> s.between(0, 4, inclusive="left")
            0     True
            1     True
            2    False
            3    False
            4    False
            dtype: bool
    
    ```
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #36015 from xinrong-databricks/series.between.
    
    Authored-by: Xinrong Meng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/pandas/series.py            | 61 +++++++++++++++++++++++++-----
 python/pyspark/pandas/tests/test_series.py | 24 ++++++++++++
 2 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index a73ea1e..5c195da 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -22,6 +22,7 @@ import datetime
 import re
 import inspect
 import sys
+import warnings
 from collections.abc import Mapping
 from functools import partial, reduce
 from typing import (
@@ -853,7 +854,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         """
         return self.rfloordiv(other), self.rmod(other)
 
-    def between(self, left: Any, right: Any, inclusive: bool = True) -> "Series":
+    def between(self, left: Any, right: Any, inclusive: Union[bool, str] = "both") -> "Series":
         """
         Return boolean Series equivalent to left <= series <= right.
         This function returns a boolean vector containing `True` wherever the
@@ -866,8 +867,9 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
             Left boundary.
         right : scalar or list-like
             Right boundary.
-        inclusive : bool, default True
-            Include boundaries.
+        inclusive : {"both", "neither", "left", "right"} or boolean. "both" by default.
+            Include boundaries. Whether to set each bound as closed or open.
+            Booleans are deprecated in favour of `both` or `neither`.
 
         Returns
         -------
@@ -890,19 +892,39 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
 
         Boundary values are included by default:
 
-        >>> s.between(1, 4)
+        >>> s.between(0, 4)
         0     True
-        1    False
+        1     True
         2     True
         3    False
         4    False
         dtype: bool
 
-        With `inclusive` set to ``False`` boundary values are excluded:
+        With `inclusive` set to "neither" boundary values are excluded:
+
+        >>> s.between(0, 4, inclusive="neither")
+        0     True
+        1    False
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        With `inclusive` set to "right" only right boundary value is included:
 
-        >>> s.between(1, 4, inclusive=False)
+        >>> s.between(0, 4, inclusive="right")
         0     True
         1    False
+        2     True
+        3    False
+        4    False
+        dtype: bool
+
+        With `inclusive` set to "left" only left boundary value is included:
+
+        >>> s.between(0, 4, inclusive="left")
+        0     True
+        1     True
         2    False
         3    False
         4    False
@@ -918,12 +940,33 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         3    False
         dtype: bool
         """
-        if inclusive:
+        if inclusive is True or inclusive is False:
+            warnings.warn(
+                "Boolean inputs to the `inclusive` argument are deprecated in "
+                "favour of `both` or `neither`.",
+                FutureWarning,
+            )
+            if inclusive:
+                inclusive = "both"
+            else:
+                inclusive = "neither"
+
+        if inclusive == "both":
             lmask = self >= left
             rmask = self <= right
-        else:
+        elif inclusive == "left":
+            lmask = self >= left
+            rmask = self < right
+        elif inclusive == "right":
+            lmask = self > left
+            rmask = self <= right
+        elif inclusive == "neither":
             lmask = self > left
             rmask = self < right
+        else:
+            raise ValueError(
+                "Inclusive has to be either string of 'both'," "'left', 'right', or 'neither'."
+            )
 
         return lmask & rmask
 
diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py
index 3ed8866..dafb519 100644
--- a/python/pyspark/pandas/tests/test_series.py
+++ b/python/pyspark/pandas/tests/test_series.py
@@ -2970,6 +2970,30 @@ class SeriesTest(PandasOnSparkTestCase, SQLTestUtils):
         self.assert_eq(pser.rpow(np.nan), psser.rpow(np.nan))
         self.assert_eq(1 ** pser, 1 ** psser)
 
+    def test_between(self):
+        pser = pd.Series([np.nan, 1, 2, 3, 4])
+        psser = ps.from_pandas(pser)
+        self.assert_eq(psser.between(1, 4), pser.between(1, 4))
+        self.assert_eq(psser.between(1, 4, inclusive="both"), pser.between(1, 4, inclusive="both"))
+        self.assert_eq(
+            psser.between(1, 4, inclusive="neither"), pser.between(1, 4, inclusive="neither")
+        )
+        self.assert_eq(psser.between(1, 4, inclusive="left"), pser.between(1, 4, inclusive="left"))
+        self.assert_eq(
+            psser.between(1, 4, inclusive="right"), pser.between(1, 4, inclusive="right")
+        )
+        expected_err_msg = (
+            "Inclusive has to be either string of 'both'," "'left', 'right', or 'neither'"
+        )
+        with self.assertRaisesRegex(ValueError, expected_err_msg):
+            psser.between(1, 4, inclusive="middle")
+
+        # Test for backward compatibility
+        self.assert_eq(psser.between(1, 4, inclusive=True), pser.between(1, 4, inclusive=True))
+        self.assert_eq(psser.between(1, 4, inclusive=False), pser.between(1, 4, inclusive=False))
+        with self.assertWarns(FutureWarning):
+            psser.between(1, 4, inclusive=True)
+
     def test_between_time(self):
         idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
         pser = pd.Series([1, 2, 3, 4], index=idx)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-38704][PYTHON] Support string `inclusive` parameter of `Series.between`

Reply via email to