[spark] branch master updated: [SPARK-37673][PYTHON] Implement `ps.timedelta_range` method

gurwls223 Fri, 17 Dec 2021 22:29:59 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 048bc2c  [SPARK-37673][PYTHON] Implement `ps.timedelta_range` method
048bc2c is described below

commit 048bc2cc055208a5590f948b57900f87328404fd
Author: Xinrong Meng <[email protected]>
AuthorDate: Sat Dec 18 15:29:00 2021 +0900

    [SPARK-37673][PYTHON] Implement `ps.timedelta_range` method
    
    ### What changes were proposed in this pull request?
    Implement `ps.timedelta_range` method.
    
    The API is backed by `pd.timedelta_range` internally, following how `ps.date_range` is implemented.
    
    ### Why are the changes needed?
    To be consistent with pandas API.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. `ps.timedelta_range` is supported now.
    
    ```py
    >>> ps.timedelta_range(start="1 day", end="3 days")
    TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)
    >>> ps.timedelta_range(start="1 day", periods=3)
    TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)
    >>> ps.timedelta_range(start='1 day', end='2 days', freq='6H')
    TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
                    '1 days 18:00:00', '2 days 00:00:00'],
                   dtype='timedelta64[ns]', freq=None)
    ```
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #34932 from xinrong-databricks/timedelta_range.
    
    Lead-authored-by: Xinrong Meng <[email protected]>
    Co-authored-by: xinrong-databricks <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../reference/pyspark.pandas/general_functions.rst |  3 +-
 python/pyspark/pandas/namespace.py                 | 87 +++++++++++++++++++++-
 python/pyspark/pandas/tests/test_namespace.py      | 30 ++++++++
 3 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/python/docs/source/reference/pyspark.pandas/general_functions.rst b/python/docs/source/reference/pyspark.pandas/general_functions.rst
index 21cea6e..01af6d5 100644
--- a/python/docs/source/reference/pyspark.pandas/general_functions.rst
+++ b/python/docs/source/reference/pyspark.pandas/general_functions.rst
@@ -64,4 +64,5 @@ Top-level dealing with datetimelike
    :toctree: api/
 
    to_datetime
-   date_range
\ No newline at end of file
+   date_range
+   timedelta_range
\ No newline at end of file
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index 9e7be82..c0a8367 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -88,7 +88,7 @@ from pyspark.pandas.internal import (
 from pyspark.pandas.series import Series, first_series
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale
-from pyspark.pandas.indexes import Index, DatetimeIndex
+from pyspark.pandas.indexes import Index, DatetimeIndex, TimedeltaIndex
 from pyspark.pandas.indexes.multi import MultiIndex
 
 
@@ -105,6 +105,7 @@ __all__ = [
     "read_html",
     "to_datetime",
     "date_range",
+    "timedelta_range",
     "get_dummies",
     "concat",
     "melt",
@@ -1886,6 +1887,90 @@ def date_range(
     )
 
 
+def timedelta_range(
+    start: Union[str, Any] = None,
+    end: Union[str, Any] = None,
+    periods: Optional[int] = None,
+    freq: Optional[Union[str, DateOffset]] = None,
+    name: Optional[str] = None,
+    closed: Optional[str] = None,
+) -> TimedeltaIndex:
+    """
+    Return a fixed frequency TimedeltaIndex, with day as the default frequency.
+
+    Parameters
+    ----------
+    start : str or timedelta-like, optional
+        Left bound for generating timedeltas.
+    end : str or timedelta-like, optional
+        Right bound for generating timedeltas.
+    periods : int, optional
+        Number of periods to generate.
+    freq : str or DateOffset, default 'D'
+        Frequency strings can have multiples, e.g. '5H'.
+    name : str, default None
+        Name of the resulting TimedeltaIndex.
+    closed : {None, 'left', 'right'}, optional
+        Make the interval closed with respect to the given frequency to
+        the 'left', 'right', or both sides (None, the default).
+
+    Returns
+    -------
+    TimedeltaIndex
+
+    Notes
+    -----
+    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
+    exactly three must be specified. If ``freq`` is omitted, the resulting
+    ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
+    ``start`` and ``end`` (closed on both sides).
+
+    To learn more about the frequency strings, please see `this link
+    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
+
+    Examples
+    --------
+    >>> ps.timedelta_range(start='1 day', periods=4)  # doctest: +NORMALIZE_WHITESPACE
+    TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None)
+
+    The closed parameter specifies which endpoint is included.
+    The default behavior is to include both endpoints.
+
+    >>> ps.timedelta_range(start='1 day', periods=4, closed='right')  # doctest: +NORMALIZE_WHITESPACE
+    TimedeltaIndex(['2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None)
+
+    The freq parameter specifies the frequency of the TimedeltaIndex.
+    Only fixed frequencies can be passed, non-fixed frequencies such as ‘M’ (month end) will raise.
+
+    >>> ps.timedelta_range(start='1 day', end='2 days', freq='6H')  # doctest: +NORMALIZE_WHITESPACE
+    TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
+                    '1 days 18:00:00', '2 days 00:00:00'],
+                   dtype='timedelta64[ns]', freq=None)
+
+    Specify start, end, and periods; the frequency is generated automatically (linearly spaced).
+
+    >>> ps.timedelta_range(start='1 day', end='5 days', periods=4)  # doctest: +NORMALIZE_WHITESPACE
+    TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
+                    '5 days 00:00:00'],
+                   dtype='timedelta64[ns]', freq=None)
+    """
+    assert freq not in ["N", "ns"], "nanoseconds is not supported"
+
+    return cast(
+        TimedeltaIndex,
+        ps.from_pandas(
+            pd.timedelta_range(
+                start=start,
+                end=end,
+                periods=periods,
+                freq=freq,
+                name=name,
+                closed=closed,
+            )
+        ),
+    )
+
+
 def get_dummies(
     data: Union[DataFrame, Series],
     prefix: Optional[Union[str, List[str], Dict[str, str]]] = None,
diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py
index fdd2953..47957d6 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -235,6 +235,36 @@ class NamespaceTest(PandasOnSparkTestCase, SQLTestUtils):
             AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, freq="N")
         )
 
+    def test_timedelta_range(self):
+        self.assert_eq(
+            ps.timedelta_range(start="1 day", end="3 days"),
+            pd.timedelta_range(start="1 day", end="3 days"),
+        )
+        self.assert_eq(
+            ps.timedelta_range(start="1 day", periods=3),
+            pd.timedelta_range(start="1 day", periods=3),
+        )
+        self.assert_eq(
+            ps.timedelta_range(end="3 days", periods=3),
+            pd.timedelta_range(end="3 days", periods=3),
+        )
+        self.assert_eq(
+            ps.timedelta_range(end="3 days", periods=3, closed="right"),
+            pd.timedelta_range(end="3 days", periods=3, closed="right"),
+        )
+        self.assert_eq(
+            ps.timedelta_range(start="1 day", end="3 days", freq="6H"),
+            pd.timedelta_range(start="1 day", end="3 days", freq="6H"),
+        )
+        self.assert_eq(
+            ps.timedelta_range(start="1 day", end="3 days", periods=4),
+            pd.timedelta_range(start="1 day", end="3 days", periods=4),
+        )
+
+        self.assertRaises(
+            AssertionError, lambda: ps.timedelta_range(start="1 day", periods=3, freq="ns")
+        )
+
     def test_concat_index_axis(self):
         pdf = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5], "C": [6, 7, 8]})
         # TODO: pdf.columns.names = ["ABC"]

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch master updated: [SPARK-37673][PYTHON] Implement `ps.timedelta_range` method

Reply via email to