This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 048bc2c [SPARK-37673][PYTHON] Implement `ps.timedelta_range` method
048bc2c is described below
commit 048bc2cc055208a5590f948b57900f87328404fd
Author: Xinrong Meng <[email protected]>
AuthorDate: Sat Dec 18 15:29:00 2021 +0900
[SPARK-37673][PYTHON] Implement `ps.timedelta_range` method
### What changes were proposed in this pull request?
Implement `ps.timedelta_range` method.
The API is backed by `pd.timedelta_range` internally, following how
`ps.date_range` is implemented.
### Why are the changes needed?
To be consistent with pandas API.
### Does this PR introduce _any_ user-facing change?
Yes. `ps.timedelta_range` is supported now.
```py
>>> ps.timedelta_range(start="1 day", end="3 days")
TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)
>>> ps.timedelta_range(start="1 day", periods=3)
TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)
>>> ps.timedelta_range(start='1 day', end='2 days', freq='6H')
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
'1 days 18:00:00', '2 days 00:00:00'],
dtype='timedelta64[ns]', freq=None)
```
### How was this patch tested?
Unit tests.
Closes #34932 from xinrong-databricks/timedelta_range.
Lead-authored-by: Xinrong Meng <[email protected]>
Co-authored-by: xinrong-databricks
<[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../reference/pyspark.pandas/general_functions.rst | 3 +-
python/pyspark/pandas/namespace.py | 87 +++++++++++++++++++++-
python/pyspark/pandas/tests/test_namespace.py | 30 ++++++++
3 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/python/docs/source/reference/pyspark.pandas/general_functions.rst
b/python/docs/source/reference/pyspark.pandas/general_functions.rst
index 21cea6e..01af6d5 100644
--- a/python/docs/source/reference/pyspark.pandas/general_functions.rst
+++ b/python/docs/source/reference/pyspark.pandas/general_functions.rst
@@ -64,4 +64,5 @@ Top-level dealing with datetimelike
:toctree: api/
to_datetime
- date_range
\ No newline at end of file
+ date_range
+ timedelta_range
\ No newline at end of file
diff --git a/python/pyspark/pandas/namespace.py
b/python/pyspark/pandas/namespace.py
index 9e7be82..c0a8367 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -88,7 +88,7 @@ from pyspark.pandas.internal import (
from pyspark.pandas.series import Series, first_series
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.spark.utils import as_nullable_spark_type,
force_decimal_precision_scale
-from pyspark.pandas.indexes import Index, DatetimeIndex
+from pyspark.pandas.indexes import Index, DatetimeIndex, TimedeltaIndex
from pyspark.pandas.indexes.multi import MultiIndex
@@ -105,6 +105,7 @@ __all__ = [
"read_html",
"to_datetime",
"date_range",
+ "timedelta_range",
"get_dummies",
"concat",
"melt",
@@ -1886,6 +1887,90 @@ def date_range(
)
+def timedelta_range(
+ start: Union[str, Any] = None,
+ end: Union[str, Any] = None,
+ periods: Optional[int] = None,
+ freq: Optional[Union[str, DateOffset]] = None,
+ name: Optional[str] = None,
+ closed: Optional[str] = None,
+) -> TimedeltaIndex:
+ """
+ Return a fixed frequency TimedeltaIndex, with day as the default frequency.
+
+ Parameters
+ ----------
+ start : str or timedelta-like, optional
+ Left bound for generating timedeltas.
+ end : str or timedelta-like, optional
+ Right bound for generating timedeltas.
+ periods : int, optional
+ Number of periods to generate.
+ freq : str or DateOffset, default 'D'
+ Frequency strings can have multiples, e.g. '5H'.
+ name : str, default None
+ Name of the resulting TimedeltaIndex.
+ closed : {None, 'left', 'right'}, optional
+ Make the interval closed with respect to the given frequency to
+ the 'left', 'right', or both sides (None, the default).
+
+ Returns
+ -------
+ TimedeltaIndex
+
+ Notes
+ -----
+ Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
+ exactly three must be specified. If ``freq`` is omitted, the resulting
+ ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
+ ``start`` and ``end`` (closed on both sides).
+
+ To learn more about the frequency strings, please see `this link
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
+
+ Examples
+ --------
+ >>> ps.timedelta_range(start='1 day', periods=4) # doctest: +NORMALIZE_WHITESPACE
+ TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None)
+
+ The closed parameter specifies which endpoint is included.
+ The default behavior is to include both endpoints.
+
+ >>> ps.timedelta_range(start='1 day', periods=4, closed='right') # doctest: +NORMALIZE_WHITESPACE
+ TimedeltaIndex(['2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None)
+
+ The freq parameter specifies the frequency of the TimedeltaIndex.
+ Only fixed frequencies can be passed; non-fixed frequencies such as 'M' (month end) will raise an error.
+
+ >>> ps.timedelta_range(start='1 day', end='2 days', freq='6H') # doctest: +NORMALIZE_WHITESPACE
+ TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
+ '1 days 18:00:00', '2 days 00:00:00'],
+ dtype='timedelta64[ns]', freq=None)
+
+ Specify start, end, and periods; the frequency is generated automatically (linearly spaced).
+
+ >>> ps.timedelta_range(start='1 day', end='5 days', periods=4) # doctest: +NORMALIZE_WHITESPACE
+ TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
+ '5 days 00:00:00'],
+ dtype='timedelta64[ns]', freq=None)
+ """
+ assert freq not in ["N", "ns"], "nanoseconds is not supported"
+
+ return cast(
+ TimedeltaIndex,
+ ps.from_pandas(
+ pd.timedelta_range(
+ start=start,
+ end=end,
+ periods=periods,
+ freq=freq,
+ name=name,
+ closed=closed,
+ )
+ ),
+ )
+
+
def get_dummies(
data: Union[DataFrame, Series],
prefix: Optional[Union[str, List[str], Dict[str, str]]] = None,
diff --git a/python/pyspark/pandas/tests/test_namespace.py
b/python/pyspark/pandas/tests/test_namespace.py
index fdd2953..47957d6 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -235,6 +235,36 @@ class NamespaceTest(PandasOnSparkTestCase, SQLTestUtils):
AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5,
freq="N")
)
+ def test_timedelta_range(self):
+ self.assert_eq(
+ ps.timedelta_range(start="1 day", end="3 days"),
+ pd.timedelta_range(start="1 day", end="3 days"),
+ )
+ self.assert_eq(
+ ps.timedelta_range(start="1 day", periods=3),
+ pd.timedelta_range(start="1 day", periods=3),
+ )
+ self.assert_eq(
+ ps.timedelta_range(end="3 days", periods=3),
+ pd.timedelta_range(end="3 days", periods=3),
+ )
+ self.assert_eq(
+ ps.timedelta_range(end="3 days", periods=3, closed="right"),
+ pd.timedelta_range(end="3 days", periods=3, closed="right"),
+ )
+ self.assert_eq(
+ ps.timedelta_range(start="1 day", end="3 days", freq="6H"),
+ pd.timedelta_range(start="1 day", end="3 days", freq="6H"),
+ )
+ self.assert_eq(
+ ps.timedelta_range(start="1 day", end="3 days", periods=4),
+ pd.timedelta_range(start="1 day", end="3 days", periods=4),
+ )
+
+ self.assertRaises(
+ AssertionError, lambda: ps.timedelta_range(start="1 day",
periods=3, freq="ns")
+ )
+
def test_concat_index_axis(self):
pdf = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5], "C": [6, 7, 8]})
# TODO: pdf.columns.names = ["ABC"]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]