This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b7c00eaa903c [SPARK-55293][PS][TESTS] Avoid using old offset aliases
b7c00eaa903c is described below
commit b7c00eaa903c0b28a538283963558482350c4216
Author: Takuya Ueshin <[email protected]>
AuthorDate: Mon Feb 2 06:56:25 2026 +0900
[SPARK-55293][PS][TESTS] Avoid using old offset aliases
### What changes were proposed in this pull request?
Avoid using old (deprecated) pandas offset aliases by replacing them with their current equivalents:
- `"H"` -> `"h"`
- `"T"` -> `"min"`
- `"S"` -> `"s"`
- `"L"` -> `"ms"`
- `"U"` -> `"us"`
- `"Q"` -> `"QE"`
- `"Y"` -> `"YE"`
https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
### Why are the changes needed?
apache/spark#54018 changed the offset alias `M` to `ME`.
Other aliases are likewise deprecated in pandas 2.2 / removed in
pandas 3.0, so they are updated here as well. A few remaining `M` usages in the tests are also switched to `ME`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the related tests and docstring examples to use the new aliases.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54077 from ueshin/issues/SPARK-55293/offset_alias.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/frame.py | 2 +-
python/pyspark/pandas/indexes/datetimes.py | 6 +++---
python/pyspark/pandas/series.py | 4 ++--
python/pyspark/pandas/tests/indexes/test_datetime.py | 12 ++++++------
python/pyspark/pandas/tests/indexes/test_indexing_adv.py | 4 ++--
python/pyspark/pandas/tests/indexes/test_indexing_loc.py | 2 +-
python/pyspark/pandas/tests/resample/test_error.py | 6 +++---
python/pyspark/pandas/tests/resample/test_frame.py | 16 ++++++++--------
python/pyspark/pandas/tests/resample/test_series.py | 14 +++++++-------
python/pyspark/pandas/tests/resample/test_timezone.py | 4 ++--
python/pyspark/pandas/tests/series/test_datetime.py | 6 +++---
python/pyspark/pandas/tests/test_namespace.py | 8 ++++----
12 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 15ffee9fe2dd..64ee6fb812b3 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -3837,7 +3837,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
Examples
--------
- >>> idx = pd.date_range('2018-04-09', periods=4, freq='12H')
+ >>> idx = pd.date_range('2018-04-09', periods=4, freq='12h')
>>> psdf = ps.DataFrame({'A': [1, 2, 3, 4]}, index=idx)
>>> psdf
A
diff --git a/python/pyspark/pandas/indexes/datetimes.py
b/python/pyspark/pandas/indexes/datetimes.py
index ed7862da1009..fce760eeee22 100644
--- a/python/pyspark/pandas/indexes/datetimes.py
+++ b/python/pyspark/pandas/indexes/datetimes.py
@@ -693,7 +693,7 @@ class DatetimeIndex(Index):
Examples
--------
- >>> idx = ps.date_range(start='2014-08-01 10:00', freq='H', periods=3)
# doctest: +SKIP
+ >>> idx = ps.date_range(start='2014-08-01 10:00', freq='h', periods=3)
# doctest: +SKIP
>>> idx.normalize() # doctest: +SKIP
DatetimeIndex(['2014-08-01', '2014-08-01', '2014-08-01'],
dtype='datetime64[ns]', freq=None)
"""
@@ -761,7 +761,7 @@ class DatetimeIndex(Index):
Examples
--------
- >>> psidx = ps.date_range("2000-01-01", periods=3, freq="T")
+ >>> psidx = ps.date_range("2000-01-01", periods=3, freq="min")
>>> psidx
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
'2000-01-01 00:02:00'],
@@ -815,7 +815,7 @@ class DatetimeIndex(Index):
Examples
--------
- >>> psidx = ps.date_range("2000-01-01", periods=3, freq="T") #
doctest: +SKIP
+ >>> psidx = ps.date_range("2000-01-01", periods=3, freq="min") #
doctest: +SKIP
>>> psidx # doctest: +SKIP
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
'2000-01-01 00:02:00'],
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 7b6c97ea02cd..cc77b4dba5fa 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -6946,7 +6946,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
Examples
--------
- >>> idx = pd.date_range('2018-04-09', periods=4, freq='12H')
+ >>> idx = pd.date_range('2018-04-09', periods=4, freq='12h')
>>> psser = ps.Series([1, 2, 3, 4], index=idx)
>>> psser
2018-04-09 00:00:00 1
@@ -7206,7 +7206,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
--------
Start by creating a series with 9 one minute timestamps.
- >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
+ >>> index = pd.date_range('1/1/2000', periods=9, freq='min')
>>> series = ps.Series(range(9), index=index, name='V')
>>> series
2000-01-01 00:00:00 0
diff --git a/python/pyspark/pandas/tests/indexes/test_datetime.py
b/python/pyspark/pandas/tests/indexes/test_datetime.py
index b909ef81470c..41a4a862fd17 100644
--- a/python/pyspark/pandas/tests/indexes/test_datetime.py
+++ b/python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -26,17 +26,17 @@ class DatetimeIndexTestingFuncMixin:
def fixed_freqs(self):
return [
"D",
- "H",
- "T", # min
- "S",
- "L", # ms
- "U", # us
+ "h",
+ "min",
+ "s",
+ "ms",
+ "us",
# 'N' not supported
]
@property
def non_fixed_freqs(self):
- return ["W", "Q"]
+ return ["W", "QE"]
@property
def pidxs(self):
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
index 919d9cc9b569..d299fb01dfd7 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
@@ -261,7 +261,7 @@ class IndexingAdvMixin:
def test_getitem_timestamp_str(self):
pdf = pd.DataFrame(
{"A": np.random.randn(100), "B": np.random.randn(100)},
- index=pd.date_range("2011-01-01", freq="H", periods=100),
+ index=pd.date_range("2011-01-01", freq="h", periods=100),
)
psdf = ps.from_pandas(pdf)
@@ -286,7 +286,7 @@ class IndexingAdvMixin:
def test_getitem_period_str(self):
pdf = pd.DataFrame(
{"A": np.random.randn(100), "B": np.random.randn(100)},
- index=pd.period_range("2011-01-01", freq="H", periods=100),
+ index=pd.period_range("2011-01-01", freq="h", periods=100),
)
psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
index 7affdcff56c9..916863b61b86 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
@@ -207,7 +207,7 @@ class IndexingLocMixin:
def test_loc_timestamp_str(self):
pdf = pd.DataFrame(
{"A": np.random.randn(100), "B": np.random.randn(100)},
- index=pd.date_range("2011-01-01", freq="H", periods=100),
+ index=pd.date_range("2011-01-01", freq="h", periods=100),
)
psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/resample/test_error.py
b/python/pyspark/pandas/tests/resample/test_error.py
index 832e200bc124..89dd0723b84e 100644
--- a/python/pyspark/pandas/tests/resample/test_error.py
+++ b/python/pyspark/pandas/tests/resample/test_error.py
@@ -54,19 +54,19 @@ class ResampleErrorMixin:
psdf.A.resample("0D").sum()
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
- psdf.A.resample("0Y").sum()
+ psdf.A.resample("0YE").sum()
with self.assertRaisesRegex(ValueError, "invalid closed: 'middle'"):
psdf.A.resample("3D", closed="middle").sum()
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
- psdf.A.resample("3Y", closed="middle").sum()
+ psdf.A.resample("3YE", closed="middle").sum()
with self.assertRaisesRegex(ValueError, "invalid label: 'both'"):
psdf.A.resample("3D", label="both").sum()
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
- psdf.A.resample("3Y", label="both").sum()
+ psdf.A.resample("3YE", label="both").sum()
with self.assertRaisesRegex(
NotImplementedError, "`on` currently works only for TimestampType"
diff --git a/python/pyspark/pandas/tests/resample/test_frame.py
b/python/pyspark/pandas/tests/resample/test_frame.py
index 482913c85c65..20d5bec53dba 100644
--- a/python/pyspark/pandas/tests/resample/test_frame.py
+++ b/python/pyspark/pandas/tests/resample/test_frame.py
@@ -76,19 +76,19 @@ class ResampleFrameMixin:
@property
def pdf4(self):
np.random.seed(33)
- index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1H")
+ index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1h")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
def pdf5(self):
np.random.seed(44)
- index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02
06:07:08", freq="1T")
+ index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02
06:07:08", freq="1min")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
def pdf6(self):
np.random.seed(55)
- index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02
06:07:08", freq="1S")
+ index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02
06:07:08", freq="1s")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
@@ -127,14 +127,14 @@ class ResampleFrameMixin:
)
def test_dataframe_resample(self):
- self._test_resample(self.pdf4, self.psdf4, ["11H", "21D"], "left",
None, "mean")
- self._test_resample(self.pdf5, self.psdf5, ["55MIN", "2H", "D"],
"left", "left", "std")
- self._test_resample(self.pdf6, self.psdf6, ["29S", "10MIN", "3H"],
"left", "right", "var")
+ self._test_resample(self.pdf4, self.psdf4, ["11h", "21D"], "left",
None, "mean")
+ self._test_resample(self.pdf5, self.psdf5, ["55min", "2h", "D"],
"left", "left", "std")
+ self._test_resample(self.pdf6, self.psdf6, ["29s", "10min", "3h"],
"left", "right", "var")
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
- self._test_resample(self.pdf2, self.psdf2, ["3A", "11M", "D"],
None, "left", "max")
+ self._test_resample(self.pdf2, self.psdf2, ["3A", "11ME", "D"],
None, "left", "max")
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
- self._test_resample(self.pdf1, self.psdf1, ["3Y", "9M", "17D"],
None, None, "min")
+ self._test_resample(self.pdf1, self.psdf1, ["3Y", "9ME", "17D"],
None, None, "min")
class ResampleFrameTests(ResampleFrameMixin, PandasOnSparkTestCase, TestUtils):
diff --git a/python/pyspark/pandas/tests/resample/test_series.py
b/python/pyspark/pandas/tests/resample/test_series.py
index f0f508c23a22..9d6bb017dc8d 100644
--- a/python/pyspark/pandas/tests/resample/test_series.py
+++ b/python/pyspark/pandas/tests/resample/test_series.py
@@ -76,19 +76,19 @@ class ResampleSeriesMixin:
@property
def pdf4(self):
np.random.seed(33)
- index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1H")
+ index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1h")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
def pdf5(self):
np.random.seed(44)
- index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02
06:07:08", freq="1T")
+ index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02
06:07:08", freq="1min")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
def pdf6(self):
np.random.seed(55)
- index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02
06:07:08", freq="1S")
+ index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02
06:07:08", freq="1s")
return pd.DataFrame(np.random.rand(len(index), 2), index=index,
columns=list("AB"))
@property
@@ -126,11 +126,11 @@ class ResampleSeriesMixin:
)
def test_series_resample(self):
- self._test_resample(self.pdf2.A, self.psdf2.A, ["13M"], "right",
"left", "max")
- self._test_resample(self.pdf3.A, self.psdf3.A, ["1001H"], "right",
"right", "sum")
+ self._test_resample(self.pdf2.A, self.psdf2.A, ["13ME"], "right",
"left", "max")
+ self._test_resample(self.pdf3.A, self.psdf3.A, ["1001h"], "right",
"right", "sum")
self._test_resample(self.pdf4.A, self.psdf4.A, ["6D"], None, None,
"mean")
- self._test_resample(self.pdf5.A, self.psdf5.A, ["47T"], "left",
"left", "var")
- self._test_resample(self.pdf6.A, self.psdf6.A, ["111S"], "right",
"right", "std")
+ self._test_resample(self.pdf5.A, self.psdf5.A, ["47min"], "left",
"left", "var")
+ self._test_resample(self.pdf6.A, self.psdf6.A, ["111s"], "right",
"right", "std")
with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not
supported"):
self._test_resample(self.pdf1.A, self.psdf1.A, ["4Y"], "right",
None, "min")
diff --git a/python/pyspark/pandas/tests/resample/test_timezone.py
b/python/pyspark/pandas/tests/resample/test_timezone.py
index e783af507c93..7cf3df77de8d 100644
--- a/python/pyspark/pandas/tests/resample/test_timezone.py
+++ b/python/pyspark/pandas/tests/resample/test_timezone.py
@@ -57,8 +57,8 @@ class ResampleTimezoneMixin:
"spark.sql.timestampType": "TIMESTAMP_NTZ",
}
):
- p_resample = self.pdf.resample(rule="1001H", closed="right",
label="right")
- ps_resample = self.psdf.resample(rule="1001H", closed="right",
label="right")
+ p_resample = self.pdf.resample(rule="1001h", closed="right",
label="right")
+ ps_resample = self.psdf.resample(rule="1001h", closed="right",
label="right")
self.assert_eq(
p_resample.sum().sort_index(),
ps_resample.sum().sort_index(),
diff --git a/python/pyspark/pandas/tests/series/test_datetime.py
b/python/pyspark/pandas/tests/series/test_datetime.py
index f3314de8313a..a8374b38e66f 100644
--- a/python/pyspark/pandas/tests/series/test_datetime.py
+++ b/python/pyspark/pandas/tests/series/test_datetime.py
@@ -254,15 +254,15 @@ class SeriesDateTimeTestsMixin:
def test_round(self):
self.check_func(lambda x: x.dt.round(freq="min"))
- self.check_func(lambda x: x.dt.round(freq="H"))
+ self.check_func(lambda x: x.dt.round(freq="h"))
def test_floor(self):
self.check_func(lambda x: x.dt.floor(freq="min"))
- self.check_func(lambda x: x.dt.floor(freq="H"))
+ self.check_func(lambda x: x.dt.floor(freq="h"))
def test_ceil(self):
self.check_func(lambda x: x.dt.ceil(freq="min"))
- self.check_func(lambda x: x.dt.ceil(freq="H"))
+ self.check_func(lambda x: x.dt.ceil(freq="h"))
@unittest.skip("Unsupported locale setting")
def test_month_name(self):
diff --git a/python/pyspark/pandas/tests/test_namespace.py
b/python/pyspark/pandas/tests/test_namespace.py
index 151d61555af8..8a267f76c536 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -211,8 +211,8 @@ class NamespaceTestsMixin:
)
self.assert_eq(
- ps.date_range(start="1/1/2018", periods=5, freq="3M"),
- pd.date_range(start="1/1/2018", periods=5, freq="3M"),
+ ps.date_range(start="1/1/2018", periods=5, freq="3ME"),
+ pd.date_range(start="1/1/2018", periods=5, freq="3ME"),
)
self.assert_eq(
@@ -299,8 +299,8 @@ class NamespaceTestsMixin:
pd.timedelta_range(end="3 days", periods=3, closed="right"),
)
self.assert_eq(
- ps.timedelta_range(start="1 day", end="3 days", freq="6H"),
- pd.timedelta_range(start="1 day", end="3 days", freq="6H"),
+ ps.timedelta_range(start="1 day", end="3 days", freq="6h"),
+ pd.timedelta_range(start="1 day", end="3 days", freq="6h"),
)
self.assert_eq(
ps.timedelta_range(start="1 day", end="3 days", periods=4),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]