(spark) branch master updated: [SPARK-55293][PS][TESTS] Avoid using old offset aliases

gurwls223 Sun, 01 Feb 2026 13:57:03 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new b7c00eaa903c [SPARK-55293][PS][TESTS] Avoid using old offset aliases
b7c00eaa903c is described below

commit b7c00eaa903c0b28a538283963558482350c4216
Author: Takuya Ueshin <[email protected]>
AuthorDate: Mon Feb 2 06:56:25 2026 +0900

    [SPARK-55293][PS][TESTS] Avoid using old offset aliases
    
    ### What changes were proposed in this pull request?
    
    Replace deprecated pandas offset aliases with their current equivalents in docstring examples and tests:
    
    - `"H"` -> `"h"`
    - `"T"` -> `"min"`
    - `"S"` -> `"s"`
    - `"L"` -> `"ms"`
    - `"U"` -> `"us"`
    - `"Q"` -> `"QE"`
    - `"Y"` -> `"YE"`
    - `"M"` -> `"ME"` (remaining usages in the resample and namespace tests)
    
    https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
    
    ### Why are the changes needed?
    
    apache/spark#54018 replaced the offset alias `M` with `ME`.
    
    Other offset aliases are also deprecated in pandas 2.2 and removed in pandas 3.0.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Updated the related tests and docstring examples to use the new aliases.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #54077 from ueshin/issues/SPARK-55293/offset_alias.
    
    Authored-by: Takuya Ueshin <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/pandas/frame.py                           |  2 +-
 python/pyspark/pandas/indexes/datetimes.py               |  6 +++---
 python/pyspark/pandas/series.py                          |  4 ++--
 python/pyspark/pandas/tests/indexes/test_datetime.py     | 12 ++++++------
 python/pyspark/pandas/tests/indexes/test_indexing_adv.py |  4 ++--
 python/pyspark/pandas/tests/indexes/test_indexing_loc.py |  2 +-
 python/pyspark/pandas/tests/resample/test_error.py       |  6 +++---
 python/pyspark/pandas/tests/resample/test_frame.py       | 16 ++++++++--------
 python/pyspark/pandas/tests/resample/test_series.py      | 14 +++++++-------
 python/pyspark/pandas/tests/resample/test_timezone.py    |  4 ++--
 python/pyspark/pandas/tests/series/test_datetime.py      |  6 +++---
 python/pyspark/pandas/tests/test_namespace.py            |  8 ++++----
 12 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 15ffee9fe2dd..64ee6fb812b3 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -3837,7 +3837,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
 
         Examples
         --------
-        >>> idx = pd.date_range('2018-04-09', periods=4, freq='12H')
+        >>> idx = pd.date_range('2018-04-09', periods=4, freq='12h')
         >>> psdf = ps.DataFrame({'A': [1, 2, 3, 4]}, index=idx)
         >>> psdf
                              A
diff --git a/python/pyspark/pandas/indexes/datetimes.py 
b/python/pyspark/pandas/indexes/datetimes.py
index ed7862da1009..fce760eeee22 100644
--- a/python/pyspark/pandas/indexes/datetimes.py
+++ b/python/pyspark/pandas/indexes/datetimes.py
@@ -693,7 +693,7 @@ class DatetimeIndex(Index):
 
         Examples
         --------
-        >>> idx = ps.date_range(start='2014-08-01 10:00', freq='H', periods=3) 
 # doctest: +SKIP
+        >>> idx = ps.date_range(start='2014-08-01 10:00', freq='h', periods=3) 
 # doctest: +SKIP
         >>> idx.normalize()  # doctest: +SKIP
         DatetimeIndex(['2014-08-01', '2014-08-01', '2014-08-01'], 
dtype='datetime64[ns]', freq=None)
         """
@@ -761,7 +761,7 @@ class DatetimeIndex(Index):
 
         Examples
         --------
-        >>> psidx = ps.date_range("2000-01-01", periods=3, freq="T")
+        >>> psidx = ps.date_range("2000-01-01", periods=3, freq="min")
         >>> psidx
         DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                        '2000-01-01 00:02:00'],
@@ -815,7 +815,7 @@ class DatetimeIndex(Index):
 
         Examples
         --------
-        >>> psidx = ps.date_range("2000-01-01", periods=3, freq="T")  # 
doctest: +SKIP
+        >>> psidx = ps.date_range("2000-01-01", periods=3, freq="min")  # 
doctest: +SKIP
         >>> psidx  # doctest: +SKIP
         DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                        '2000-01-01 00:02:00'],
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 7b6c97ea02cd..cc77b4dba5fa 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -6946,7 +6946,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
 
         Examples
         --------
-        >>> idx = pd.date_range('2018-04-09', periods=4, freq='12H')
+        >>> idx = pd.date_range('2018-04-09', periods=4, freq='12h')
         >>> psser = ps.Series([1, 2, 3, 4], index=idx)
         >>> psser
         2018-04-09 00:00:00    1
@@ -7206,7 +7206,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         --------
         Start by creating a series with 9 one minute timestamps.
 
-        >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
+        >>> index = pd.date_range('1/1/2000', periods=9, freq='min')
         >>> series = ps.Series(range(9), index=index, name='V')
         >>> series
         2000-01-01 00:00:00    0
diff --git a/python/pyspark/pandas/tests/indexes/test_datetime.py 
b/python/pyspark/pandas/tests/indexes/test_datetime.py
index b909ef81470c..41a4a862fd17 100644
--- a/python/pyspark/pandas/tests/indexes/test_datetime.py
+++ b/python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -26,17 +26,17 @@ class DatetimeIndexTestingFuncMixin:
     def fixed_freqs(self):
         return [
             "D",
-            "H",
-            "T",  # min
-            "S",
-            "L",  # ms
-            "U",  # us
+            "h",
+            "min",
+            "s",
+            "ms",
+            "us",
             # 'N' not supported
         ]
 
     @property
     def non_fixed_freqs(self):
-        return ["W", "Q"]
+        return ["W", "QE"]
 
     @property
     def pidxs(self):
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
index 919d9cc9b569..d299fb01dfd7 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
@@ -261,7 +261,7 @@ class IndexingAdvMixin:
     def test_getitem_timestamp_str(self):
         pdf = pd.DataFrame(
             {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="H", periods=100),
+            index=pd.date_range("2011-01-01", freq="h", periods=100),
         )
         psdf = ps.from_pandas(pdf)
 
@@ -286,7 +286,7 @@ class IndexingAdvMixin:
     def test_getitem_period_str(self):
         pdf = pd.DataFrame(
             {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.period_range("2011-01-01", freq="H", periods=100),
+            index=pd.period_range("2011-01-01", freq="h", periods=100),
         )
         psdf = ps.from_pandas(pdf)
 
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
index 7affdcff56c9..916863b61b86 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
@@ -207,7 +207,7 @@ class IndexingLocMixin:
     def test_loc_timestamp_str(self):
         pdf = pd.DataFrame(
             {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="H", periods=100),
+            index=pd.date_range("2011-01-01", freq="h", periods=100),
         )
         psdf = ps.from_pandas(pdf)
 
diff --git a/python/pyspark/pandas/tests/resample/test_error.py 
b/python/pyspark/pandas/tests/resample/test_error.py
index 832e200bc124..89dd0723b84e 100644
--- a/python/pyspark/pandas/tests/resample/test_error.py
+++ b/python/pyspark/pandas/tests/resample/test_error.py
@@ -54,19 +54,19 @@ class ResampleErrorMixin:
             psdf.A.resample("0D").sum()
 
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
-            psdf.A.resample("0Y").sum()
+            psdf.A.resample("0YE").sum()
 
         with self.assertRaisesRegex(ValueError, "invalid closed: 'middle'"):
             psdf.A.resample("3D", closed="middle").sum()
 
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
-            psdf.A.resample("3Y", closed="middle").sum()
+            psdf.A.resample("3YE", closed="middle").sum()
 
         with self.assertRaisesRegex(ValueError, "invalid label: 'both'"):
             psdf.A.resample("3D", label="both").sum()
 
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
-            psdf.A.resample("3Y", label="both").sum()
+            psdf.A.resample("3YE", label="both").sum()
 
         with self.assertRaisesRegex(
             NotImplementedError, "`on` currently works only for TimestampType"
diff --git a/python/pyspark/pandas/tests/resample/test_frame.py 
b/python/pyspark/pandas/tests/resample/test_frame.py
index 482913c85c65..20d5bec53dba 100644
--- a/python/pyspark/pandas/tests/resample/test_frame.py
+++ b/python/pyspark/pandas/tests/resample/test_frame.py
@@ -76,19 +76,19 @@ class ResampleFrameMixin:
     @property
     def pdf4(self):
         np.random.seed(33)
-        index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1H")
+        index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1h")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
     def pdf5(self):
         np.random.seed(44)
-        index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02 
06:07:08", freq="1T")
+        index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02 
06:07:08", freq="1min")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
     def pdf6(self):
         np.random.seed(55)
-        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02 
06:07:08", freq="1S")
+        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02 
06:07:08", freq="1s")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
@@ -127,14 +127,14 @@ class ResampleFrameMixin:
             )
 
     def test_dataframe_resample(self):
-        self._test_resample(self.pdf4, self.psdf4, ["11H", "21D"], "left", 
None, "mean")
-        self._test_resample(self.pdf5, self.psdf5, ["55MIN", "2H", "D"], 
"left", "left", "std")
-        self._test_resample(self.pdf6, self.psdf6, ["29S", "10MIN", "3H"], 
"left", "right", "var")
+        self._test_resample(self.pdf4, self.psdf4, ["11h", "21D"], "left", 
None, "mean")
+        self._test_resample(self.pdf5, self.psdf5, ["55min", "2h", "D"], 
"left", "left", "std")
+        self._test_resample(self.pdf6, self.psdf6, ["29s", "10min", "3h"], 
"left", "right", "var")
 
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
-            self._test_resample(self.pdf2, self.psdf2, ["3A", "11M", "D"], 
None, "left", "max")
+            self._test_resample(self.pdf2, self.psdf2, ["3A", "11ME", "D"], 
None, "left", "max")
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
-            self._test_resample(self.pdf1, self.psdf1, ["3Y", "9M", "17D"], 
None, None, "min")
+            self._test_resample(self.pdf1, self.psdf1, ["3Y", "9ME", "17D"], 
None, None, "min")
 
 
 class ResampleFrameTests(ResampleFrameMixin, PandasOnSparkTestCase, TestUtils):
diff --git a/python/pyspark/pandas/tests/resample/test_series.py 
b/python/pyspark/pandas/tests/resample/test_series.py
index f0f508c23a22..9d6bb017dc8d 100644
--- a/python/pyspark/pandas/tests/resample/test_series.py
+++ b/python/pyspark/pandas/tests/resample/test_series.py
@@ -76,19 +76,19 @@ class ResampleSeriesMixin:
     @property
     def pdf4(self):
         np.random.seed(33)
-        index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1H")
+        index = pd.date_range(start="2020-12-12", end="2022-05-01", freq="1h")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
     def pdf5(self):
         np.random.seed(44)
-        index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02 
06:07:08", freq="1T")
+        index = pd.date_range(start="2021-12-30 03:04:05", end="2022-01-02 
06:07:08", freq="1min")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
     def pdf6(self):
         np.random.seed(55)
-        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02 
06:07:08", freq="1S")
+        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02 
06:07:08", freq="1s")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, 
columns=list("AB"))
 
     @property
@@ -126,11 +126,11 @@ class ResampleSeriesMixin:
             )
 
     def test_series_resample(self):
-        self._test_resample(self.pdf2.A, self.psdf2.A, ["13M"], "right", 
"left", "max")
-        self._test_resample(self.pdf3.A, self.psdf3.A, ["1001H"], "right", 
"right", "sum")
+        self._test_resample(self.pdf2.A, self.psdf2.A, ["13ME"], "right", 
"left", "max")
+        self._test_resample(self.pdf3.A, self.psdf3.A, ["1001h"], "right", 
"right", "sum")
         self._test_resample(self.pdf4.A, self.psdf4.A, ["6D"], None, None, 
"mean")
-        self._test_resample(self.pdf5.A, self.psdf5.A, ["47T"], "left", 
"left", "var")
-        self._test_resample(self.pdf6.A, self.psdf6.A, ["111S"], "right", 
"right", "std")
+        self._test_resample(self.pdf5.A, self.psdf5.A, ["47min"], "left", 
"left", "var")
+        self._test_resample(self.pdf6.A, self.psdf6.A, ["111s"], "right", 
"right", "std")
 
         with self.assertRaisesRegex(ValueError, "rule code YE-DEC is not 
supported"):
             self._test_resample(self.pdf1.A, self.psdf1.A, ["4Y"], "right", 
None, "min")
diff --git a/python/pyspark/pandas/tests/resample/test_timezone.py 
b/python/pyspark/pandas/tests/resample/test_timezone.py
index e783af507c93..7cf3df77de8d 100644
--- a/python/pyspark/pandas/tests/resample/test_timezone.py
+++ b/python/pyspark/pandas/tests/resample/test_timezone.py
@@ -57,8 +57,8 @@ class ResampleTimezoneMixin:
                 "spark.sql.timestampType": "TIMESTAMP_NTZ",
             }
         ):
-            p_resample = self.pdf.resample(rule="1001H", closed="right", 
label="right")
-            ps_resample = self.psdf.resample(rule="1001H", closed="right", 
label="right")
+            p_resample = self.pdf.resample(rule="1001h", closed="right", 
label="right")
+            ps_resample = self.psdf.resample(rule="1001h", closed="right", 
label="right")
             self.assert_eq(
                 p_resample.sum().sort_index(),
                 ps_resample.sum().sort_index(),
diff --git a/python/pyspark/pandas/tests/series/test_datetime.py 
b/python/pyspark/pandas/tests/series/test_datetime.py
index f3314de8313a..a8374b38e66f 100644
--- a/python/pyspark/pandas/tests/series/test_datetime.py
+++ b/python/pyspark/pandas/tests/series/test_datetime.py
@@ -254,15 +254,15 @@ class SeriesDateTimeTestsMixin:
 
     def test_round(self):
         self.check_func(lambda x: x.dt.round(freq="min"))
-        self.check_func(lambda x: x.dt.round(freq="H"))
+        self.check_func(lambda x: x.dt.round(freq="h"))
 
     def test_floor(self):
         self.check_func(lambda x: x.dt.floor(freq="min"))
-        self.check_func(lambda x: x.dt.floor(freq="H"))
+        self.check_func(lambda x: x.dt.floor(freq="h"))
 
     def test_ceil(self):
         self.check_func(lambda x: x.dt.ceil(freq="min"))
-        self.check_func(lambda x: x.dt.ceil(freq="H"))
+        self.check_func(lambda x: x.dt.ceil(freq="h"))
 
     @unittest.skip("Unsupported locale setting")
     def test_month_name(self):
diff --git a/python/pyspark/pandas/tests/test_namespace.py 
b/python/pyspark/pandas/tests/test_namespace.py
index 151d61555af8..8a267f76c536 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -211,8 +211,8 @@ class NamespaceTestsMixin:
         )
 
         self.assert_eq(
-            ps.date_range(start="1/1/2018", periods=5, freq="3M"),
-            pd.date_range(start="1/1/2018", periods=5, freq="3M"),
+            ps.date_range(start="1/1/2018", periods=5, freq="3ME"),
+            pd.date_range(start="1/1/2018", periods=5, freq="3ME"),
         )
 
         self.assert_eq(
@@ -299,8 +299,8 @@ class NamespaceTestsMixin:
             pd.timedelta_range(end="3 days", periods=3, closed="right"),
         )
         self.assert_eq(
-            ps.timedelta_range(start="1 day", end="3 days", freq="6H"),
-            pd.timedelta_range(start="1 day", end="3 days", freq="6H"),
+            ps.timedelta_range(start="1 day", end="3 days", freq="6h"),
+            pd.timedelta_range(start="1 day", end="3 days", freq="6h"),
         )
         self.assert_eq(
             ps.timedelta_range(start="1 day", end="3 days", periods=4),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-55293][PS][TESTS] Avoid using old offset aliases

Reply via email to