This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7f819398cb4b [SPARK-55245][PYTHON][PS][TESTS] Fix all timestamp freq 
usage from M to ME
7f819398cb4b is described below

commit 7f819398cb4b8130368837b30f1c56e32fbeb821
Author: Tian Gao <[email protected]>
AuthorDate: Wed Jan 28 10:36:19 2026 +0800

    [SPARK-55245][PYTHON][PS][TESTS] Fix all timestamp freq usage from M to ME
    
    ### What changes were proposed in this pull request?
    
    Use `ME` instead of `M` for `freq` of `date_range()`.
    
    ### Why are the changes needed?
    
    `M` is the wrong alias here; it should only be used for period-based 
time spans. It is no longer listed as a valid option for `date_range()` (see 
https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases), and 
the code will simply report an error.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, this is a test only change.
    
    ### How was this patch tested?
    
    Ran the tests locally; they no longer report an error.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #54018 from gaogaotiantian/pandas3-fix-frequency.
    
    Authored-by: Tian Gao <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py  | 2 +-
 python/pyspark/pandas/tests/indexes/test_indexing_adv.py        | 2 +-
 python/pyspark/pandas/tests/indexes/test_indexing_loc.py        | 2 +-
 python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py  | 4 ++--
 python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py      | 2 +-
 python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py | 2 +-
 python/pyspark/pandas/tests/plot/test_series_plot_plotly.py     | 2 +-
 python/pyspark/pandas/tests/series/test_datetime.py             | 2 +-
 python/pyspark/pandas/tests/test_namespace.py                   | 4 ++--
 9 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py 
b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
index b667a8f3979e..8800387f657e 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@@ -178,7 +178,7 @@ class DatetimeOpsTestsMixin:
         self.assertRaises(TypeError, lambda: False | self.psser)
 
     def test_from_to_pandas(self):
-        data = pd.date_range("1994-1-31 10:30:15", periods=3, freq="M")
+        data = pd.date_range("1994-1-31 10:30:15", periods=3, freq="ME")
         pser = pd.Series(data)
         psser = ps.Series(data)
         self.assert_eq(pser, psser._to_pandas())
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
index fc3c93ca7aae..919d9cc9b569 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
@@ -272,7 +272,7 @@ class IndexingAdvMixin:
 
         pdf = pd.DataFrame(
             {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="M", periods=100),
+            index=pd.date_range("2011-01-01", freq="ME", periods=100),
         )
         psdf = ps.from_pandas(pdf)
 
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py 
b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
index ae9b937dc2c2..7affdcff56c9 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
@@ -223,7 +223,7 @@ class IndexingLocMixin:
 
         pdf = pd.DataFrame(
             {"A": np.random.randn(100), "B": np.random.randn(100)},
-            index=pd.date_range("2011-01-01", freq="M", periods=100),
+            index=pd.date_range("2011-01-01", freq="ME", periods=100),
         )
         psdf = ps.from_pandas(pdf)
         # TODO?: self.assert_eq(pdf.loc['2011-01'], psdf.loc['2011-01'])
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py 
b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
index 2015a7189afb..f8ac0a326b50 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
@@ -138,7 +138,7 @@ class DataFramePlotMatplotlibTestsMixin:
                 "signups": [5, 5, 6, 12, 14, 13],
                 "visits": [20, 42, 28, 62, 81, 50],
             },
-            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="M"),
+            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="ME"),
         )
         psdf = ps.from_pandas(pdf)
         check_area_plot_stacked_false(pdf, psdf)
@@ -164,7 +164,7 @@ class DataFramePlotMatplotlibTestsMixin:
                 "signups": [5, 5, 6, 12, 14, 13],
                 "visits": [20, 42, 28, 62, 81, 50],
             },
-            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="M"),
+            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="ME"),
         )
         psdf = ps.from_pandas(pdf)
         check_area_plot_y(pdf, psdf, y="sales")
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py 
b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
index b2088737617e..727d8549bf67 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
@@ -90,7 +90,7 @@ class DataFramePlotPlotlyTestsMixin:
                 "signups": [5, 5, 6, 12, 14, 13],
                 "visits": [20, 42, 28, 62, 81, 50],
             },
-            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="M"),
+            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="ME"),
         )
         psdf = ps.from_pandas(pdf)
         check_area_plot_y(pdf, psdf, y="sales")
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py 
b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
index 362609354f25..04c6b84e02f3 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
@@ -263,7 +263,7 @@ class SeriesPlotMatplotlibTestsMixin:
                 "signups": [5, 5, 6, 12, 14, 13],
                 "visits": [20, 42, 28, 62, 81, 50],
             },
-            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="M"),
+            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="ME"),
         )
         psdf = ps.from_pandas(pdf)
 
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py 
b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
index a9e927b528ff..757e3344feef 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
@@ -95,7 +95,7 @@ class SeriesPlotPlotlyTestsMixin:
                 "signups": [5, 5, 6, 12, 14, 13],
                 "visits": [20, 42, 28, 62, 81, 50],
             },
-            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="M"),
+            index=pd.date_range(start="2018/01/01", end="2018/07/01", 
freq="ME"),
         )
         psdf = ps.from_pandas(pdf)
 
diff --git a/python/pyspark/pandas/tests/series/test_datetime.py 
b/python/pyspark/pandas/tests/series/test_datetime.py
index f93f835c74f9..f3314de8313a 100644
--- a/python/pyspark/pandas/tests/series/test_datetime.py
+++ b/python/pyspark/pandas/tests/series/test_datetime.py
@@ -29,7 +29,7 @@ from pyspark.testing.sqlutils import SQLTestUtils
 class SeriesDateTimeTestsMixin:
     @property
     def pdf1(self):
-        date1 = pd.Series(pd.date_range("2012-1-1 12:45:31", periods=3, 
freq="M"))
+        date1 = pd.Series(pd.date_range("2012-1-1 12:45:31", periods=3, 
freq="ME"))
         date2 = pd.Series(pd.date_range("2013-3-11 21:45:00", periods=3, 
freq="W"))
         return pd.DataFrame(dict(start_date=date1, end_date=date2))
 
diff --git a/python/pyspark/pandas/tests/test_namespace.py 
b/python/pyspark/pandas/tests/test_namespace.py
index 9a819820437c..151d61555af8 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -206,8 +206,8 @@ class NamespaceTestsMixin:
         )
 
         self.assert_eq(
-            ps.date_range(start="1/1/2018", periods=5, freq="M"),
-            pd.date_range(start="1/1/2018", periods=5, freq="M"),
+            ps.date_range(start="1/1/2018", periods=5, freq="ME"),
+            pd.date_range(start="1/1/2018", periods=5, freq="ME"),
         )
 
         self.assert_eq(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to