This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7f819398cb4b [SPARK-55245][PYTHON][PS][TESTS] Fix all timestamp freq
usage from M to ME
7f819398cb4b is described below
commit 7f819398cb4b8130368837b30f1c56e32fbeb821
Author: Tian Gao <[email protected]>
AuthorDate: Wed Jan 28 10:36:19 2026 +0800
[SPARK-55245][PYTHON][PS][TESTS] Fix all timestamp freq usage from M to ME
### What changes were proposed in this pull request?
Use `ME` instead of `M` for `freq` of `date_range()`.
### Why are the changes needed?
`M` is the wrong alias to use here; it should only be used for period
(timespan) frequencies. It is no longer listed as a valid offset alias for
`date_range()` - see
https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases -
and the code simply reports an error when it is used.
### Does this PR introduce _any_ user-facing change?
No, this is a test only change.
### How was this patch tested?
Local tests no longer report an error.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54018 from gaogaotiantian/pandas3-fix-frequency.
Authored-by: Tian Gao <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py | 2 +-
python/pyspark/pandas/tests/indexes/test_indexing_adv.py | 2 +-
python/pyspark/pandas/tests/indexes/test_indexing_loc.py | 2 +-
python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py | 4 ++--
python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py | 2 +-
python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py | 2 +-
python/pyspark/pandas/tests/plot/test_series_plot_plotly.py | 2 +-
python/pyspark/pandas/tests/series/test_datetime.py | 2 +-
python/pyspark/pandas/tests/test_namespace.py | 4 ++--
9 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
index b667a8f3979e..8800387f657e 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@@ -178,7 +178,7 @@ class DatetimeOpsTestsMixin:
self.assertRaises(TypeError, lambda: False | self.psser)
def test_from_to_pandas(self):
- data = pd.date_range("1994-1-31 10:30:15", periods=3, freq="M")
+ data = pd.date_range("1994-1-31 10:30:15", periods=3, freq="ME")
pser = pd.Series(data)
psser = ps.Series(data)
self.assert_eq(pser, psser._to_pandas())
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
index fc3c93ca7aae..919d9cc9b569 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_adv.py
@@ -272,7 +272,7 @@ class IndexingAdvMixin:
pdf = pd.DataFrame(
{"A": np.random.randn(100), "B": np.random.randn(100)},
- index=pd.date_range("2011-01-01", freq="M", periods=100),
+ index=pd.date_range("2011-01-01", freq="ME", periods=100),
)
psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
index ae9b937dc2c2..7affdcff56c9 100644
--- a/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
+++ b/python/pyspark/pandas/tests/indexes/test_indexing_loc.py
@@ -223,7 +223,7 @@ class IndexingLocMixin:
pdf = pd.DataFrame(
{"A": np.random.randn(100), "B": np.random.randn(100)},
- index=pd.date_range("2011-01-01", freq="M", periods=100),
+ index=pd.date_range("2011-01-01", freq="ME", periods=100),
)
psdf = ps.from_pandas(pdf)
# TODO?: self.assert_eq(pdf.loc['2011-01'], psdf.loc['2011-01'])
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
index 2015a7189afb..f8ac0a326b50 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py
@@ -138,7 +138,7 @@ class DataFramePlotMatplotlibTestsMixin:
"signups": [5, 5, 6, 12, 14, 13],
"visits": [20, 42, 28, 62, 81, 50],
},
- index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="M"),
+ index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="ME"),
)
psdf = ps.from_pandas(pdf)
check_area_plot_stacked_false(pdf, psdf)
@@ -164,7 +164,7 @@ class DataFramePlotMatplotlibTestsMixin:
"signups": [5, 5, 6, 12, 14, 13],
"visits": [20, 42, 28, 62, 81, 50],
},
- index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="M"),
+ index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="ME"),
)
psdf = ps.from_pandas(pdf)
check_area_plot_y(pdf, psdf, y="sales")
diff --git a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
index b2088737617e..727d8549bf67 100644
--- a/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py
@@ -90,7 +90,7 @@ class DataFramePlotPlotlyTestsMixin:
"signups": [5, 5, 6, 12, 14, 13],
"visits": [20, 42, 28, 62, 81, 50],
},
- index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="M"),
+ index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="ME"),
)
psdf = ps.from_pandas(pdf)
check_area_plot_y(pdf, psdf, y="sales")
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
index 362609354f25..04c6b84e02f3 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py
@@ -263,7 +263,7 @@ class SeriesPlotMatplotlibTestsMixin:
"signups": [5, 5, 6, 12, 14, 13],
"visits": [20, 42, 28, 62, 81, 50],
},
- index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="M"),
+ index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="ME"),
)
psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
index a9e927b528ff..757e3344feef 100644
--- a/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
+++ b/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py
@@ -95,7 +95,7 @@ class SeriesPlotPlotlyTestsMixin:
"signups": [5, 5, 6, 12, 14, 13],
"visits": [20, 42, 28, 62, 81, 50],
},
- index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="M"),
+ index=pd.date_range(start="2018/01/01", end="2018/07/01",
freq="ME"),
)
psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/series/test_datetime.py
b/python/pyspark/pandas/tests/series/test_datetime.py
index f93f835c74f9..f3314de8313a 100644
--- a/python/pyspark/pandas/tests/series/test_datetime.py
+++ b/python/pyspark/pandas/tests/series/test_datetime.py
@@ -29,7 +29,7 @@ from pyspark.testing.sqlutils import SQLTestUtils
class SeriesDateTimeTestsMixin:
@property
def pdf1(self):
- date1 = pd.Series(pd.date_range("2012-1-1 12:45:31", periods=3,
freq="M"))
+ date1 = pd.Series(pd.date_range("2012-1-1 12:45:31", periods=3,
freq="ME"))
date2 = pd.Series(pd.date_range("2013-3-11 21:45:00", periods=3,
freq="W"))
return pd.DataFrame(dict(start_date=date1, end_date=date2))
diff --git a/python/pyspark/pandas/tests/test_namespace.py
b/python/pyspark/pandas/tests/test_namespace.py
index 9a819820437c..151d61555af8 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -206,8 +206,8 @@ class NamespaceTestsMixin:
)
self.assert_eq(
- ps.date_range(start="1/1/2018", periods=5, freq="M"),
- pd.date_range(start="1/1/2018", periods=5, freq="M"),
+ ps.date_range(start="1/1/2018", periods=5, freq="ME"),
+ pd.date_range(start="1/1/2018", periods=5, freq="ME"),
)
self.assert_eq(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]