This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3cec5a4c7d8 [SPARK-43709][PS] Remove `closed` parameter from `ps.date_range` & enable test 3cec5a4c7d8 is described below commit 3cec5a4c7d8cf86141b16236925e54886a807a42 Author: itholic <haejoon....@databricks.com> AuthorDate: Wed Aug 9 14:10:00 2023 +0900 [SPARK-43709][PS] Remove `closed` parameter from `ps.date_range` & enable test ### What changes were proposed in this pull request? This PR proposes to remove `closed` parameter from `ps.date_range` & enable test. See https://github.com/pandas-dev/pandas/issues/40245 for more detail. ### Why are the changes needed? To support pandas 2.0.0 and above. ### Does this PR introduce _any_ user-facing change? `closed` parameter will no longer be available from `ps.date_range` API. ### How was this patch tested? Enabling the existing UT. Closes #42389 from itholic/closed_removing. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../source/migration_guide/pyspark_upgrade.rst | 1 + python/pyspark/pandas/namespace.py | 38 +--------------------- python/pyspark/pandas/tests/test_namespace.py | 14 -------- 3 files changed, 2 insertions(+), 51 deletions(-) diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index b029bcc649f..1b247d46227 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -31,6 +31,7 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, ``Series.mad`` has been removed from pandas API on Spark. * In Spark 4.0, ``na_sentinel`` parameter from ``Index.factorize`` and `Series.factorize`` has been removed from pandas API on Spark, use ``use_na_sentinel`` instead. 
* In Spark 4.0, ``inplace`` parameter from ``Categorical.add_categories``, ``Categorical.remove_categories``, ``Categorical.set_categories``, ``Categorical.rename_categories``, ``Categorical.reorder_categories``, ``Categorical.as_ordered``, ``Categorical.as_unordered`` have been removed from pandas API on Spark. +* In Spark 4.0, ``closed`` parameter from ``ps.date_range`` has been removed from pandas API on Spark. Upgrading from PySpark 3.3 to 3.4 diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 5ffec6bedb9..ba93e5a3ee5 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -1751,7 +1751,7 @@ def to_datetime( ) -# TODO(SPARK-42621): Add `inclusive` parameter and replace `closed`. +# TODO(SPARK-42621): Add `inclusive` parameter. # See https://github.com/pandas-dev/pandas/issues/40245 def date_range( start: Union[str, Any] = None, @@ -1761,7 +1761,6 @@ def date_range( tz: Optional[Union[str, tzinfo]] = None, normalize: bool = False, name: Optional[str] = None, - closed: Optional[str] = None, **kwargs: Any, ) -> DatetimeIndex: """ @@ -1785,12 +1784,6 @@ def date_range( Normalize start/end dates to midnight before generating date range. name : str, default None Name of the resulting DatetimeIndex. - closed : {None, 'left', 'right'}, optional - Make the interval closed with respect to the given frequency to - the 'left', 'right', or both sides (None, the default). - - .. deprecated:: 3.4.0 - **kwargs For compatibility. Has no effect on the result. @@ -1874,37 +1867,9 @@ def date_range( DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], dtype='datetime64[ns]', freq=None) - - `closed` controls whether to include `start` and `end` that are on the - boundary. The default includes boundary points on either end. - - >>> ps.date_range( - ... start='2017-01-01', end='2017-01-04', closed=None - ... 
) # doctest: +SKIP - DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], - dtype='datetime64[ns]', freq=None) - - Use ``closed='left'`` to exclude `end` if it falls on the boundary. - - >>> ps.date_range( - ... start='2017-01-01', end='2017-01-04', closed='left' - ... ) # doctest: +SKIP - DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq=None) - - Use ``closed='right'`` to exclude `start` if it falls on the boundary. - - >>> ps.date_range( - ... start='2017-01-01', end='2017-01-04', closed='right' - ... ) # doctest: +SKIP - DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq=None) """ assert freq not in ["N", "ns"], "nanoseconds is not supported" assert tz is None, "Localized DatetimeIndex is not supported" - if closed is not None: - warnings.warn( - "Argument `closed` is deprecated in 3.4.0 and will be removed in 4.0.0.", - FutureWarning, - ) return cast( DatetimeIndex, @@ -1917,7 +1882,6 @@ def date_range( tz=tz, normalize=normalize, name=name, - closed=closed, **kwargs, ) ), diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py index 64c58a70239..d1d1e1af935 100644 --- a/python/pyspark/pandas/tests/test_namespace.py +++ b/python/pyspark/pandas/tests/test_namespace.py @@ -190,10 +190,6 @@ class NamespaceTestsMixin: self.assert_eq(pd.to_datetime(pdf), ps.to_datetime(psdf)) self.assert_eq(pd.to_datetime(dict_from_pdf), ps.to_datetime(dict_from_pdf)) - @unittest.skipIf( - LooseVersion(pd.__version__) >= LooseVersion("2.0.0"), - "TODO(SPARK-43709): Enable NamespaceTests.test_date_range for pandas 2.0.0.", - ) def test_date_range(self): self.assert_eq( ps.date_range(start="1/1/2018", end="1/08/2018"), @@ -225,16 +221,6 @@ class NamespaceTestsMixin: pd.date_range(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)), ) - self.assert_eq( - ps.date_range(start="2017-01-01", end="2017-01-04", closed="left"), - 
pd.date_range(start="2017-01-01", end="2017-01-04", closed="left"), - ) - - self.assert_eq( - ps.date_range(start="2017-01-01", end="2017-01-04", closed="right"), - pd.date_range(start="2017-01-01", end="2017-01-04", closed="right"), - ) - self.assertRaises( AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, tz="Asia/Tokyo") ) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org