This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 55ae57f725c [SPARK-45997][PS][PYTHON][CONNECT][DOCS] Remove deprecated
APIs from legacy Koalas
55ae57f725c is described below
commit 55ae57f725c980ccf03e6d64c8e3a6f325d3f4b6
Author: Haejoon Lee <[email protected]>
AuthorDate: Mon Nov 20 12:50:03 2023 +0800
[SPARK-45997][PS][PYTHON][CONNECT][DOCS] Remove deprecated APIs from legacy
Koalas
### What changes were proposed in this pull request?
This PR proposes to remove deprecated APIs related to legacy Koalas for the
upcoming major release, Spark 4.0.
Related docs are updated as well.
### Why are the changes needed?
To clean up the API surface by removing deprecated legacy Koalas-related
APIs.
### Does this PR introduce _any_ user-facing change?
Yes, `pyspark.pandas.DataFrame.koalas`, `pyspark.pandas.Series.koalas`,
`pyspark.sql.DataFrame.to_koalas`, and `pyspark.sql.DataFrame.to_pandas_on_spark`
will be removed.
### How was this patch tested?
The existing CI should pass.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43896 from itholic/remove_koalas.
Authored-by: Haejoon Lee <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/docs/source/migration_guide/koalas_to_pyspark.rst | 9 +++------
python/docs/source/migration_guide/pyspark_upgrade.rst | 3 +++
python/docs/source/reference/pyspark.sql/dataframe.rst | 1 -
python/pyspark/pandas/frame.py | 3 ---
python/pyspark/pandas/series.py | 3 ---
python/pyspark/sql/connect/dataframe.py | 9 ---------
python/pyspark/sql/dataframe.py | 16 ----------------
7 files changed, 6 insertions(+), 38 deletions(-)
diff --git a/python/docs/source/migration_guide/koalas_to_pyspark.rst
b/python/docs/source/migration_guide/koalas_to_pyspark.rst
index c1c1d1e55ff..b45538443da 100644
--- a/python/docs/source/migration_guide/koalas_to_pyspark.rst
+++ b/python/docs/source/migration_guide/koalas_to_pyspark.rst
@@ -27,13 +27,10 @@ Migrating from Koalas to pandas API on Spark
# import databricks.koalas as ks
import pyspark.pandas as ps
-* ``DataFrame.koalas`` in Koalas DataFrame was renamed to
``DataFrame.pandas_on_spark`` in pandas-on-Spark DataFrame.
``DataFrame.koalas`` was kept for compatibility reasons but deprecated as of
Spark 3.2.
- ``DataFrame.koalas`` will be removed in the future releases.
+* ``DataFrame.koalas`` in Koalas DataFrame was renamed to
``DataFrame.pandas_on_spark`` in pandas-on-Spark DataFrame.
``DataFrame.koalas`` was removed as of Spark 4.0.
-* Monkey-patched ``DataFrame.to_koalas`` in PySpark DataFrame was renamed to
``DataFrame.pandas_api`` in PySpark DataFrame. ``DataFrame.to_koalas`` was kept
for compatibility reasons.
- ``DataFrame.to_koalas`` will be removed in the future releases.
+* Monkey-patched ``DataFrame.to_koalas`` in PySpark DataFrame was renamed to
``DataFrame.pandas_api`` in PySpark DataFrame. ``DataFrame.to_koalas`` was
removed as of Spark 4.0.
-* Monkey-patched ``DataFrame.to_pandas_on_spark`` in PySpark DataFrame was
renamed to ``DataFrame.pandas_api`` in PySpark DataFrame.
``DataFrame.to_pandas_on_spark`` was kept for compatibility reasons but
deprecated as of Spark 3.3.
- ``DataFrame.to_pandas_on_spark`` will be removed in the future releases.
+* Monkey-patched ``DataFrame.to_pandas_on_spark`` in PySpark DataFrame was
renamed to ``DataFrame.pandas_api`` in PySpark DataFrame.
``DataFrame.to_pandas_on_spark`` was removed as of Spark 4.0.
* ``databricks.koalas.__version__`` was removed. ``pyspark.__version__``
should be used instead.
diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst
b/python/docs/source/migration_guide/pyspark_upgrade.rst
index 06991281bf0..d71c05fbee4 100644
--- a/python/docs/source/migration_guide/pyspark_upgrade.rst
+++ b/python/docs/source/migration_guide/pyspark_upgrade.rst
@@ -62,6 +62,9 @@ Upgrading from PySpark 3.5 to 4.0
* In Spark 4.0, ``Index.is_all_dates`` has been removed from pandas API on
Spark.
* In Spark 4.0, ``convert_float`` parameter from ``read_excel`` has been
removed from pandas API on Spark.
* In Spark 4.0, ``mangle_dupe_cols`` parameter from ``read_excel`` has been
removed from pandas API on Spark.
+* In Spark 4.0, ``DataFrame.koalas`` has been removed from pandas API on
Spark, use ``DataFrame.pandas_on_spark`` instead.
+* In Spark 4.0, ``DataFrame.to_koalas`` has been removed from PySpark, use
``DataFrame.pandas_api`` instead.
+* In Spark 4.0, ``DataFrame.to_pandas_on_spark`` has been removed from
PySpark, use ``DataFrame.pandas_api`` instead.
diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst
b/python/docs/source/reference/pyspark.sql/dataframe.rst
index 1d6712bb042..fefb261be8b 100644
--- a/python/docs/source/reference/pyspark.sql/dataframe.rst
+++ b/python/docs/source/reference/pyspark.sql/dataframe.rst
@@ -112,7 +112,6 @@ DataFrame
DataFrame.toJSON
DataFrame.toLocalIterator
DataFrame.toPandas
- DataFrame.to_pandas_on_spark
DataFrame.transform
DataFrame.union
DataFrame.unionAll
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 8b20abf9652..4ecc85ce8f7 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -1000,9 +1000,6 @@ class DataFrame(Frame, Generic[T]):
# create accessor for pandas-on-Spark specific methods.
pandas_on_spark = CachedAccessor("pandas_on_spark",
PandasOnSparkFrameMethods)
- # keep the name "koalas" for backward compatibility.
- koalas = CachedAccessor("koalas", PandasOnSparkFrameMethods)
-
@no_type_check
def hist(self, bins=10, **kwds):
return self.plot.hist(bins, **kwds)
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index e0cdb1ea030..c9beb6432f9 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -704,9 +704,6 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
# create accessor for pandas-on-Spark specific methods.
pandas_on_spark = CachedAccessor("pandas_on_spark",
PandasOnSparkSeriesMethods)
- # keep the name "koalas" for backward compatibility.
- koalas = CachedAccessor("koalas", PandasOnSparkSeriesMethods)
-
# Comparison Operators
def eq(self, other: Any) -> "Series":
"""
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index 35e4882fb03..c713bb85c1e 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -1948,15 +1948,6 @@ class DataFrame:
toLocalIterator.__doc__ = PySparkDataFrame.toLocalIterator.__doc__
- def to_pandas_on_spark(
- self, index_col: Optional[Union[str, List[str]]] = None
- ) -> "PandasOnSparkDataFrame":
- warnings.warn(
- "DataFrame.to_pandas_on_spark is deprecated. Use
DataFrame.pandas_api instead.",
- FutureWarning,
- )
- return self.pandas_api(index_col)
-
def pandas_api(
self, index_col: Optional[Union[str, List[str]]] = None
) -> "PandasOnSparkDataFrame":
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 01067bd4c48..104b29a5e27 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -6580,16 +6580,6 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
"""
return DataFrameWriterV2(self, table)
- # Keep to_pandas_on_spark for backward compatibility for now.
- def to_pandas_on_spark(
- self, index_col: Optional[Union[str, List[str]]] = None
- ) -> "PandasOnSparkDataFrame":
- warnings.warn(
- "DataFrame.to_pandas_on_spark is deprecated. Use
DataFrame.pandas_api instead.",
- FutureWarning,
- )
- return self.pandas_api(index_col)
-
def pandas_api(
self, index_col: Optional[Union[str, List[str]]] = None
) -> "PandasOnSparkDataFrame":
@@ -6652,12 +6642,6 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
)
return PandasOnSparkDataFrame(internal)
- # Keep to_koalas for backward compatibility for now.
- def to_koalas(
- self, index_col: Optional[Union[str, List[str]]] = None
- ) -> "PandasOnSparkDataFrame":
- return self.pandas_api(index_col)
-
def _to_scala_map(sc: SparkContext, jm: Dict) -> JavaObject:
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]