This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 8c538921a8ae [SPARK-52840][PYTHON][DOCS][FOLLOW-UP] Increase Pandas minimum version to 2.2.0
8c538921a8ae is described below

commit 8c538921a8ae95be0051a66ad53def2a4ff81020
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Fri Jul 25 12:28:54 2025 +0800

    [SPARK-52840][PYTHON][DOCS][FOLLOW-UP] Increase Pandas minimum version to 2.2.0

    ### What changes were proposed in this pull request?
    Follow-up to https://github.com/apache/spark/pull/51531.

    ### Why are the changes needed?
    Some places were missed in https://github.com/apache/spark/pull/51531.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    CI.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #51662 from zhengruifeng/pandas_220_followup.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/docs/source/migration_guide/pyspark_upgrade.rst | 1 +
 python/packaging/classic/setup.py                      | 2 +-
 python/packaging/client/setup.py                       | 2 +-
 python/pyspark/sql/pandas/utils.py                     | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst
index 2f8af88b6f8b..db77701ea41a 100644
--- a/python/docs/source/migration_guide/pyspark_upgrade.rst
+++ b/python/docs/source/migration_guide/pyspark_upgrade.rst
@@ -24,6 +24,7 @@ Upgrading from PySpark 4.0 to 4.1
 * In Spark 4.1, Python 3.9 support was dropped in PySpark.
 * In Spark 4.1, the minimum supported version for PyArrow has been raised from 11.0.0 to 15.0.0 in PySpark.
+* In Spark 4.1, the minimum supported version for Pandas has been raised from 2.0.0 to 2.2.0 in PySpark.
 * In Spark 4.1, ``DataFrame['name']`` on Spark Connect Python Client no longer eagerly validate the column name. To restore the legacy behavior, set ``PYSPARK_VALIDATE_COLUMN_NAME_LEGACY`` environment variable to ``1``.
 * In Spark 4.1, Arrow-optimized Python UDF supports UDT input / output instead of falling back to the regular UDF. To restore the legacy behavior, set ``spark.sql.execution.pythonUDF.arrow.legacy.fallbackOnUDT`` to ``true``.
 * In Spark 4.1, unnecessary conversion to pandas instances is removed when ``spark.sql.execution.pythonUDF.arrow.enabled`` is enabled. As a result, the type coercion changes when the produced output has a schema different from the specified schema. To restore the previous behavior, enable ``spark.sql.legacy.execution.pythonUDF.pandas.conversion.enabled``.
diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py
index 775fd4be4748..fcb607a561b3 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -150,7 +150,7 @@ if in_spark:
 # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications.
 # Also don't forget to update python/docs/source/getting_started/install.rst,
 # python/packaging/client/setup.py, and python/packaging/connect/setup.py
-_minimum_pandas_version = "2.0.0"
+_minimum_pandas_version = "2.2.0"
 _minimum_numpy_version = "1.21"
 _minimum_pyarrow_version = "15.0.0"
 _minimum_grpc_version = "1.67.0"
diff --git a/python/packaging/client/setup.py b/python/packaging/client/setup.py
index 1f000046748d..d70c17465331 100755
--- a/python/packaging/client/setup.py
+++ b/python/packaging/client/setup.py
@@ -133,7 +133,7 @@ try:
     # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications.
     # Also don't forget to update python/docs/source/getting_started/install.rst,
     # python/packaging/classic/setup.py, and python/packaging/connect/setup.py
-    _minimum_pandas_version = "2.0.0"
+    _minimum_pandas_version = "2.2.0"
     _minimum_numpy_version = "1.21"
     _minimum_pyarrow_version = "15.0.0"
     _minimum_grpc_version = "1.67.0"
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index 5cf74b2b5732..aef2e2d93c01 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -22,7 +22,7 @@ from pyspark.errors import PySparkImportError, PySparkRuntimeError
 def require_minimum_pandas_version() -> None:
     """Raise ImportError if minimum version of Pandas is not installed"""
     # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
-    minimum_pandas_version = "2.0.0"
+    minimum_pandas_version = "2.2.0"
 
     try:
         import pandas
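For context, the require_minimum_pandas_version() gate touched above follows a common pattern: import the package, compare its __version__ against a single minimum-version constant, and raise with an actionable message otherwise. Below is a minimal, self-contained sketch of that pattern using only the standard library; the helper name require_minimum_version, the tuple-based comparison, and the message wording are illustrative assumptions, not PySpark's actual implementation (which, per the hunk context above, reports failures through PySparkImportError).

def require_minimum_version(package: str, minimum: str) -> None:
    """Raise ImportError if `package` is absent or older than `minimum`."""
    try:
        module = __import__(package)
    except ImportError as error:
        raise ImportError(
            f"{package} >= {minimum} must be installed; however, it was not found."
        ) from error

    def release_tuple(version: str) -> tuple:
        # Keep only the numeric release segment: "2.2.0" -> (2, 2, 0).
        # Simplification: non-numeric parts are discarded, so a pre-release
        # like "2.2.0rc1" becomes (2, 2) and is treated as older than (2, 2, 0).
        return tuple(int(part) for part in version.split(".")[:3] if part.isdigit())

    # Assumes the package exposes __version__, as pandas does.
    if release_tuple(module.__version__) < release_tuple(minimum):
        raise ImportError(
            f"{package} >= {minimum} must be installed; however, "
            f"your version was {module.__version__}."
        )

# Mirrors the constant this commit bumps from "2.0.0" to "2.2.0".
require_minimum_version("pandas", "2.2.0")

Keeping the floor in one constant per entry point, as the patch does across the two setup.py files and utils.py, makes a bump like this a three-line change plus the migration-guide entry; the TODO in utils.py about deduplicating the version specification points toward collapsing even those remaining copies into one place.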