This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 04bf981781ba [SPARK-47737][PYTHON] Bump PyArrow to 10.0.0
04bf981781ba is described below

commit 04bf981781ba79d4b2d5a493ea32935eaa177709
Author: Haejoon Lee <haejoon....@databricks.com>
AuthorDate: Mon Apr 8 09:44:49 2024 -0700

    [SPARK-47737][PYTHON] Bump PyArrow to 10.0.0
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to bump the minimum supported PyArrow version to 10.0.0.
    
    ### Why are the changes needed?
    
    To leverage the new features from the latest PyArrow versions.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No API changes, but the PyArrow version in the user-facing documentation will be changed.
    
    ### How was this patch tested?
    
    The existing CI should pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #45892 from itholic/bump_arrow_10.
    
    Authored-by: Haejoon Lee <haejoon....@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 dev/create-release/spark-rm/Dockerfile                 | 2 +-
 python/docs/source/getting_started/install.rst         | 2 +-
 python/docs/source/migration_guide/pyspark_upgrade.rst | 1 +
 python/docs/source/user_guide/sql/arrow_pandas.rst     | 2 +-
 python/packaging/classic/setup.py                      | 2 +-
 python/packaging/connect/setup.py                      | 2 +-
 python/pyspark/sql/pandas/utils.py                     | 2 +-
 7 files changed, 7 insertions(+), 6 deletions(-)
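
For context, the setup.py hunks below only move the _minimum_pyarrow_version
constant from "4.0.0" to "10.0.0". As a minimal sketch of how such constants
usually feed pip requirement specifiers (the wiring here is a hypothetical
illustration, not Spark's exact setup.py code):

    # Hypothetical illustration; Spark's actual setup.py wiring may differ.
    _minimum_pandas_version = "1.4.4"
    _minimum_pyarrow_version = "10.0.0"

    # The constants are interpolated into requirement specifiers, e.g. for
    # the install_requires/extras_require arguments of setuptools.setup(...).
    sql_requires = [
        "pandas>=%s" % _minimum_pandas_version,
        "pyarrow>=%s" % _minimum_pyarrow_version,
    ]
    print(sql_requires)  # ['pandas>=1.4.4', 'pyarrow>=10.0.0']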

diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index 2cd50999c4cc..f51b24d58394 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -37,7 +37,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true
 # These arguments are just for reuse and not really meant to be customized.
 ARG APT_INSTALL="apt-get install --no-install-recommends -y"
 
-ARG PIP_PKGS="sphinx==4.5.0 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.13.3 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==3.1.2 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.62.0 protobuf==4.21.6 grpcio-status==1.62.0 googleapis-common-protos==1.56.4"
+ARG PIP_PKGS="sphinx==4.5.0 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.13.3 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==3.1.2 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==10.0.1 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.62.0 protobuf==4.21.6 grpcio-status==1.62.0 googleapis-common-protos==1.56.4"
 ARG GEM_PKGS="bundler:2.3.8"
 
 # Install extra needed repos and refresh.
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index 6aa89a689480..4c0551433d5a 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -157,7 +157,7 @@ Package                    Supported version Note
 ========================== ========================= ======================================================================================
 `py4j`                     >=0.10.9.7                Required
 `pandas`                   >=1.4.4                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
-`pyarrow`                  >=4.0.0                   Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
+`pyarrow`                  >=10.0.0                  Required for pandas API on Spark and Spark Connect; Optional for Spark SQL
 `numpy`                    >=1.21                    Required for pandas API on Spark and MLLib DataFrame-based API; Optional for Spark SQL
 `grpcio`                   >=1.62.0                  Required for Spark Connect
 `grpcio-status`            >=1.62.0                  Required for Spark Connect
diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst
index 1ca5d7aad5d1..36c1eacaf2c7 100644
--- a/python/docs/source/migration_guide/pyspark_upgrade.rst
+++ b/python/docs/source/migration_guide/pyspark_upgrade.rst
@@ -25,6 +25,7 @@ Upgrading from PySpark 3.5 to 4.0
 * In Spark 4.0, it is recommended to use Pandas version 2.0.0 or above with PySpark for optimal compatibility.
 * In Spark 4.0, the minimum supported version for Pandas has been raised from 1.0.5 to 1.4.4 in PySpark.
 * In Spark 4.0, the minimum supported version for Numpy has been raised from 1.15 to 1.21 in PySpark.
+* In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 10.0.0 in PySpark.
 * In Spark 4.0, ``Int64Index`` and ``Float64Index`` have been removed from pandas API on Spark, ``Index`` should be used directly.
 * In Spark 4.0, ``DataFrame.iteritems`` has been removed from pandas API on Spark, use ``DataFrame.items`` instead.
 * In Spark 4.0, ``Series.iteritems`` has been removed from pandas API on Spark, use ``Series.items`` instead.
diff --git a/python/docs/source/user_guide/sql/arrow_pandas.rst b/python/docs/source/user_guide/sql/arrow_pandas.rst
index ce7c8ebb36e3..039671608b6d 100644
--- a/python/docs/source/user_guide/sql/arrow_pandas.rst
+++ b/python/docs/source/user_guide/sql/arrow_pandas.rst
@@ -414,7 +414,7 @@ working with timestamps in ``pandas_udf``\s to get the best performance, see
 Recommended Pandas and PyArrow Versions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-For usage with pyspark.sql, the minimum supported versions of Pandas is 1.4.4 and PyArrow is 4.0.0.
+For usage with pyspark.sql, the minimum supported versions of Pandas is 1.4.4 and PyArrow is 10.0.0.
 Higher versions may be used, however, compatibility and data correctness can not be guaranteed and should
 be verified by the user.
 
diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py
index ddd2448e1c18..8eefc17db700 100755
--- a/python/packaging/classic/setup.py
+++ b/python/packaging/classic/setup.py
@@ -152,7 +152,7 @@ if in_spark:
 # python/packaging/connect/setup.py
 _minimum_pandas_version = "1.4.4"
 _minimum_numpy_version = "1.21"
-_minimum_pyarrow_version = "4.0.0"
+_minimum_pyarrow_version = "10.0.0"
 _minimum_grpc_version = "1.62.0"
 _minimum_googleapis_common_protos_version = "1.56.4"
 
diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py
index 782c55fff241..3514e5cdc422 100755
--- a/python/packaging/connect/setup.py
+++ b/python/packaging/connect/setup.py
@@ -91,7 +91,7 @@ try:
     # python/packaging/classic/setup.py
     _minimum_pandas_version = "1.4.4"
     _minimum_numpy_version = "1.21"
-    _minimum_pyarrow_version = "4.0.0"
+    _minimum_pyarrow_version = "10.0.0"
     _minimum_grpc_version = "1.59.3"
     _minimum_googleapis_common_protos_version = "1.56.4"
 
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index ff8183c61746..654b73e3b93c 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -61,7 +61,7 @@ def require_minimum_pandas_version() -> None:
 def require_minimum_pyarrow_version() -> None:
     """Raise ImportError if minimum version of pyarrow is not installed"""
     # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
-    minimum_pyarrow_version = "4.0.0"
+    minimum_pyarrow_version = "10.0.0"
 
     import os
 
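
For context, the final hunk above bumps the constant checked at runtime by
require_minimum_pyarrow_version(). A minimal standalone sketch of such a
version gate follows; it assumes the third-party packaging library and uses
simplified error messages, so it is illustrative rather than PySpark's exact
implementation:

    # Illustrative sketch of a minimum-version gate; PySpark's real function
    # differs in its error types and messages.
    from packaging.version import Version  # assumption: packaging is installed

    MINIMUM_PYARROW_VERSION = "10.0.0"

    def require_minimum_pyarrow_version() -> None:
        """Raise ImportError if pyarrow is missing or older than the minimum."""
        try:
            import pyarrow
        except ImportError as error:
            raise ImportError(
                "PyArrow >= %s must be installed; it was not found."
                % MINIMUM_PYARROW_VERSION
            ) from error
        if Version(pyarrow.__version__) < Version(MINIMUM_PYARROW_VERSION):
            raise ImportError(
                "PyArrow >= %s must be installed; found %s."
                % (MINIMUM_PYARROW_VERSION, pyarrow.__version__)
            )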

