This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 751f34acd222 [SPARK-54046][INFRA] Upgrade PyArrow to 22.0.0
751f34acd222 is described below

commit 751f34acd222e0a5c7de13d4b8cc75d78058f8b2
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Fri Nov 7 19:38:52 2025 -0800

    [SPARK-54046][INFRA] Upgrade PyArrow to 22.0.0
    
    ### What changes were proposed in this pull request?
    
    This PR aims to upgrade `PyArrow` to 22.0.0.
    
    ### Why are the changes needed?
    
    To test against the latest `PyArrow` version. `PyArrow 22.0.0` is the first 
version to support `Python 3.14`.
    - https://pypi.org/project/pyarrow/22.0.0/ (2025-10-24)
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #52748 from dongjoon-hyun/SPARK-54046.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit a32559a3284e299fd86a05dbbeda4bfe73a78b00)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .github/workflows/python_hosted_runner_test.yml         |  2 +-
 dev/spark-test-image/lint/Dockerfile                    |  2 +-
 dev/spark-test-image/numpy-213/Dockerfile               |  2 +-
 dev/spark-test-image/python-310/Dockerfile              |  2 +-
 dev/spark-test-image/python-311-classic-only/Dockerfile |  2 +-
 dev/spark-test-image/python-311/Dockerfile              |  2 +-
 dev/spark-test-image/python-312/Dockerfile              |  2 +-
 dev/spark-test-image/python-313-nogil/Dockerfile        |  2 +-
 dev/spark-test-image/python-313/Dockerfile              |  2 +-
 python/pyspark/pandas/tests/io/test_feather.py          | 12 +++++++++++-
 10 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/python_hosted_runner_test.yml 
b/.github/workflows/python_hosted_runner_test.yml
index d55eb1d93799..b7ee3a8b7113 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -147,7 +147,7 @@ jobs:
         run: |
           python${{matrix.python}} -m pip install --ignore-installed 
'blinker>=1.6.2'
           python${{matrix.python}} -m pip install --ignore-installed 
'six==1.16.0'
-          python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 
'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage 
matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 
unittest-xml-reporting && \
+          python${{matrix.python}} -m pip install numpy 'pyarrow>=22.0.0' 
'six==1.16.0' 'pandas==2.3.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage 
matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 
unittest-xml-reporting && \
           python${{matrix.python}} -m pip install 'grpcio==1.76.0' 
'grpcio-status==1.76.0' 'protobuf==6.33.0' 'googleapis-common-protos==1.71.0' 
'graphviz==0.20.3' && \
           python${{matrix.python}} -m pip cache purge
       - name: List Python packages
diff --git a/dev/spark-test-image/lint/Dockerfile 
b/dev/spark-test-image/lint/Dockerfile
index 6686e3808e03..4dfceae63a17 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -94,7 +94,7 @@ RUN python3.11 -m pip install \
     'pandas' \
     'pandas-stubs==1.2.0.53' \
     'plotly>=4.8' \
-    'pyarrow>=21.0.0' \
+    'pyarrow>=22.0.0' \
     'pytest-mypy-plugins==1.9.3' \
     'pytest==7.1.3' \
     && python3.11 -m pip install torch torchvision --index-url 
https://download.pytorch.org/whl/cpu \
diff --git a/dev/spark-test-image/numpy-213/Dockerfile 
b/dev/spark-test-image/numpy-213/Dockerfile
index 06b21bb555bb..bc9a507853c2 100644
--- a/dev/spark-test-image/numpy-213/Dockerfile
+++ b/dev/spark-test-image/numpy-213/Dockerfile
@@ -69,7 +69,7 @@ RUN apt-get update && apt-get install -y \
 
 
 # Pin numpy==2.1.3
-ARG BASIC_PIP_PKGS="numpy==2.1.3 pyarrow>=21.0.0 six==1.16.0 pandas==2.2.3 
scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl 
memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy==2.1.3 pyarrow>=22.0.0 six==1.16.0 pandas==2.2.3 
scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl 
memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-310/Dockerfile 
b/dev/spark-test-image/python-310/Dockerfile
index 99d231f47bd7..c318a615b7e0 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -64,7 +64,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-311-classic-only/Dockerfile 
b/dev/spark-test-image/python-311-classic-only/Dockerfile
index 484c94098844..1c5f9a233578 100644
--- a/dev/spark-test-image/python-311-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-311-classic-only/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 pandas==2.3.3 plotly<6.0.0 
matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 pandas==2.3.3 plotly<6.0.0 
matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy 
scikit-learn>=1.3.2"
 ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
 
 # Install Python 3.11 packages
diff --git a/dev/spark-test-image/python-311/Dockerfile 
b/dev/spark-test-image/python-311/Dockerfile
index a45ab4554848..69d47e62774a 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-312/Dockerfile 
b/dev/spark-test-image/python-312/Dockerfile
index e05fe327a030..0c8b816f8629 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile 
b/dev/spark-test-image/python-313-nogil/Dockerfile
index d4469a9cd494..1262089f43e1 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
 
diff --git a/dev/spark-test-image/python-313/Dockerfile 
b/dev/spark-test-image/python-313/Dockerfile
index 1d3d73c9feb9..2e4dde33077d 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -68,7 +68,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas==2.3.3 scipy 
plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 graphviz==0.20.3"
 
diff --git a/python/pyspark/pandas/tests/io/test_feather.py 
b/python/pyspark/pandas/tests/io/test_feather.py
index 3ddf0a2aad92..10638d915c0e 100644
--- a/python/pyspark/pandas/tests/io/test_feather.py
+++ b/python/pyspark/pandas/tests/io/test_feather.py
@@ -20,6 +20,7 @@ import pandas as pd
 import sys
 
 from pyspark import pandas as ps
+from pyspark.loose_version import LooseVersion
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
 
 
@@ -35,7 +36,16 @@ class FeatherMixin:
     def psdf(self):
         return ps.from_pandas(self.pdf)
 
-    @unittest.skipIf(sys.version_info > (3, 13), "SPARK-54068")
+    has_arrow_21_or_below = False
+    try:
+        import pyarrow as pa
+
+        if LooseVersion(pa.__version__) < LooseVersion("22.0.0"):
+            has_arrow_21_or_below = True
+    except ImportError:
+        pass
+
+    @unittest.skipIf(not has_arrow_21_or_below, "SPARK-54068")
     def test_to_feather(self):
         with self.temp_dir() as dirpath:
             path1 = f"{dirpath}/file1.feather"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to