This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 71afa4688447 [SPARK-53163][PYTHON][INFRA] Upgrade PyArrow to 21.0.0
71afa4688447 is described below

commit 71afa4688447d6efaa4fc0b25feadbfa6f6efd40
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Aug 7 07:29:15 2025 -0700

    [SPARK-53163][PYTHON][INFRA] Upgrade PyArrow to 21.0.0
    
    ### What changes were proposed in this pull request?
    Upgrade PyArrow to 21.0.0
    
    ### Why are the changes needed?
    to test against the latest pyarrow
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    ci
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #51890 from zhengruifeng/pyarrow_21.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .github/workflows/python_hosted_runner_test.yml         | 2 +-
 dev/spark-test-image/lint/Dockerfile                    | 2 +-
 dev/spark-test-image/python-310/Dockerfile              | 2 +-
 dev/spark-test-image/python-311-classic-only/Dockerfile | 2 +-
 dev/spark-test-image/python-311/Dockerfile              | 2 +-
 dev/spark-test-image/python-312/Dockerfile              | 2 +-
 dev/spark-test-image/python-313-nogil/Dockerfile        | 2 +-
 dev/spark-test-image/python-313/Dockerfile              | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index ec7c23f63dfb..471c64effd7d 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -149,7 +149,7 @@ jobs:
         run: |
           python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
           python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
-          python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.3.1' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+          python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.1' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
           python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
           python${{matrix.python}} -m pip cache purge
       - name: List Python packages
diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile
index d9172b199f28..3b603d4ab4a6 100644
--- a/dev/spark-test-image/lint/Dockerfile
+++ b/dev/spark-test-image/lint/Dockerfile
@@ -93,7 +93,7 @@ RUN python3.11 -m pip install \
     'pandas' \
     'pandas-stubs==1.2.0.53' \
     'plotly>=4.8' \
-    'pyarrow>=20.0.0' \
+    'pyarrow>=21.0.0' \
     'pytest-mypy-plugins==1.9.3' \
     'pytest==7.1.3' \
     && python3.11 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu \
diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile
index 83994e94c4d1..671dfdc7ba00 100644
--- a/dev/spark-test-image/python-310/Dockerfile
+++ b/dev/spark-test-image/python-310/Dockerfile
@@ -63,7 +63,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-311-classic-only/Dockerfile b/dev/spark-test-image/python-311-classic-only/Dockerfile
index 12705bb7121e..7acd44788ddf 100644
--- a/dev/spark-test-image/python-311-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-311-classic-only/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 pandas==2.3.1 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 pandas==2.3.1 plotly<6.0.0 matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy scikit-learn>=1.3.2"
 ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
 
 # Install Python 3.11 packages
diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile
index 32ff3b40b8d2..5b677385d8e1 100644
--- a/dev/spark-test-image/python-311/Dockerfile
+++ b/dev/spark-test-image/python-311/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile
index 51be9774c0ea..14392871a2f8 100644
--- a/dev/spark-test-image/python-312/Dockerfile
+++ b/dev/spark-test-image/python-312/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 
diff --git a/dev/spark-test-image/python-313-nogil/Dockerfile b/dev/spark-test-image/python-313-nogil/Dockerfile
index 5fc6adf90419..3226a514755d 100644
--- a/dev/spark-test-image/python-313-nogil/Dockerfile
+++ b/dev/spark-test-image/python-313-nogil/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 
 
diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile
index 1ee831becc8c..c5f6bbbadfaa 100644
--- a/dev/spark-test-image/python-313/Dockerfile
+++ b/dev/spark-test-image/python-313/Dockerfile
@@ -67,7 +67,7 @@ RUN apt-get update && apt-get install -y \
     && rm -rf /var/lib/apt/lists/*
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=20.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=21.0.0 six==1.16.0 pandas==2.3.1 scipy plotly<6.0.0 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to