This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b26f49a226f [SPARK-55141][PYTHON][INFRA] Set up a scheduled workflow 
for Pandas 3
8b26f49a226f is described below

commit 8b26f49a226fcd7268454f6f1bf0b9a6a5132bcd
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Fri Jan 23 11:48:55 2026 +0800

    [SPARK-55141][PYTHON][INFRA] Set up a scheduled workflow for Pandas 3
    
    ### What changes were proposed in this pull request?
    Set up a scheduled builder for Pandas 3
    
    ### Why are the changes needed?
    for development purposes, to monitor how compatible pyspark is with pandas 3
    
    ### Does this PR introduce _any_ user-facing change?
    no, infra-only
    
    ### How was this patch tested?
    test the image build with PR builder
    
    this image is successfully built in 
https://github.com/zhengruifeng/spark/actions/runs/21272805063/job/61226373282
    
    ```
    Successfully installed contourpy-1.3.3 coverage-7.13.1 cycler-0.12.1 
et-xmlfile-2.0.0 fonttools-4.61.1 googleapis-common-protos-1.71.0 
graphviz-0.20.3 grpcio-1.76.0 grpcio-status-1.76.0 joblib-1.5.3 
kiwisolver-1.4.9 lxml-6.0.2 matplotlib-3.10.8 memory-profiler-0.61.0 
numpy-2.4.1 openpyxl-3.1.5 packaging-26.0 pandas-3.0.0 pillow-12.1.0 
plotly-5.24.1 protobuf-6.33.0 psutil-7.2.1 pyarrow-23.0.0 pyparsing-3.3.2 
python-dateutil-2.9.0.post0 scikit-learn-1.8.0 scipy-1.17.0 tenacity-9.1.2 thr 
[...]
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #53926 from zhengruifeng/infra_pandas_3.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 .github/workflows/build_infra_images_cache.yml     | 14 ++++
 .github/workflows/build_python_3.12_pandas_3.yml   | 47 ++++++++++++
 README.md                                          |  1 +
 .../python-312-pandas-3/Dockerfile                 | 84 ++++++++++++++++++++++
 4 files changed, 146 insertions(+)

diff --git a/.github/workflows/build_infra_images_cache.yml 
b/.github/workflows/build_infra_images_cache.yml
index c5cb3e2e1bf6..e04b670fe57a 100644
--- a/.github/workflows/build_infra_images_cache.yml
+++ b/.github/workflows/build_infra_images_cache.yml
@@ -38,6 +38,7 @@ on:
     - 'dev/spark-test-image/python-311/Dockerfile'
     - 'dev/spark-test-image/python-311-classic-only/Dockerfile'
     - 'dev/spark-test-image/python-312/Dockerfile'
+    - 'dev/spark-test-image/python-312-pandas-3/Dockerfile'
     - 'dev/spark-test-image/python-313/Dockerfile'
     - 'dev/spark-test-image/python-313-nogil/Dockerfile'
     - 'dev/spark-test-image/python-314/Dockerfile'
@@ -219,6 +220,19 @@ jobs:
       - name: Image digest (PySpark with Python 3.12)
         if: hashFiles('dev/spark-test-image/python-312/Dockerfile') != ''
         run: echo ${{ steps.docker_build_pyspark_python_312.outputs.digest }}
+      - name: Build and push (PySpark with Python 3.12 Pandas 3)
+        if: hashFiles('dev/spark-test-image/python-312-pandas-3/Dockerfile') 
!= ''
+        id: docker_build_pyspark_python_312_pandas_3
+        uses: docker/build-push-action@v6
+        with:
+          context: ./dev/spark-test-image/python-312-pandas-3/
+          push: true
+          tags: 
ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-pandas-3-cache:${{
 github.ref_name }}-static
+          cache-from: 
type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-pandas-3-cache:${{
 github.ref_name }}
+          cache-to: 
type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-pandas-3-cache:${{
 github.ref_name }},mode=max
+      - name: Image digest (PySpark with Python 3.12 Pandas 3)
+        if: hashFiles('dev/spark-test-image/python-312-pandas-3/Dockerfile') 
!= ''
+        run: echo ${{ 
steps.docker_build_pyspark_python_312_pandas_3.outputs.digest }}
       - name: Build and push (PySpark with Python 3.13)
         if: hashFiles('dev/spark-test-image/python-313/Dockerfile') != ''
         id: docker_build_pyspark_python_313
diff --git a/.github/workflows/build_python_3.12_pandas_3.yml 
b/.github/workflows/build_python_3.12_pandas_3.yml
new file mode 100644
index 000000000000..ee214831be70
--- /dev/null
+++ b/.github/workflows/build_python_3.12_pandas_3.yml
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Python-only (master, Python 3.12, Pandas 3)"
+
+on:
+  schedule:
+    - cron: '0 21 * * *'
+  workflow_dispatch:
+
+jobs:
+  run-build:
+    permissions:
+      packages: write
+    name: Run
+    uses: ./.github/workflows/build_and_test.yml
+    if: github.repository == 'apache/spark'
+    with:
+      java: 17
+      branch: master
+      hadoop: hadoop3
+      envs: >-
+        {
+          "PYSPARK_IMAGE_TO_TEST": "python-312-pandas-3",
+          "PYTHON_TO_TEST": "python3.12"
+        }
+      jobs: >-
+        {
+          "pyspark": "true",
+          "pyspark-pandas": "true"
+        }
diff --git a/README.md b/README.md
index ad6db82c51e8..6f1f214c09aa 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ This README file only contains basic setup instructions.
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.11_arm.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.11_arm.yml)
                   |
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.11_macos26.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.11_macos26.yml)
           |
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.12.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.12.yml)
                           |
+|            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.12_pandas_3.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.12_pandas_3.yml)
         |
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.13.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.13.yml)
                           |
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.13_nogil.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.13_nogil.yml)
               |
 |            | [![GitHub Actions 
Build](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml)
                           |
diff --git a/dev/spark-test-image/python-312-pandas-3/Dockerfile 
b/dev/spark-test-image/python-312-pandas-3/Dockerfile
new file mode 100644
index 000000000000..483576ada82b
--- /dev/null
+++ b/dev/spark-test-image/python-312-pandas-3/Dockerfile
@@ -0,0 +1,84 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Note this is a temporary image file for development with Pandas 3,
+# and will be removed after PySpark is fully compatible with Pandas 3.
+
+# Image for building and testing Spark branches. Based on Ubuntu 22.04.
+# See also in https://hub.docker.com/_/ubuntu
+FROM ubuntu:jammy-20240911.1
+LABEL org.opencontainers.image.authors="Apache Spark project 
<[email protected]>"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark 
with Python 3.12 and Pandas 3"
+# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
+LABEL org.opencontainers.image.version=""
+
+ENV FULL_REFRESH_DATE=20260110
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    ca-certificates \
+    curl \
+    gfortran \
+    git \
+    gnupg \
+    libcurl4-openssl-dev \
+    libfontconfig1-dev \
+    libfreetype6-dev \
+    libfribidi-dev \
+    libgit2-dev \
+    libharfbuzz-dev \
+    libjpeg-dev \
+    liblapack-dev \
+    libopenblas-dev \
+    libpng-dev \
+    libpython3-dev \
+    libssl-dev \
+    libtiff5-dev \
+    libwebp-dev \
+    libxml2-dev \
+    openjdk-17-jdk-headless \
+    pkg-config \
+    qpdf \
+    tzdata \
+    software-properties-common \
+    wget \
+    zlib1g-dev
+
+# Install Python 3.12
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y \
+    python3.12 \
+    && apt-get autoremove --purge -y \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Note that mlflow is excluded since it requires pandas<3
+ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas>=3 scipy 
plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
+# Python deps for Spark Connect
+ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.0 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
+
+# Install Python 3.12 packages
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
+# RUN python3.12 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow 
needs this
+RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \
+    python3.12 -m pip install torch torchvision --index-url 
https://download.pytorch.org/whl/cpu && \
+    python3.12 -m pip install torcheval && \
+    python3.12 -m pip cache purge


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to