This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1ad2db271783 [SPARK-55414][PYTHON][INFRA] Upgrade Python 3.12 test 
images for classic-only and pandas 3 to Ubuntu 24.04
1ad2db271783 is described below

commit 1ad2db271783239b81bc002f716fc02d6369f7b3
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Feb 9 07:21:22 2026 +0900

    [SPARK-55414][PYTHON][INFRA] Upgrade Python 3.12 test images for classic-only and pandas 3 to Ubuntu 24.04
    
    ### What changes were proposed in this pull request?
    Upgrade Python 3.12 test images for classic-only and pandas 3 to Ubuntu 24.04.
    
    ### Why are the changes needed?
    To test with a newer OS.
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    PR builder with
    ```
    default: '{"PYSPARK_IMAGE_TO_TEST": "python-312-classic-only", "PYTHON_TO_TEST": "python3.12"}'
    ```
    
    
    https://github.com/zhengruifeng/spark/actions/runs/21777398247/job/62836232446 passed
    
    ```
    default: '{"PYSPARK_IMAGE_TO_TEST": "python-312-pandas-3", "PYTHON_TO_TEST": "python3.12"}'
    ```
    
    
    https://github.com/zhengruifeng/spark/actions/runs/21778886479/job/62840558934 failed as expected, since pandas 3 support is still WIP
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #54201 from zhengruifeng/u24_py_312_313.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../python-312-classic-only/Dockerfile             | 23 ++++++++++---------
 .../python-312-pandas-3/Dockerfile                 | 26 +++++++++++-----------
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/dev/spark-test-image/python-312-classic-only/Dockerfile 
b/dev/spark-test-image/python-312-classic-only/Dockerfile
index 685f4e80315c..d7fc4cfb2b5a 100644
--- a/dev/spark-test-image/python-312-classic-only/Dockerfile
+++ b/dev/spark-test-image/python-312-classic-only/Dockerfile
@@ -15,16 +15,16 @@
 # limitations under the License.
 #
 
-# Image for building and testing Spark branches. Based on Ubuntu 22.04.
+# Image for building and testing Spark branches. Based on Ubuntu 24.04.
 # See also in https://hub.docker.com/_/ubuntu
-FROM ubuntu:jammy-20240911.1
+FROM ubuntu:noble
 LABEL org.opencontainers.image.authors="Apache Spark project 
<[email protected]>"
 LABEL org.opencontainers.image.licenses="Apache-2.0"
 LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark 
Classic with Python 3.12"
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20260203
+ENV FULL_REFRESH_DATE=20260207
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -41,26 +41,27 @@ RUN apt-get update && apt-get install -y \
     libopenblas-dev \
     libssl-dev \
     openjdk-17-jdk-headless \
+    python3.12 \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
-
-# Install Python 3.12
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y \
-    python3.12 \
+    zlib1g-dev \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
+# Setup virtual environment
+ENV VIRTUAL_ENV=/opt/spark-venv
+RUN python3.12 -m venv --without-pip $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Install Python 3.12 packages
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
 
 ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 pandas==2.3.3 plotly<6.0.0 
matplotlib openpyxl memory-profiler>=0.61.0 mlflow>=2.8.1 scipy 
scikit-learn>=1.3.2 pystack>=1.6.0 psutil"
 ARG TEST_PIP_PKGS="coverage unittest-xml-reporting"
 
-# Install Python 3.12 packages
 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
-RUN python3.12 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow 
needs this
 RUN python3.12 -m pip install $BASIC_PIP_PKGS $TEST_PIP_PKGS && \
     python3.12 -m pip install torch torchvision --index-url 
https://download.pytorch.org/whl/cpu && \
     python3.12 -m pip install deepspeed torcheval && \
diff --git a/dev/spark-test-image/python-312-pandas-3/Dockerfile 
b/dev/spark-test-image/python-312-pandas-3/Dockerfile
index 6b2d61be529e..a310709b1a07 100644
--- a/dev/spark-test-image/python-312-pandas-3/Dockerfile
+++ b/dev/spark-test-image/python-312-pandas-3/Dockerfile
@@ -18,16 +18,16 @@
 # Note this is a temporary image file for development with Pandas 3,
 # and will be remvoed after PySpark is fully compatible with Pandas 3.
 
-# Image for building and testing Spark branches. Based on Ubuntu 22.04.
+# Image for building and testing Spark branches. Based on Ubuntu 24.04.
 # See also in https://hub.docker.com/_/ubuntu
-FROM ubuntu:jammy-20240911.1
+FROM ubuntu:noble
 LABEL org.opencontainers.image.authors="Apache Spark project 
<[email protected]>"
 LABEL org.opencontainers.image.licenses="Apache-2.0"
 LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark 
with Python 3.12 and Pandas 3"
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20260127
+ENV FULL_REFRESH_DATE=20260207
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -44,27 +44,27 @@ RUN apt-get update && apt-get install -y \
     libopenblas-dev \
     libssl-dev \
     openjdk-17-jdk-headless \
+    python3.12 \
     pkg-config \
     tzdata \
     software-properties-common \
-    zlib1g-dev
-
-# Install Python 3.12
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y \
-    python3.12 \
+    zlib1g-dev \
     && apt-get autoremove --purge -y \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
+# Setup virtual environment
+ENV VIRTUAL_ENV=/opt/spark-venv
+RUN python3.12 -m venv --without-pip $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Install Python 3.12 packages
 # Note that mlflow is execluded since it requires pandas<3
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
+
 ARG BASIC_PIP_PKGS="numpy pyarrow>=22.0.0 six==1.16.0 pandas>=3 scipy 
plotly<6.0.0 coverage matplotlib openpyxl memory-profiler>=0.61.0 
scikit-learn>=1.3.2"
-# Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 protobuf==6.33.5 
googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20.3"
 
-# Install Python 3.12 packages
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
-# RUN python3.12 -m pip install --ignore-installed 'blinker>=1.6.2' # mlflow 
needs this
 RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting 
$CONNECT_PIP_PKGS lxml && \
     python3.12 -m pip install torch torchvision --index-url 
https://download.pytorch.org/whl/cpu && \
     python3.12 -m pip install torcheval && \


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to