This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new eb1673833ef8 [SPARK-55518][INFRA][PYTHON][DOCS] Upgrade Python to 3.12 
in doc build
eb1673833ef8 is described below

commit eb1673833ef8c1d2ba54e4615812f0e0c4c16401
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Feb 19 10:13:06 2026 +0900

    [SPARK-55518][INFRA][PYTHON][DOCS] Upgrade Python to 3.12 in doc build
    
    ### What changes were proposed in this pull request?
    Upgrade Python to 3.12 in doc build
    
    ### Why are the changes needed?
    1. Upgrade Python to 3.12 in the doc build;
    2. Unpin `pyzmq<24.0.0` (introduced in 
https://github.com/apache/spark/pull/37904) for the Python linter; otherwise the 
Python installation fails
    
    ### Does this PR introduce _any_ user-facing change?
    No, infra-only
    
    ### How was this patch tested?
    CI, the PR builder should cover this change
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #54310 from zhengruifeng/doc_py312.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .github/workflows/build_and_test.yml | 35 +++++++++++++++++++++++++++++++----
 dev/spark-test-image/docs/Dockerfile | 25 ++++++++++---------------
 2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index 226947a2e8b3..ecbb304c382b 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1104,11 +1104,14 @@ jobs:
     - name: List Python packages for branch-3.5 and branch-4.0
       if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
       run: python3.9 -m pip list
+    - name: List Python packages for branch-4.1
+      if: inputs.branch == 'branch-4.1'
+      run: python3.11 -m pip list
     - name: List Python packages
-      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && 
inputs.branch != 'branch-4.1'
       run: |
         lsb_release -a
-        python3.11 -m pip list
+        python3.12 -m pip list
     - name: Install dependencies for documentation generation
       run: |
         # Keep the version of Bundler here in sync with the following 
locations:
@@ -1139,8 +1142,8 @@ jobs:
         echo "SKIP_SQLDOC: $SKIP_SQLDOC"
         cd docs
         bundle exec jekyll build
-    - name: Run documentation build
-      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+    - name: Run documentation build for branch-4.1
+      if: inputs.branch == 'branch-4.1'
       run: |
         # We need this link to make sure `python3` points to `python3.11` 
which contains the prerequisite packages.
         ln -s "$(which python3.11)" "/usr/local/bin/python3"
@@ -1163,6 +1166,30 @@ jobs:
         echo "SKIP_SQLDOC: $SKIP_SQLDOC"
         cd docs
         bundle exec jekyll build
+    - name: Run documentation build
+      if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && 
inputs.branch != 'branch-4.1'
+      run: |
+        # We need this link to make sure `python3` points to `python3.12` 
which contains the prerequisite packages.
+        ln -s "$(which python3.12)" "/usr/local/bin/python3"
+        # Build docs first with SKIP_API to ensure they are buildable without 
requiring any
+        # language docs to be built beforehand.
+        cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
+        if [ -f "./dev/is-changed.py" ]; then
+          # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+          pyspark_modules=`cd dev && python3.12 -c "import 
sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if 
m.name.startswith('pyspark')))"`
+          if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then 
export SKIP_PYTHONDOC=1; fi
+          if [ `./dev/is-changed.py -m sparkr` = false ]; then export 
SKIP_RDOC=1; fi
+        fi
+        export PYSPARK_DRIVER_PYTHON=python3.12
+        export PYSPARK_PYTHON=python3.12
+        # Print the values of environment variables `SKIP_ERRORDOC`, 
`SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+        echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+        echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+        echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+        echo "SKIP_RDOC: $SKIP_RDOC"
+        echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+        cd docs
+        bundle exec jekyll build
     - name: Tar documentation
       if: github.repository != 'apache/spark'
       run: tar cjf site.tar.bz2 docs/_site
diff --git a/dev/spark-test-image/docs/Dockerfile 
b/dev/spark-test-image/docs/Dockerfile
index 03f5c5988180..347192b3c334 100644
--- a/dev/spark-test-image/docs/Dockerfile
+++ b/dev/spark-test-image/docs/Dockerfile
@@ -24,7 +24,7 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra 
Image for Documentat
 # Overwrite this label to avoid exposing the underlying Ubuntu OS version label
 LABEL org.opencontainers.image.version=""
 
-ENV FULL_REFRESH_DATE=20260208
+ENV FULL_REFRESH_DATE=20260213
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV DEBCONF_NONINTERACTIVE_SEEN=true
@@ -56,6 +56,8 @@ RUN apt-get update && apt-get install -y \
     openjdk-17-jdk-headless \
     pandoc \
     pkg-config \
+    python3.12 \
+    python3.12-venv \
     qpdf \
     tzdata \
     r-base \
@@ -63,7 +65,10 @@ RUN apt-get update && apt-get install -y \
     ruby-dev \
     software-properties-common \
     wget \
-    zlib1g-dev
+    zlib1g-dev \
+    && apt-get autoremove --purge -y \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 
 # See more in SPARK-39959, roxygen2 < 7.2.1
 RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', 
'rmarkdown', 'testthat'), repos='https://cloud.r-project.org/')" && \
@@ -74,27 +79,17 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 
'markdown', 'rmarkdown',
 # See more in SPARK-39735
 ENV 
R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
 
-# Install Python 3.11
-RUN add-apt-repository ppa:deadsnakes/ppa
-RUN apt-get update && apt-get install -y \
-    python3.11 \
-    && apt-get autoremove --purge -y \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
 # Setup virtual environment
 ENV VIRTUAL_ENV=/opt/spark-venv
-RUN python3.11 -m venv --without-pip $VIRTUAL_ENV
+RUN python3.12 -m venv $VIRTUAL_ENV
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
-
 # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
 # See 'ipython_genutils' in SPARK-38517
 # See 'docutils<0.18.0' in SPARK-39421
-RUN python3.11 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe 'pyzmq<24.0.0' \
+RUN python3.12 -m pip install 'sphinx==4.5.0' mkdocs 
'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 
markupsafe \
   ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 
'pandas==2.3.3' 'plotly>=4.8' 'docutils<0.18.0' \
   'flake8==3.9.0' 'mypy==1.19.1' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 
'black==23.12.1' \
   'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 
'protobuf==6.33.5' 'grpc-stubs==1.24.11' 
'googleapis-common-protos-stubs==2.2.0' \
   'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 
'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 
'sphinxcontrib-serializinghtml==1.1.5' \
-  && python3.11 -m pip cache purge
+  && python3.12 -m pip cache purge


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to