This is an automated email from the ASF dual-hosted git repository.

yzheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git


The following commit(s) were added to refs/heads/main by this push:
     new c7c86a4fb Refactor: improve and clean up Dockerfiles (#2957)
c7c86a4fb is described below

commit c7c86a4fb8a213a25f26380e52b10f5830832fe3
Author: Yong Zheng <[email protected]>
AuthorDate: Tue Nov 11 08:20:56 2025 -0600

    Refactor: improve and clean up Dockerfiles (#2957)
    
    * Refactor: improve and clean up Dockerfiles
    
    * Refactor: improve and clean up Dockerfiles
    
    * Refactor: improve and clean up Dockerfiles
    
    * Refactor: improve and clean up Dockerfiles
    
    * Refactor: improve and clean up Dockerfiles
    
    * Refactor: improve and clean up Dockerfiles
---
 getting-started/spark/notebooks/Dockerfile         |  4 +-
 .../v3.5/getting-started/notebooks/Dockerfile      |  7 ++--
 .../getting-started/notebooks/SparkPolaris.ipynb   |  3 +-
 plugins/spark/v3.5/regtests/Dockerfile             | 34 ++++++++--------
 regtests/Dockerfile                                | 47 ++++++++++------------
 runtime/admin/src/main/docker/Dockerfile.jvm       | 24 ++++++-----
 runtime/server/src/main/docker/Dockerfile.jvm      | 32 +++++++--------
 site/docker/Dockerfile                             | 21 +++-------
 8 files changed, 79 insertions(+), 93 deletions(-)

diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile
index 32ee4067b..bb57ee65f 100644
--- a/getting-started/spark/notebooks/Dockerfile
+++ b/getting-started/spark/notebooks/Dockerfile
@@ -19,8 +19,8 @@
 
 FROM docker.io/apache/spark:3.5.6-java17
 
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
-ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
+    PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
 
 USER root
 
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
index f5e052b2a..392d79e0a 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
+++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
@@ -19,8 +19,8 @@
 
 FROM docker.io/apache/spark:3.5.6-java17
 
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
-ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
+    PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
 
 USER root
 
@@ -36,8 +36,7 @@ WORKDIR /home/spark
 COPY --chown=spark client /home/spark/client
 COPY --chown=spark regtests/requirements.txt /tmp
 COPY --chown=spark regtests/notebook_requirements.txt /tmp
-COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs
-
+COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs/*bundle.jar /opt/spark/jars/
 
 RUN python3 -m venv /home/spark/venv && \
     . /home/spark/venv/bin/activate && \
diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
index f4e4a00bb..226a42920 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
+++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
@@ -265,7 +265,8 @@
     "from pyspark.sql import SparkSession\n",
     "\n",
     "spark = (SparkSession.builder\n",
-    "  .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")  # TODO: add a way to automatically discover the Jar\n",
+    "  # This jar is now automatically discovered, thus no longer needed\n",
+    "  #.config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")\n",
     "  .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n",
     "  .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n",
     "  .config('spark.sql.iceberg.vectorization.enabled', 'false')\n",
diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile
index db84d3eb1..5c4c480f2 100755
--- a/plugins/spark/v3.5/regtests/Dockerfile
+++ b/plugins/spark/v3.5/regtests/Dockerfile
@@ -18,31 +18,29 @@
 #
 
 FROM docker.io/apache/spark:3.5.6-java17
-ARG POLARIS_HOST=polaris
-ENV POLARIS_HOST=$POLARIS_HOST
-ENV SPARK_HOME=/opt/spark
-ENV CURRENT_SCALA_VERSION='2.12'
-ENV LANGUAGE='en_US:en'
+
+ARG POLARIS_HOST=polaris \
+    CURRENT_SCALA_VERSION=2.12
+
+ENV POLARIS_HOST=${POLARIS_HOST} \
+    CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION}
 
 USER root
-RUN apt update
-RUN apt-get install -y diffutils wget curl
-RUN mkdir -p /home/spark &&  \
-    chown -R spark /home/spark && \
-    mkdir -p /tmp/polaris-regtests && \
-    chown -R spark /tmp/polaris-regtests
-RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
 
-USER spark
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends diffutils wget curl && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
+    chown -R spark:spark /home/spark /tmp/polaris-regtests && \
+    chmod -R 777 /opt/spark/conf
 
 WORKDIR /home/spark/polaris
 
-COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5
+COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5
+
+# /home/spark/.../regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+RUN chmod -R 777 /home/spark/polaris/v3.5/regtests
 
-# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
-USER root
-RUN chmod -R go+rwx /home/spark/polaris
-RUN chmod -R 777 ./v3.5/regtests
 USER spark
 
 ENTRYPOINT ["./v3.5/regtests/run.sh"]
diff --git a/regtests/Dockerfile b/regtests/Dockerfile
index 88fa13dda..183701a5c 100644
--- a/regtests/Dockerfile
+++ b/regtests/Dockerfile
@@ -18,45 +18,40 @@
 #
 
 FROM docker.io/apache/spark:3.5.6-java17-python3
+
 ARG POLARIS_HOST=polaris
-ENV POLARIS_HOST=$POLARIS_HOST
-ENV SPARK_HOME=/opt/spark
-ENV LANGUAGE='en_US:en'
+
+ENV POLARIS_HOST=${POLARIS_HOST} \
+    PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip"
 
 USER root
-RUN apt update
-RUN apt-get install -y diffutils wget curl python3.10-venv jq
-RUN mkdir -p /home/spark &&  \
-    chown -R spark /home/spark && \
-    mkdir -p /tmp/polaris-regtests && \
-    chown -R spark /tmp/polaris-regtests
-RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends diffutils wget curl python3.10-venv jq && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
+    chown -R spark:spark /home/spark /tmp/polaris-regtests && \
+    chmod -R 777 /opt/spark/conf
+
+COPY --chown=spark:spark ./regtests/setup.sh ./regtests/pyspark-setup.sh ./regtests/requirements.txt /home/spark/polaris/regtests/
+COPY --chown=spark:spark ./client/python /home/spark/polaris/client/python
+COPY --chown=spark:spark ./polaris /home/spark/polaris/polaris
+COPY --chown=spark:spark ./spec /home/spark/polaris/spec
+COPY --chown=spark:spark ./regtests /home/spark/polaris/regtests
+
+# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+RUN chmod -R go+rwx /home/spark/polaris
 
 USER spark
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
 
-# Copy and run setup.sh separately so that test sources can change, but the setup script run is still cached
 WORKDIR /home/spark/polaris
-COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh
-COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh
-COPY --chown=spark ./client/python /home/spark/polaris/client/python
-COPY --chown=spark ./polaris /home/spark/polaris/polaris
-COPY --chown=spark ./spec /home/spark/polaris/spec
-COPY --chown=spark ./regtests/requirements.txt /tmp/
 
 RUN python3 -m venv /home/spark/polaris/polaris-venv && \
     . /home/spark/polaris/polaris-venv/bin/activate && \
-    pip install -r /tmp/requirements.txt && \
+    pip install -r /home/spark/polaris/regtests/requirements.txt && \
     cd /home/spark/polaris/client/python && \
     poetry install && \
     deactivate && \
     /home/spark/polaris/regtests/setup.sh
 
-COPY --chown=spark ./regtests /home/spark/polaris/regtests
-
-# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
-USER root
-RUN chmod -R go+rwx /home/spark/polaris
-USER spark
-
 ENTRYPOINT ["./regtests/run.sh"]
diff --git a/runtime/admin/src/main/docker/Dockerfile.jvm b/runtime/admin/src/main/docker/Dockerfile.jvm
index 4a6330ec3..ebd121d13 100644
--- a/runtime/admin/src/main/docker/Dockerfile.jvm
+++ b/runtime/admin/src/main/docker/Dockerfile.jvm
@@ -18,23 +18,25 @@
 #
 FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
 
-LABEL org.opencontainers.image.source=https://github.com/apache/polaris
-LABEL org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool"
-LABEL org.opencontainers.image.licenses=Apache-2.0
+LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
+      org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" \
+      org.opencontainers.image.licenses=Apache-2.0
 
-ENV LANGUAGE='en_US:en'
+ENV LANGUAGE='en_US:en' \
+    USER=polaris \
+    UID=10000 \
+    HOME=/home/polaris
 
 USER root
-RUN groupadd --gid 10001 polaris \
-      && useradd --uid 10000 --gid polaris polaris \
-      && chown -R polaris:polaris /opt/jboss/container \
-      && chown -R polaris:polaris /deployments
+
+RUN groupadd --gid 10001 polaris && \
+    useradd --uid 10000 --gid polaris -m polaris && \
+    mkdir -p /deployments && \
+    chown -R polaris:polaris /deployments /opt/jboss/container
 
 USER polaris
+
 WORKDIR /home/polaris
-ENV USER=polaris
-ENV UID=10000
-ENV HOME=/home/polaris
 
 # We make four distinct layers so if there are application changes the library layers can be reused
 COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/
diff --git a/runtime/server/src/main/docker/Dockerfile.jvm b/runtime/server/src/main/docker/Dockerfile.jvm
index 393c9c8a3..70d694c0f 100644
--- a/runtime/server/src/main/docker/Dockerfile.jvm
+++ b/runtime/server/src/main/docker/Dockerfile.jvm
@@ -18,23 +18,27 @@
 #
 FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
 
-LABEL org.opencontainers.image.source=https://github.com/apache/polaris
-LABEL org.opencontainers.image.description="Apache Polaris (incubating)"
-LABEL org.opencontainers.image.licenses=Apache-2.0
+LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
+      org.opencontainers.image.description="Apache Polaris (incubating)" \
+      org.opencontainers.image.licenses=Apache-2.0
 
-ENV LANGUAGE='en_US:en'
+ENV LANGUAGE='en_US:en' \
+    USER=polaris \
+    UID=10000 \
+    HOME=/home/polaris \
+    AB_JOLOKIA_OFF="" \
+    JAVA_APP_JAR="/deployments/quarkus-run.jar"
 
 USER root
-RUN groupadd --gid 10001 polaris \
-      && useradd --uid 10000 --gid polaris polaris \
-      && chown -R polaris:polaris /opt/jboss/container \
-      && chown -R polaris:polaris /deployments
+
+RUN groupadd --gid 10001 polaris && \
+    useradd --uid 10000 --gid polaris polaris && \
+    chown -R polaris:polaris /opt/jboss/container && \
+    chown -R polaris:polaris /deployments
 
 USER polaris
+
 WORKDIR /home/polaris
-ENV USER=polaris
-ENV UID=10000
-ENV HOME=/home/polaris
 
 # We make four distinct layers so if there are application changes the library layers can be reused
 COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/
@@ -45,8 +49,4 @@ COPY --chown=polaris:polaris distribution/LICENSE /deployments/
 COPY --chown=polaris:polaris distribution/NOTICE /deployments/
 COPY --chown=polaris:polaris distribution/DISCLAIMER /deployments/
 
-EXPOSE 8181
-EXPOSE 8182
-
-ENV AB_JOLOKIA_OFF=""
-ENV JAVA_APP_JAR="/deployments/quarkus-run.jar"
+EXPOSE 8181 8182
\ No newline at end of file
diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile
index 714b1ccf5..297c9aa7d 100644
--- a/site/docker/Dockerfile
+++ b/site/docker/Dockerfile
@@ -21,23 +21,14 @@ FROM ubuntu:24.04 AS hugo
 
 ENV LANGUAGE='en_US:en'
 
-RUN apt-get update
-RUN apt-get install --yes golang hugo asciidoctor npm curl
-RUN apt-get clean
-# http-server is used when building the static site to manually check it locally
-# (via `site/bin/create-static-site.sh --local` at http://localhost:8080/)
-RUN npm install --global http-server
-
-# these dependencies are needed to build the static site
-#RUN npm install --global autoprefixer postcss postcss-cli http-server
-
-RUN mkdir /polaris
-RUN mkdir /polaris/site
-RUN mkdir /polaris/site/resources
+RUN apt-get update && \
+    apt-get install --yes --no-install-recommends golang hugo asciidoctor npm curl git && \
+    rm -rf /var/lib/apt/lists/* && \
+    npm install --global http-server && \
+    mkdir -p /polaris/site/resources
 
 COPY _run_in_docker.sh /hugo/run
 
-EXPOSE 1313
-EXPOSE 8080
+EXPOSE 1313 8080
 
 ENTRYPOINT ["/hugo/run"]

Reply via email to