This is an automated email from the ASF dual-hosted git repository.
yzheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new c7c86a4fb Refactor: improve and clean up Dockerfiles (#2957)
c7c86a4fb is described below
commit c7c86a4fb8a213a25f26380e52b10f5830832fe3
Author: Yong Zheng <[email protected]>
AuthorDate: Tue Nov 11 08:20:56 2025 -0600
Refactor: improve and clean up Dockerfiles (#2957)
* Refactor: improve and clean up Dockerfiles
* Refactor: improve and clean up Dockerfiles
* Refactor: improve and clean up Dockerfiles
* Refactor: improve and clean up Dockerfiles
* Refactor: improve and clean up Dockerfiles
* Refactor: improve and clean up Dockerfiles
---
getting-started/spark/notebooks/Dockerfile | 4 +-
.../v3.5/getting-started/notebooks/Dockerfile | 7 ++--
.../getting-started/notebooks/SparkPolaris.ipynb | 3 +-
plugins/spark/v3.5/regtests/Dockerfile | 34 ++++++++--------
regtests/Dockerfile | 47 ++++++++++------------
runtime/admin/src/main/docker/Dockerfile.jvm | 24 ++++++-----
runtime/server/src/main/docker/Dockerfile.jvm | 32 +++++++--------
site/docker/Dockerfile | 21 +++-------
8 files changed, 79 insertions(+), 93 deletions(-)
diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile
index 32ee4067b..bb57ee65f 100644
--- a/getting-started/spark/notebooks/Dockerfile
+++ b/getting-started/spark/notebooks/Dockerfile
@@ -19,8 +19,8 @@
FROM docker.io/apache/spark:3.5.6-java17
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
-ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
+ PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
USER root
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
index f5e052b2a..392d79e0a 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
+++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
@@ -19,8 +19,8 @@
FROM docker.io/apache/spark:3.5.6-java17
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
-ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python \
+ PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
USER root
@@ -36,8 +36,7 @@ WORKDIR /home/spark
COPY --chown=spark client /home/spark/client
COPY --chown=spark regtests/requirements.txt /tmp
COPY --chown=spark regtests/notebook_requirements.txt /tmp
-COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs
-
+COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs/*bundle.jar /opt/spark/jars/
RUN python3 -m venv /home/spark/venv && \
. /home/spark/venv/bin/activate && \
diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
index f4e4a00bb..226a42920 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
+++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
@@ -265,7 +265,8 @@
"from pyspark.sql import SparkSession\n",
"\n",
"spark = (SparkSession.builder\n",
- " .config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\") # TODO: add a way to automatically discover the Jar\n",
+ " # This jar is now automatically discovered, thus no longer needed\n",
+ " #.config(\"spark.jars\", \"../polaris_libs/polaris-spark-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar\")\n",
" .config(\"spark.jars.packages\", \"org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.2.1\")\n",
" .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n",
" .config('spark.sql.iceberg.vectorization.enabled', 'false')\n",
diff --git a/plugins/spark/v3.5/regtests/Dockerfile b/plugins/spark/v3.5/regtests/Dockerfile
index db84d3eb1..5c4c480f2 100755
--- a/plugins/spark/v3.5/regtests/Dockerfile
+++ b/plugins/spark/v3.5/regtests/Dockerfile
@@ -18,31 +18,29 @@
#
FROM docker.io/apache/spark:3.5.6-java17
-ARG POLARIS_HOST=polaris
-ENV POLARIS_HOST=$POLARIS_HOST
-ENV SPARK_HOME=/opt/spark
-ENV CURRENT_SCALA_VERSION='2.12'
-ENV LANGUAGE='en_US:en'
+
+ARG POLARIS_HOST=polaris \
+ CURRENT_SCALA_VERSION=2.12
+
+ENV POLARIS_HOST=${POLARIS_HOST} \
+ CURRENT_SCALA_VERSION=${CURRENT_SCALA_VERSION}
USER root
-RUN apt update
-RUN apt-get install -y diffutils wget curl
-RUN mkdir -p /home/spark && \
- chown -R spark /home/spark && \
- mkdir -p /tmp/polaris-regtests && \
- chown -R spark /tmp/polaris-regtests
-RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
-USER spark
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends diffutils wget curl && \
+ rm -rf /var/lib/apt/lists/* && \
+ mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
+ chown -R spark:spark /home/spark /tmp/polaris-regtests && \
+ chmod -R 777 /opt/spark/conf
WORKDIR /home/spark/polaris
-COPY --chown=spark ./v3.5 /home/spark/polaris/v3.5
+COPY --chown=spark:spark ./v3.5 /home/spark/polaris/v3.5
+
+# /home/spark/.../regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+RUN chmod -R 777 /home/spark/polaris/v3.5/regtests
-# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
-USER root
-RUN chmod -R go+rwx /home/spark/polaris
-RUN chmod -R 777 ./v3.5/regtests
USER spark
ENTRYPOINT ["./v3.5/regtests/run.sh"]
diff --git a/regtests/Dockerfile b/regtests/Dockerfile
index 88fa13dda..183701a5c 100644
--- a/regtests/Dockerfile
+++ b/regtests/Dockerfile
@@ -18,45 +18,40 @@
#
FROM docker.io/apache/spark:3.5.6-java17-python3
+
ARG POLARIS_HOST=polaris
-ENV POLARIS_HOST=$POLARIS_HOST
-ENV SPARK_HOME=/opt/spark
-ENV LANGUAGE='en_US:en'
+
+ENV POLARIS_HOST=${POLARIS_HOST} \
+ PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip"
USER root
-RUN apt update
-RUN apt-get install -y diffutils wget curl python3.10-venv jq
-RUN mkdir -p /home/spark && \
- chown -R spark /home/spark && \
- mkdir -p /tmp/polaris-regtests && \
- chown -R spark /tmp/polaris-regtests
-RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends diffutils wget curl python3.10-venv jq && \
+ rm -rf /var/lib/apt/lists/* && \
+ mkdir -p /home/spark /tmp/polaris-regtests /opt/spark/conf && \
+ chown -R spark:spark /home/spark /tmp/polaris-regtests && \
+ chmod -R 777 /opt/spark/conf
+
+COPY --chown=spark:spark ./regtests/setup.sh ./regtests/pyspark-setup.sh ./regtests/requirements.txt /home/spark/polaris/regtests/
+COPY --chown=spark:spark ./client/python /home/spark/polaris/client/python
+COPY --chown=spark:spark ./polaris /home/spark/polaris/polaris
+COPY --chown=spark:spark ./spec /home/spark/polaris/spec
+COPY --chown=spark:spark ./regtests /home/spark/polaris/regtests
+
+# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+RUN chmod -R go+rwx /home/spark/polaris
USER spark
-ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
-# Copy and run setup.sh separately so that test sources can change, but the setup script run is still cached
WORKDIR /home/spark/polaris
-COPY --chown=spark ./regtests/setup.sh /home/spark/polaris/regtests/setup.sh
-COPY --chown=spark ./regtests/pyspark-setup.sh /home/spark/polaris/regtests/pyspark-setup.sh
-COPY --chown=spark ./client/python /home/spark/polaris/client/python
-COPY --chown=spark ./polaris /home/spark/polaris/polaris
-COPY --chown=spark ./spec /home/spark/polaris/spec
-COPY --chown=spark ./regtests/requirements.txt /tmp/
RUN python3 -m venv /home/spark/polaris/polaris-venv && \
. /home/spark/polaris/polaris-venv/bin/activate && \
- pip install -r /tmp/requirements.txt && \
+ pip install -r /home/spark/polaris/regtests/requirements.txt && \
cd /home/spark/polaris/client/python && \
poetry install && \
deactivate && \
/home/spark/polaris/regtests/setup.sh
-COPY --chown=spark ./regtests /home/spark/polaris/regtests
-
-# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
-USER root
-RUN chmod -R go+rwx /home/spark/polaris
-USER spark
-
ENTRYPOINT ["./regtests/run.sh"]
diff --git a/runtime/admin/src/main/docker/Dockerfile.jvm b/runtime/admin/src/main/docker/Dockerfile.jvm
index 4a6330ec3..ebd121d13 100644
--- a/runtime/admin/src/main/docker/Dockerfile.jvm
+++ b/runtime/admin/src/main/docker/Dockerfile.jvm
@@ -18,23 +18,25 @@
#
FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
-LABEL org.opencontainers.image.source=https://github.com/apache/polaris
-LABEL org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool"
-LABEL org.opencontainers.image.licenses=Apache-2.0
+LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
+ org.opencontainers.image.description="Apache Polaris (incubating) Admin Tool" \
+ org.opencontainers.image.licenses=Apache-2.0
-ENV LANGUAGE='en_US:en'
+ENV LANGUAGE='en_US:en' \
+ USER=polaris \
+ UID=10000 \
+ HOME=/home/polaris
USER root
-RUN groupadd --gid 10001 polaris \
- && useradd --uid 10000 --gid polaris polaris \
- && chown -R polaris:polaris /opt/jboss/container \
- && chown -R polaris:polaris /deployments
+
+RUN groupadd --gid 10001 polaris && \
+ useradd --uid 10000 --gid polaris -m polaris && \
+ mkdir -p /deployments && \
+ chown -R polaris:polaris /deployments /opt/jboss/container
USER polaris
+
WORKDIR /home/polaris
-ENV USER=polaris
-ENV UID=10000
-ENV HOME=/home/polaris
# We make four distinct layers so if there are application changes the library layers can be reused
COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/
diff --git a/runtime/server/src/main/docker/Dockerfile.jvm b/runtime/server/src/main/docker/Dockerfile.jvm
index 393c9c8a3..70d694c0f 100644
--- a/runtime/server/src/main/docker/Dockerfile.jvm
+++ b/runtime/server/src/main/docker/Dockerfile.jvm
@@ -18,23 +18,27 @@
#
FROM registry.access.redhat.com/ubi9/openjdk-21-runtime:1.23-6.1761164966
-LABEL org.opencontainers.image.source=https://github.com/apache/polaris
-LABEL org.opencontainers.image.description="Apache Polaris (incubating)"
-LABEL org.opencontainers.image.licenses=Apache-2.0
+LABEL org.opencontainers.image.source=https://github.com/apache/polaris \
+ org.opencontainers.image.description="Apache Polaris (incubating)" \
+ org.opencontainers.image.licenses=Apache-2.0
-ENV LANGUAGE='en_US:en'
+ENV LANGUAGE='en_US:en' \
+ USER=polaris \
+ UID=10000 \
+ HOME=/home/polaris \
+ AB_JOLOKIA_OFF="" \
+ JAVA_APP_JAR="/deployments/quarkus-run.jar"
USER root
-RUN groupadd --gid 10001 polaris \
- && useradd --uid 10000 --gid polaris polaris \
- && chown -R polaris:polaris /opt/jboss/container \
- && chown -R polaris:polaris /deployments
+
+RUN groupadd --gid 10001 polaris && \
+ useradd --uid 10000 --gid polaris polaris && \
+ chown -R polaris:polaris /opt/jboss/container && \
+ chown -R polaris:polaris /deployments
USER polaris
+
WORKDIR /home/polaris
-ENV USER=polaris
-ENV UID=10000
-ENV HOME=/home/polaris
# We make four distinct layers so if there are application changes the library layers can be reused
COPY --chown=polaris:polaris build/quarkus-app/lib/ /deployments/lib/
@@ -45,8 +49,4 @@ COPY --chown=polaris:polaris distribution/LICENSE /deployments/
COPY --chown=polaris:polaris distribution/NOTICE /deployments/
COPY --chown=polaris:polaris distribution/DISCLAIMER /deployments/
-EXPOSE 8181
-EXPOSE 8182
-
-ENV AB_JOLOKIA_OFF=""
-ENV JAVA_APP_JAR="/deployments/quarkus-run.jar"
+EXPOSE 8181 8182
\ No newline at end of file
diff --git a/site/docker/Dockerfile b/site/docker/Dockerfile
index 714b1ccf5..297c9aa7d 100644
--- a/site/docker/Dockerfile
+++ b/site/docker/Dockerfile
@@ -21,23 +21,14 @@ FROM ubuntu:24.04 AS hugo
ENV LANGUAGE='en_US:en'
-RUN apt-get update
-RUN apt-get install --yes golang hugo asciidoctor npm curl
-RUN apt-get clean
-# http-server is used when building the static site to manually check it locally
-# (via `site/bin/create-static-site.sh --local` at http://localhost:8080/)
-RUN npm install --global http-server
-
-# these dependencies are needed to build the static site
-#RUN npm install --global autoprefixer postcss postcss-cli http-server
-
-RUN mkdir /polaris
-RUN mkdir /polaris/site
-RUN mkdir /polaris/site/resources
+RUN apt-get update && \
+ apt-get install --yes --no-install-recommends golang hugo asciidoctor npm curl git && \
+ rm -rf /var/lib/apt/lists/* && \
+ npm install --global http-server && \
+ mkdir -p /polaris/site/resources
COPY _run_in_docker.sh /hugo/run
-EXPOSE 1313
-EXPOSE 8080
+EXPOSE 1313 8080
ENTRYPOINT ["/hugo/run"]