This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-1.2.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 807b44c4d7939b74875e2ba0fb82cc481c3137c2 Author: voonhous <[email protected]> AuthorDate: Sat May 16 04:36:11 2026 +0800 fix(docker): tag base image per Java version to avoid latest collision (#18663) --- docker/README.md | 34 +++++++++++++++++++++------ docker/build_docker_images.sh | 6 ++++- docker/hoodie/hadoop/datanode/Dockerfile | 5 ++-- docker/hoodie/hadoop/historyserver/Dockerfile | 5 ++-- docker/hoodie/hadoop/hive_base/Dockerfile | 5 ++-- docker/hoodie/hadoop/namenode/Dockerfile | 5 ++-- docker/hoodie/hadoop/prestobase/Dockerfile | 3 ++- docker/hoodie/hadoop/trinobase/Dockerfile | 3 ++- 8 files changed, 48 insertions(+), 18 deletions(-) diff --git a/docker/README.md b/docker/README.md index 718d1943ef7e..f655f42dca8b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -27,6 +27,22 @@ docker demo environment. The `/hoodie` folder contains all the configs for assembling necessary docker images. The name and repository of each docker image, e.g., `apachehudi/hudi-hadoop_2.8.4-trinobase_368`, is defined in the maven configuration file `pom.xml`. +### Base images by Java version + +`build_docker_images.sh` auto-selects one of the two supported base images from `--spark-version`: + +| Base module | JDK | Used for | +|---------------|---------|------------| +| `base_java11` | Java 11 | Spark 3.x | +| `base_java17` | Java 17 | Spark 4.0+ | + +The legacy Java 8 `base` module under `/hoodie/hadoop/base` is retained for historical reference only; Spark 2.x is no +longer supported and `build_docker_images.sh` never selects it. + +Downstream Dockerfiles (`datanode`, `historyserver`, `hive_base`, `namenode`, `prestobase`, `trinobase`) pick the base +via the `BASE_IMAGE_TAG` build arg (default `java11`). `build_docker_images.sh` sets it automatically; bare `docker +build` invocations targeting the Java 17 base must pass `--build-arg BASE_IMAGE_TAG=java17`. 
+ ### Docker compose config for the Demo - `/compose` The `/compose` folder contains the yaml file to compose the Docker environment for running Hudi Demo. @@ -150,19 +166,23 @@ push the image to the dockerhub repo: # Run under hoodie/hadoop, the <tag> is optional, "latest" by default docker buildx build <image_folder_name> --platform <comma-separated,platforms> -t <hub-user>/<repo-name>[:<tag>] --push -# For example, to build base image -docker buildx build base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base:linux-arm64-0.10.1 --push +# For example, to build the Java 11 base image +docker buildx build base_java11 --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base-java11:linux-arm64-0.10.1 --push ``` +Note: the base image is now tagged per Java variant (`-base-java11` / `-base-java17`). Downstream Dockerfiles +select the variant via the `BASE_IMAGE_TAG` build arg (default `java11`). If you also need the Java 17 base for +arm64, repeat the build against `base_java17` and tag it as `...-base-java17:<tag>`. + Once the base image is pushed then you could do something similar for other images. Change [hive](./hoodie/hadoop/hive_base/Dockerfile) dockerfile to pull the base image with tag corresponding to linux/arm64 platform. ``` # Change below line in the Dockerfile -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest -# as shown below -FROM --platform=linux/arm64 apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:linux-arm64-0.10.1 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest +# as shown below (pin to the same Java variant you built above, e.g. 
java11) +FROM --platform=linux/arm64 apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:linux-arm64-0.10.1 # and then build & push from under hoodie/hadoop dir docker buildx build hive_base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1 --push @@ -178,8 +198,8 @@ shows what changes to make in Dockerfiles (assuming tag is named `linux-arm64-0. of `docker buildx` commands. ``` -docker buildx build base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base:linux-arm64-0.10.1 --push -docker buildx build datanode --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1 --push +docker buildx build base_java11 --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base-java11:linux-arm64-0.10.1 --push +docker buildx build datanode --platform linux/arm64 --build-arg BASE_IMAGE_TAG=java11 -t apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1 --push docker buildx build historyserver --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-history:linux-arm64-0.10.1 --push docker buildx build hive_base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1 --push docker buildx build namenode --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-namenode:linux-arm64-0.10.1 --push diff --git a/docker/build_docker_images.sh b/docker/build_docker_images.sh index 5756f87d7a1a..bb613a2a5155 100755 --- a/docker/build_docker_images.sh +++ b/docker/build_docker_images.sh @@ -95,16 +95,18 @@ DOCKER_CONTEXT_DIR="hoodie/hadoop" SPARK_MAJOR=$(echo "$SPARK_VERSION" | cut -d. 
-f1) if [ "$SPARK_MAJOR" -ge 4 ] 2>/dev/null; then BASE_IMAGE_DIR="base_java17" + BASE_JAVA_TAG="java17" echo "Using Java 17 base image for Spark ${SPARK_VERSION}" else BASE_IMAGE_DIR="base_java11" + BASE_JAVA_TAG="java11" echo "Using Java 11 base image for Spark ${SPARK_VERSION}" fi # List of images to build: "subdir|image_base_name" # Each entry: <subdir>|<image_base_name> DOCKER_IMAGES=( - "${BASE_IMAGE_DIR}|apachehudi/hudi-hadoop_${HADOOP_VERSION}-base" + "${BASE_IMAGE_DIR}|apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_JAVA_TAG}" "datanode|apachehudi/hudi-hadoop_${HADOOP_VERSION}-datanode" "historyserver|apachehudi/hudi-hadoop_${HADOOP_VERSION}-history" "hive_base|apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}" @@ -128,6 +130,7 @@ for IMAGE_CONFIG in "${DOCKER_IMAGES[@]}"; do --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ --build-arg SPARK_VERSION=${SPARK_VERSION} \ --build-arg HIVE_VERSION=${HIVE_VERSION} \ + --build-arg BASE_IMAGE_TAG=${BASE_JAVA_TAG} \ "$IMAGE_CONTEXT" -t "$TAG_LATEST" -t "$TAG_VERSIONED"; then echo "Error: Failed to build docker image for $IMAGE_CONTEXT" exit 1 @@ -137,6 +140,7 @@ for IMAGE_CONFIG in "${DOCKER_IMAGES[@]}"; do --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ --build-arg SPARK_VERSION=${SPARK_VERSION} \ --build-arg HIVE_VERSION=${HIVE_VERSION} \ + --build-arg BASE_IMAGE_TAG=${BASE_JAVA_TAG} \ "$IMAGE_CONTEXT" -t "$TAG_LATEST" -t "$TAG_VERSIONED"; then echo "Error: Failed to build docker image for $IMAGE_CONTEXT" exit 1 diff --git a/docker/hoodie/hadoop/datanode/Dockerfile b/docker/hoodie/hadoop/datanode/Dockerfile index 61297bd3b6bb..bc157214f182 100644 --- a/docker/hoodie/hadoop/datanode/Dockerfile +++ b/docker/hoodie/hadoop/datanode/Dockerfile @@ -15,9 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-ARG HADOOP_VERSION=3.3.4 +ARG HADOOP_VERSION=3.3.4 ARG HADOOP_DN_PORT=50075 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest ENV HADOOP_DN_PORT ${HADOOP_DN_PORT} diff --git a/docker/hoodie/hadoop/historyserver/Dockerfile b/docker/hoodie/hadoop/historyserver/Dockerfile index f001b511d94a..0c77188e3e51 100644 --- a/docker/hoodie/hadoop/historyserver/Dockerfile +++ b/docker/hoodie/hadoop/historyserver/Dockerfile @@ -15,9 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG HADOOP_VERSION=3.3.4 +ARG HADOOP_VERSION=3.3.4 ARG HADOOP_HISTORY_PORT=8188 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest # Install unzip and wget RUN apt-get update && \ diff --git a/docker/hoodie/hadoop/hive_base/Dockerfile b/docker/hoodie/hadoop/hive_base/Dockerfile index 98d26895c077..f77c4c4e455e 100644 --- a/docker/hoodie/hadoop/hive_base/Dockerfile +++ b/docker/hoodie/hadoop/hive_base/Dockerfile @@ -15,8 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG HADOOP_VERSION=3.3.4 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest +ARG HADOOP_VERSION=3.3.4 +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest ENV HIVE_HOME /opt/hive ENV PATH $HIVE_HOME/bin:$PATH diff --git a/docker/hoodie/hadoop/namenode/Dockerfile b/docker/hoodie/hadoop/namenode/Dockerfile index 776d03eb6670..33e2ab4b9955 100644 --- a/docker/hoodie/hadoop/namenode/Dockerfile +++ b/docker/hoodie/hadoop/namenode/Dockerfile @@ -15,9 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-ARG HADOOP_VERSION=3.3.4 +ARG HADOOP_VERSION=3.3.4 ARG HADOOP_WEBHDFS_PORT=50070 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest ENV HADOOP_WEBHDFS_PORT ${HADOOP_WEBHDFS_PORT} diff --git a/docker/hoodie/hadoop/prestobase/Dockerfile b/docker/hoodie/hadoop/prestobase/Dockerfile index accedb94db3d..d40aa9c8f273 100644 --- a/docker/hoodie/hadoop/prestobase/Dockerfile +++ b/docker/hoodie/hadoop/prestobase/Dockerfile @@ -20,7 +20,8 @@ ARG HADOOP_VERSION=2.8.4 ARG HIVE_VERSION=2.3.3 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest as hadoop-base +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest as hadoop-base ARG PRESTO_VERSION=0.271 diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile index 9d7c23010fbb..0700fa2f6bfb 100644 --- a/docker/hoodie/hadoop/trinobase/Dockerfile +++ b/docker/hoodie/hadoop/trinobase/Dockerfile @@ -20,7 +20,8 @@ ARG HADOOP_VERSION=2.8.4 ARG HIVE_VERSION=2.3.3 -FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base +ARG BASE_IMAGE_TAG=java11 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest as hadoop-base ENV TRINO_VERSION=368 ENV TRINO_HOME=/usr/local/trino
