This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new ddfc31d7ed0 Reduce the size of distribution docker image (#15968)
ddfc31d7ed0 is described below

commit ddfc31d7ed0dbd9c0eb3c5c84956d5fb27880427
Author: Abhishek Agarwal <[email protected]>
AuthorDate: Mon Feb 26 21:18:55 2024 +0530

    Reduce the size of distribution docker image (#15968)
    
    This PR replaces duplicate jars present in the extensions with symlinks. 
The Docker image includes contrib extensions, too, and the size of the image 
has grown considerably of late. This change also fixes the 
"ITNestedQueryPushDownTest" integration test.
---
 .github/workflows/standard-its.yml             |  3 +-
 distribution/docker/Dockerfile                 | 12 ++++--
 distribution/docker/deduplicate_jars.sh        | 51 ++++++++++++++++++++++++++
 integration-tests/script/setup_druid_on_k8s.sh |  2 +-
 4 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/standard-its.yml 
b/.github/workflows/standard-its.yml
index a0d4f856e62..8709b09e596 100644
--- a/.github/workflows/standard-its.yml
+++ b/.github/workflows/standard-its.yml
@@ -183,7 +183,8 @@ jobs:
         run: |
           for v in broker middlemanager router coordinator historical ; do
           echo 
"------------------------druid-tiny-cluster-"$v"s-0-------------------------";
-          sudo /usr/local/bin/kubectl logs --tail 1000 
druid-tiny-cluster-"$v"s-0 ||:;
+          /usr/local/bin/kubectl logs --tail 1000 druid-tiny-cluster-"$v"s-0 
||:;
+          /usr/local/bin/kubectl get events | grep druid-tiny-cluster-"$v"s-0 
||:;
           done
 
   integration-other-tests:
diff --git a/distribution/docker/Dockerfile b/distribution/docker/Dockerfile
index bebd0a1c6e0..230f09acaac 100644
--- a/distribution/docker/Dockerfile
+++ b/distribution/docker/Dockerfile
@@ -40,7 +40,7 @@ RUN --mount=type=cache,target=/root/.m2 if [ 
"$BUILD_FROM_SOURCE" = "true" ]; th
       install \
       -Pdist,bundle-contrib-exts \
       -Pskip-static-checks,skip-tests \
-      -Dmaven.javadoc.skip=true \
+      -Dmaven.javadoc.skip=true -T1C \
       ; fi
 
 RUN --mount=type=cache,target=/root/.m2 VERSION=$(mvn -B -q 
org.apache.maven.plugins:maven-help-plugin:3.2.0:evaluate \
@@ -82,15 +82,19 @@ RUN addgroup -S -g 1000 druid \
 COPY --from=bash-static /bin/bash /bin/bash
 RUN chmod 755 /bin/bash
 
-COPY --chown=druid:druid --from=builder /opt /opt
 COPY distribution/docker/druid.sh /druid.sh
 COPY distribution/docker/peon.sh /peon.sh
+COPY distribution/docker/deduplicate_jars.sh /deduplicate_jars.sh
 
 # create necessary directories which could be mounted as volume
+# copy and de-duplicate jars from builder in same layer to reduce image size
 #   /opt/druid/var is used to keep individual files(e.g. log) of each Druid 
service
 #   /opt/shared is used to keep segments and task logs shared among Druid 
services
-RUN mkdir /opt/druid/var /opt/shared \
- && chown druid:druid /opt/druid/var /opt/shared \
+RUN --mount=type=bind,from=builder,source=/opt,target=/builder/opt \
+ mkdir -p /opt/druid/var /opt/shared \
+ && cp -r /builder/opt/druid /opt/ \
+ && /deduplicate_jars.sh /opt/druid \
+ && chown -R druid:druid /opt/druid \
  && chmod 775 /opt/druid/var /opt/shared
 
 USER druid
diff --git a/distribution/docker/deduplicate_jars.sh 
b/distribution/docker/deduplicate_jars.sh
new file mode 100755
index 00000000000..336f955a54b
--- /dev/null
+++ b/distribution/docker/deduplicate_jars.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+
+# Check if an argument is provided
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <path-to-druid-home>"
+    exit 1
+fi
+
+# Directory containing JAR files
+JAR_DIR="$1"
+
+# Declare an associative array to hold the canonical filenames. Works on bash 
version >= 4
+declare -A canonical
+
+# Find all JAR files, sort them to ensure duplicates are processed together
+while IFS= read -r jar; do
+    # Use the jar's base name (file name without its directory) as the array key
+    key=$(basename "$jar")
+
+    # Check if this is the first occurrence of this file
+    if [ -z "${canonical[$key]}" ]; then
+        # Mark this file as the canonical one for this basename
+        canonical[$key]="$jar"
+    else
+        # This file is a duplicate, replace it with a symlink to the canonical 
file
+        ln -sf "${canonical[$key]}" "$jar"
+        echo "Replaced duplicate $jar with symlink to ${canonical[$key]}"
+    fi
+# Read lib jars first so that core libs are retained as the original jars
+done < <(find $JAR_DIR -wholename '*/lib/*.jar' | sort ; find $JAR_DIR 
-wholename '*/extensions/*.jar' | sort ; find $JAR_DIR -wholename 
'*/hadoop-dependencies/*.jar' | sort)
diff --git a/integration-tests/script/setup_druid_on_k8s.sh 
b/integration-tests/script/setup_druid_on_k8s.sh
index 665850a62d2..960eedfe5fc 100755
--- a/integration-tests/script/setup_druid_on_k8s.sh
+++ b/integration-tests/script/setup_druid_on_k8s.sh
@@ -30,7 +30,7 @@ mvn -B -ff -q dependency:go-offline \
       install \
       -Pdist,bundle-contrib-exts \
       -Pskip-static-checks,skip-tests \
-      -Dmaven.javadoc.skip=true
+      -Dmaven.javadoc.skip=true -T1C
 
 DOCKER_BUILDKIT=1 docker build --build-arg BUILD_FROM_SOURCE=0 -t 
druid/base:v1 -f distribution/docker/Dockerfile .
 DOCKER_BUILDKIT=1 docker build --build-arg BASE_IMAGE=druid/base:v1 -t 
druid/cluster:v1 -f distribution/docker/DockerfileBuildTarAdvanced .


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to