This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d1eebb36e1 [VL] Update dockerfile (#9644)
d1eebb36e1 is described below
commit d1eebb36e1dfdb6aed3fb068e0d75671df162f61
Author: BInwei Yang <[email protected]>
AuthorDate: Thu May 15 16:52:13 2025 -0700
[VL] Update dockerfile (#9644)
This PR consolidates multiple Dockerfile scripts and updates documentation
to reduce image size and improve build efficiency. Key changes include merging
shell commands into single RUN steps, pre-installing Maven dependencies into
the .m2 cache, removing redundant ccache installations, and introducing build
args to control the Java version.
---
.github/workflows/docker_image.yml | 5 +-
dev/docker/Dockerfile.centos7-static-build | 54 +++++++++++----------
dev/docker/Dockerfile.centos8-dynamic-build | 59 ++++++++++++++---------
dev/docker/Dockerfile.centos8-dynamic-build-jdk11 | 48 ------------------
dev/docker/Dockerfile.centos8-dynamic-build-jdk17 | 48 ------------------
dev/docker/Dockerfile.centos8-static-build | 52 +++++++++++---------
docs/developers/velox-backend-CI.md | 21 +++++++-
7 files changed, 114 insertions(+), 173 deletions(-)
diff --git a/.github/workflows/docker_image.yml
b/.github/workflows/docker_image.yml
index cfc9e79297..fb23d0851d 100644
--- a/.github/workflows/docker_image.yml
+++ b/.github/workflows/docker_image.yml
@@ -25,8 +25,6 @@ on:
- 'dev/docker/Dockerfile.centos7-static-build'
- 'dev/docker/Dockerfile.centos8-static-build'
- 'dev/docker/Dockerfile.centos8-dynamic-build'
- - 'dev/docker/Dockerfile.centos8-dynamic-build-jdk11'
- - 'dev/docker/Dockerfile.centos8-dynamic-build-jdk17'
- 'dev/docker/cudf/Dockerfile'
schedule:
- cron: '0 20 * * 0'
@@ -173,6 +171,7 @@ jobs:
uses: docker/build-push-action@v6
with:
context: .
+ build-args: JAVA_VERSION=1.8.0
file: dev/docker/Dockerfile.centos8-dynamic-build
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,"name=${{ env.DOCKERHUB_REPO
}}",push-by-digest=true,push=true
@@ -223,6 +222,7 @@ jobs:
uses: docker/build-push-action@v6
with:
context: .
+ build-args: JAVA_VERSION=11
file: dev/docker/Dockerfile.centos8-dynamic-build-jdk11
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,"name=${{ env.DOCKERHUB_REPO
}}",push-by-digest=true,push=true
@@ -273,6 +273,7 @@ jobs:
uses: docker/build-push-action@v6
with:
context: .
+ build-args: JAVA_VERSION=17
file: dev/docker/Dockerfile.centos8-dynamic-build-jdk17
labels: ${{ steps.meta.outputs.labels }}
outputs: type=image,"name=${{ env.DOCKERHUB_REPO
}}",push-by-digest=true,push=true
diff --git a/dev/docker/Dockerfile.centos7-static-build
b/dev/docker/Dockerfile.centos7-static-build
index e9b4ebfb82..dfd2499e58 100644
--- a/dev/docker/Dockerfile.centos7-static-build
+++ b/dev/docker/Dockerfile.centos7-static-build
@@ -15,33 +15,37 @@
FROM centos:7
-RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
-RUN sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
-
-RUN yum install -y epel-release centos-release-scl
-RUN rm /etc/yum.repos.d/CentOS-SCLo-scl.repo -f
-RUN sed -i \
- -e 's/^mirrorlist/#mirrorlist/' \
- -e 's/^#baseurl/baseurl/' \
- -e 's/mirror\.centos\.org/vault.centos.org/' \
- /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo
-
-RUN yum install -y git patch wget sudo java-1.8.0-openjdk-devel ccache
-
-RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
-
-RUN echo "check_certificate = off" >> ~/.wgetrc
-
-RUN cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh
+ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
+ENV PATH=$JAVA_HOME/bin:$PATH
# An actual path used for vcpkg cache.
-RUN mkdir -p /var/cache/vcpkg
-
+ENV VCPKG_PATH=/var/cache/vcpkg
# Set vcpkg cache path.
-ENV VCPKG_BINARY_SOURCES=clear;files,/var/cache/vcpkg,readwrite
+ENV VCPKG_BINARY_SOURCES=clear;files,${VCPKG_PATH},readwrite
+
-# Build arrow, then install the native libs to system paths and jar package to
.m2/ directory.
-RUN cd /opt/gluten && source /opt/rh/devtoolset-11/enable && \
+RUN set -ex; \
+ sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* ||
true; \
+ sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true; \
+ yum install -y epel-release centos-release-scl; \
+ rm /etc/yum.repos.d/CentOS-SCLo-scl.repo -f; \
+ sed -i \
+ -e 's/^mirrorlist/#mirrorlist/' \
+ -e 's/^#baseurl/baseurl/' \
+ -e 's/mirror\.centos\.org/vault.centos.org/' \
+ /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \
+ yum install -y git patch wget sudo java-1.8.0-openjdk-devel ccache; \
+ git clone --depth=1 https://github.com/apache/incubator-gluten
/opt/gluten; \
+ echo "check_certificate = off" >> ~/.wgetrc; \
+ cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh; \
+ mkdir -p ${VCPKG_PATH}; \
+ echo "Build arrow, then install the native libs to system paths and jar
package to .m2/ directory."; \
+ cd /opt/gluten; \
+ source /opt/rh/devtoolset-11/enable; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON
--enable_s3=ON --enable_gcs=ON \
- --enable_hdfs=ON --enable_abfs=ON
build_arrow && \
- rm -rf /opt/gluten
+ --enable_hdfs=ON --enable_abfs=ON
build_arrow; \
+ mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5
-DskipTests; \
+ dnf clean all; \
+ rm -rf /opt/gluten; \
+ rm -rf /root/.cache/ccache;
+
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build
b/dev/docker/Dockerfile.centos8-dynamic-build
index 7888a3ee29..baa0098075 100644
--- a/dev/docker/Dockerfile.centos8-dynamic-build
+++ b/dev/docker/Dockerfile.centos8-dynamic-build
@@ -15,34 +15,45 @@
FROM centos:8
-RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
-RUN sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
+ARG JAVA_VERSION=1.8.0
+#ARG JAVA_VERSION=11
+#ARG JAVA_VERSION=17
-RUN yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache
-RUN dnf install -y --setopt=install_weak_deps=False gcc-toolset-11
-RUN echo "check_certificate = off" >> ~/.wgetrc
-RUN yum install -y java-1.8.0-openjdk-devel patch wget git perl
-RUN yum install
https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/tzdata-2025a-1.el9.noarch.rpm
-y
-ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
+ENV JAVA_HOME=/usr/lib/jvm/java-${JAVA_VERSION}-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-RUN wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
&& \
- tar -xvf apache-maven-3.8.8-bin.tar.gz && \
- mv apache-maven-3.8.8 /usr/lib/maven
ENV PATH=${PATH}:/usr/lib/maven/bin
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/incubator/uniffle/0.9.2/apache-uniffle-0.9.2-incubating-bin.tar.gz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
-P /opt/
-
-RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
-
-RUN cd /opt/gluten/.github/workflows/util/ && ./install_spark_resources.sh 3.2
&& ./install_spark_resources.sh 3.3 \
- && ./install_spark_resources.sh 3.4 && ./install_spark_resources.sh 3.5 &&
./install_spark_resources.sh 3.5-scala2.13
-
-RUN if [ "$(uname -m)" = "aarch64" ]; then \
+RUN set -ex; \
+ sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* ||
true; \
+ sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true; \
+ yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache; \
+ dnf install -y --setopt=install_weak_deps=False gcc-toolset-11; \
+ echo "check_certificate = off" >> ~/.wgetrc; \
+ yum install -y java-${JAVA_VERSION}-openjdk-devel patch wget git perl; \
+ yum install
https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/tzdata-2025a-1.el9.noarch.rpm
-y; \
+ wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz;
\
+ tar -xvf apache-maven-3.8.8-bin.tar.gz; \
+ mv apache-maven-3.8.8 /usr/lib/maven; \
+ wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
-P /opt/; \
+ wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz
-P /opt/; \
+ wget -nv
https://archive.apache.org/dist/incubator/uniffle/0.9.2/apache-uniffle-0.9.2-incubating-bin.tar.gz
-P /opt/; \
+ wget -nv
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
-P /opt/; \
+ git clone --depth=1 https://github.com/apache/incubator-gluten
/opt/gluten; \
+ cd /opt/gluten/.github/workflows/util/; \
+ ./install_spark_resources.sh 3.2; \
+ ./install_spark_resources.sh 3.3; \
+ ./install_spark_resources.sh 3.4; \
+ ./install_spark_resources.sh 3.5; \
+ ./install_spark_resources.sh 3.5-scala2.13; \
+ if [ "$(uname -m)" = "aarch64" ]; then \
export CPU_TARGET="aarch64"; \
- fi && \
- cd /opt/gluten && source /opt/rh/gcc-toolset-11/enable &&
./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow && rm -rf
/opt/gluten
+ fi; \
+ cd /opt/gluten; \
+ source /opt/rh/gcc-toolset-11/enable; \
+ ./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow; \
+ mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5
-DskipTests; \
+ dnf clean all; \
+ rm -rf /opt/gluten; \
+ rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build-jdk11
b/dev/docker/Dockerfile.centos8-dynamic-build-jdk11
deleted file mode 100644
index 2f33211d3b..0000000000
--- a/dev/docker/Dockerfile.centos8-dynamic-build-jdk11
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-FROM centos:8
-
-RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
-RUN sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
-
-RUN yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache
-RUN dnf install -y --setopt=install_weak_deps=False gcc-toolset-11
-RUN echo "check_certificate = off" >> ~/.wgetrc
-
-RUN yum install -y java-11-openjdk-devel patch wget git perl
-RUN yum install
https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/tzdata-2025a-1.el9.noarch.rpm
-y
-ENV JAVA_HOME=/usr/lib/jvm/java-11-openjdk
-ENV PATH=$JAVA_HOME/bin:$PATH
-RUN wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
&& \
- tar -xvf apache-maven-3.8.8-bin.tar.gz && \
- mv apache-maven-3.8.8 /usr/lib/maven
-ENV PATH=${PATH}:/usr/lib/maven/bin
-
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz
-P /opt/
-
-RUN wget -nv
https://archive.apache.org/dist/incubator/uniffle/0.9.2/apache-uniffle-0.9.2-incubating-bin.tar.gz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
-P /opt/
-
-RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
-
-RUN cd /opt/gluten/.github/workflows/util/ && ./install_spark_resources.sh 3.2
&& ./install_spark_resources.sh 3.3 \
- && ./install_spark_resources.sh 3.4 && ./install_spark_resources.sh 3.5 &&
./install_spark_resources.sh 3.5-scala2.13
-
-RUN if [ "$(uname -m)" = "aarch64" ]; then \
- export CPU_TARGET="aarch64"; \
- fi && \
- cd /opt/gluten && source /opt/rh/gcc-toolset-11/enable &&
./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow && rm -rf
/opt/gluten
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build-jdk17
b/dev/docker/Dockerfile.centos8-dynamic-build-jdk17
deleted file mode 100644
index 158ae61790..0000000000
--- a/dev/docker/Dockerfile.centos8-dynamic-build-jdk17
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-FROM centos:8
-
-RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
-RUN sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
-
-RUN yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache
-RUN dnf install -y --setopt=install_weak_deps=False gcc-toolset-11
-RUN echo "check_certificate = off" >> ~/.wgetrc
-
-RUN yum install -y java-17-openjdk-devel patch wget git perl
-RUN yum install
https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/tzdata-2025a-1.el9.noarch.rpm
-y
-ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk
-ENV PATH=$JAVA_HOME/bin:$PATH
-RUN wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
&& \
- tar -xvf apache-maven-3.8.8-bin.tar.gz && \
- mv apache-maven-3.8.8 /usr/lib/maven
-ENV PATH=${PATH}:/usr/lib/maven/bin
-
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.4.3/apache-celeborn-0.4.3-bin.tgz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz
-P /opt/
-
-RUN wget -nv
https://archive.apache.org/dist/incubator/uniffle/0.9.2/apache-uniffle-0.9.2-incubating-bin.tar.gz
-P /opt/
-RUN wget -nv
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz
-P /opt/
-
-RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
-
-RUN cd /opt/gluten/.github/workflows/util/ && ./install_spark_resources.sh 3.2
&& ./install_spark_resources.sh 3.3 \
- && ./install_spark_resources.sh 3.4 && ./install_spark_resources.sh 3.5 &&
./install_spark_resources.sh 3.5-scala2.13
-
-RUN if [ "$(uname -m)" = "aarch64" ]; then \
- export CPU_TARGET="aarch64"; \
- fi && \
- cd /opt/gluten && source /opt/rh/gcc-toolset-11/enable &&
./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow && rm -rf
/opt/gluten
diff --git a/dev/docker/Dockerfile.centos8-static-build
b/dev/docker/Dockerfile.centos8-static-build
index 21864bb27a..cf2cd7e5fe 100644
--- a/dev/docker/Dockerfile.centos8-static-build
+++ b/dev/docker/Dockerfile.centos8-static-build
@@ -15,37 +15,41 @@
FROM centos:8
-RUN sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
-RUN sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
-RUN yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache
-RUN dnf install -y --setopt=install_weak_deps=False gcc-toolset-11
-RUN echo "check_certificate = off" >> ~/.wgetrc
-
-RUN yum install -y java-1.8.0-openjdk-devel patch wget git perl
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-RUN wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
&& \
- tar -xvf apache-maven-3.8.8-bin.tar.gz && \
- mv apache-maven-3.8.8 /usr/lib/maven
ENV PATH=${PATH}:/usr/lib/maven/bin
-RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
-
-RUN cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh
-
# An actual path used for vcpkg cache.
-RUN mkdir -p /var/cache/vcpkg
-
+ENV VCPKG_PATH=/var/cache/vcpkg
# Set vcpkg cache path.
-ENV VCPKG_BINARY_SOURCES=clear;files,/var/cache/vcpkg,readwrite
-
-# Build arrow, then install the native libs to system paths and jar package to
.m2/ directory.
-RUN if [ "$(uname -m)" = "aarch64" ]; then \
+ENV VCPKG_BINARY_SOURCES=clear;files,${VCPKG_PATH},readwrite
+
+RUN set -ex; \
+ sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* ||
true; \
+ sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true; \
+ yum update -y && yum install -y epel-release sudo dnf && yum install -y
ccache; \
+ dnf install -y --setopt=install_weak_deps=False gcc-toolset-11; \
+ echo "check_certificate = off" >> ~/.wgetrc; \
+ yum install -y java-1.8.0-openjdk-devel patch wget git perl; \
+ yum install
https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/tzdata-2025a-1.el9.noarch.rpm
-y; \
+ rpm -qa | grep tzdata; \
+ dnf clean all; \
+ wget --no-check-certificate
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz;
\
+ tar -xvf apache-maven-3.8.8-bin.tar.gz; \
+ mv apache-maven-3.8.8 /usr/lib/maven; \
+ git clone --depth=1 https://github.com/apache/incubator-gluten
/opt/gluten; \
+ cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh; \
+ mkdir -p ${VCPKG_PATH}; \
+ echo "Build arrow, then install the native libs to system paths and jar
package to .m2/ directory."; \
+ if [ "$(uname -m)" = "aarch64" ]; then \
export CPU_TARGET="aarch64"; \
export VCPKG_FORCE_SYSTEM_BINARIES=1; \
- fi && \
- cd /opt/gluten && source /opt/rh/gcc-toolset-11/enable && \
+ fi; \
+ source /opt/rh/gcc-toolset-11/enable; \
+ cd /opt/gluten; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON
--enable_s3=ON --enable_gcs=ON \
- --enable_hdfs=ON --enable_abfs=ON
build_arrow && \
- rm -rf /opt/gluten
+ --enable_hdfs=ON --enable_abfs=ON
build_arrow; \
+ mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5
-DskipTests; \
+ rm -rf /opt/gluten; \
+ rm -rf /root/.cache/ccache;
diff --git a/docs/developers/velox-backend-CI.md
b/docs/developers/velox-backend-CI.md
index b70887ad56..a0af0b0f39 100644
--- a/docs/developers/velox-backend-CI.md
+++ b/docs/developers/velox-backend-CI.md
@@ -9,16 +9,33 @@ parent: Developer Overview
GHA workflows are defined under `.github/workflows/`.
## Docker Build
-We have a weekly job defined in `docker_image.yml` to build docker images
based on `Dockerfile.centos7-static-build` and
`Dockerfile.centos8-dynamic-build` for CI verification.
+We have a weekly job defined in `docker_image.yml` to build docker images for
CI verification. The docker files and images are listed below:
+
+file | images | comments
+-- | -- | --
+dev/docker/Dockerfile.centos7-static-build | apache/gluten:vcpkg-centos-7 |
centos 7, static link, jdk8
+dev/docker/Dockerfile.centos8-static-build | apache/gluten:vcpkg-centos-8 |
centos 8, static link, jdk8
+dev/docker/Dockerfile.centos8-dynamic-build | apache/gluten:centos-8-jdk8 |
centos 8, dynamic link, jdk8
+dev/docker/Dockerfile.centos8-dynamic-build | apache/gluten:centos-8-jdk11 |
centos 8, dynamic link, jdk11
+dev/docker/Dockerfile.centos8-dynamic-build | apache/gluten:centos-8-jdk17 |
centos 8, dynamic link, jdk17
+dev/docker/cudf/Dockerfile | apache/gluten:centos-9-jdk8-cudf | centos 9,
dynamic link, jdk8
+
+Docker images can be found at https://hub.docker.com/r/apache/gluten/tags
## Vcpkg Caching
-Gluten main branch is pulled down during docker build. And vcpkg will cache
binary data of all dependencies defined under dev/vcpkg.
+The Gluten main branch is pulled down during the static build in docker, and vcpkg
caches binary data of all dependencies defined under dev/vcpkg.
These binary data is cached into `/var/cache/vcpkg` and CI job can re-use them
in new build. By setting `VCPKG_BINARY_SOURCES=clear` in env.,
reusing vcpkg cache can be disabled.
## Arrow Libs Pre-installation
Arrow libs are pre-installed in docker, assuming they are not actively
changed, then not necessarily to be re-built every time.
+## .m2 Cache
+The dependency libraries are pre-installed into /root/.m2 by `mvn
dependency:go-offline`. Spark is set to 3.5 by default.
+
+## Ccache
+Since the docker image is rebuilt weekly, the ccache contents are mostly outdated, so
the cache is removed from the image.
+
## Updating Docker Image
Two GitHub secrets `DOCKERHUB_USER` & `DOCKERHUB_TOKEN` can be used to push
docker image to docker hub: https://hub.docker.com/r/apache/gluten/tags.
Note GitHub secrets are not retrievable in PR from forked repo.
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]