This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 072ba7e669 [GLUTEN-7535][VL] Containerized build within CentOS 7 image
(#7538)
072ba7e669 is described below
commit 072ba7e6690d514a7575a54615507840d098980e
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Oct 16 10:03:06 2024 +0800
[GLUTEN-7535][VL] Containerized build within CentOS 7 image (#7538)
Closes #7535
---
tools/gluten-te/centos/buildenv.sh | 4 +
tools/gluten-te/centos/centos-7-deps.sh | 108 +++++++++++++++------
tools/gluten-te/centos/centos-8-deps.sh | 13 ++-
tools/gluten-te/centos/defaults.conf | 5 +-
tools/gluten-te/centos/dockerfile-buildenv | 35 +++----
.../buildhere-veloxbe-portable-libs/README.md | 15 ++-
.../run-default.sh} | 13 +--
.../buildhere-veloxbe-portable-libs/run.sh | 2 +
.../scripts/all.sh} | 35 ++++++-
tools/gluten-te/ubuntu/buildenv.sh | 4 +
tools/gluten-te/ubuntu/defaults.conf | 3 +
tools/gluten-te/ubuntu/dockerfile-buildenv | 20 ++--
.../buildhere-veloxbe-portable-libs/README.md | 13 +--
.../buildhere-veloxbe-portable-libs/run.sh | 2 +
14 files changed, 172 insertions(+), 100 deletions(-)
diff --git a/tools/gluten-te/centos/buildenv.sh
b/tools/gluten-te/centos/buildenv.sh
index 91e90618bd..0806d58e18 100755
--- a/tools/gluten-te/centos/buildenv.sh
+++ b/tools/gluten-te/centos/buildenv.sh
@@ -33,6 +33,9 @@ HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT}
# If on, use maven mirror settings for PRC's network environment
USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR}
+# Whether to build Spark binaries in buildenv image
+BUILD_SPARK_BINARIES=${BUILD_SPARK_BINARIES:-$DEFAULT_BUILD_SPARK_BINARIES}
+
# Set timezone name
TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}
@@ -55,6 +58,7 @@ BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS
--build-arg OS_IMAGE_NAM
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
HTTP_PROXY_HOST=$HTTP_PROXY_HOST"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
HTTP_PROXY_PORT=$HTTP_PROXY_PORT"
+BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
BUILD_SPARK_BINARIES=$BUILD_SPARK_BINARIES"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f
$BASEDIR/dockerfile-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target
gluten-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t
$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
diff --git a/tools/gluten-te/centos/centos-7-deps.sh
b/tools/gluten-te/centos/centos-7-deps.sh
index 4971efc945..3b23c03382 100755
--- a/tools/gluten-te/centos/centos-7-deps.sh
+++ b/tools/gluten-te/centos/centos-7-deps.sh
@@ -16,34 +16,82 @@
set -ex
-#CENTOS_MIRROR_URL=https://mirrors.edge.kernel.org/centos
-#CENTOS_MIRROR_GPGKEY="${CENTOS_MIRROR_URL}/RPM-GPG-KEY-CentOS-7"
-#
-#cp /etc/yum.repos.d/CentOS-Base.repo /tmp/CentOS-Base.repo
-#sed -i "/^mirrorlist/d;s/^\#baseurl=/baseurl=/" /tmp/CentOS-Base.repo
-#sed -i "s|^gpgkey=.*$|gpgkey=${CENTOS_MIRROR_GPGKEY}|" /tmp/CentOS-Base.repo
-#sed -i "s|http://mirror.centos.org/centos|${CENTOS_MIRROR_URL}|"
/tmp/CentOS-Base.repo
-#rm /etc/yum.repos.d/*
-#mv /tmp/CentOS-Base.repo /etc/yum.repos.d/
-
-sed -e 's|^mirrorlist=|#mirrorlist=|g' \
- -e
's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.ustc.edu.cn/centos|g'
\
- -i.bak \
- /etc/yum.repos.d/CentOS-Base.repo
-
-# Disable fastestmirror
-sed -i "s/enabled=1/enabled=0/" /etc/yum/pluginconf.d/fastestmirror.conf
-
-yum -y install epel-release centos-release-scl
+sed -i -e "s/enabled=1/enabled=0/" /etc/yum/pluginconf.d/fastestmirror.conf
+sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
+sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-*
+yum install -y centos-release-scl
+rm -f /etc/yum.repos.d/CentOS-SCLo-scl.repo
+sed -i \
+ -e 's/^mirrorlist/#mirrorlist/' \
+ -e 's/^#baseurl/baseurl/' \
+ -e 's/mirror\.centos\.org/vault.centos.org/' \
+ /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo
+
+yum -y install epel-release
yum -y install \
- git \
- dnf \
- cmake3 \
- ccache \
- devtoolset-9 \
- java-1.8.0-openjdk \
- java-1.8.0-openjdk-devel \
- ninja-build \
- wget
-
-ln -s /usr/bin/cmake3 /usr/local/bin/cmake
+ wget curl tar zip unzip which patch sudo \
+ ninja-build perl-IPC-Cmd autoconf autoconf-archive automake libtool \
+ devtoolset-9 python3 pip dnf \
+ bison \
+ java-1.8.0-openjdk java-1.8.0-openjdk-devel
+
+# Link c++ to the one in devtoolset.
+ln -s /opt/rh/devtoolset-9/root/usr/bin/c++ /usr/bin/c++
+
+pip3 install --upgrade pip
+
+# cmake >= 3.28.3
+pip3 install cmake==3.28.3
+
+# git >= 2.7.4
+if [[ "$(git --version)" != "git version 2."* ]]; then
+ [ -f /etc/yum.repos.d/ius.repo ] || yum -y install
https://repo.ius.io/ius-release-el7.rpm
+ yum -y remove git
+ yum -y install git236
+fi
+
+# flex>=2.6.0
+if [[ "$(PATH="/usr/local/bin:$PATH" flex --version 2>&1)" != "flex 2.6."* ]];
then
+ yum -y install gettext-devel
+
FLEX_URL="https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz"
+ mkdir -p /tmp/flex
+ wget -q --max-redirect 3 -O - "${FLEX_URL}" | tar -xz -C /tmp/flex
--strip-components=1
+ cd /tmp/flex
+ ./autogen.sh
+ ./configure
+ make install
+ cd
+ rm -rf /tmp/flex
+fi
+
+# automake>=1.14
+installed_automake_version="$(aclocal --version | sed -En "1s/^.*
([1-9\.]*)$/\1/p")"
+if [ "$(semver "$installed_automake_version")" -lt "$(semver 1.14)" ]; then
+ mkdir -p /tmp/automake
+ wget -O - http://ftp.gnu.org/gnu/automake/automake-1.16.5.tar.xz | tar -x
--xz -C /tmp/automake --strip-components=1
+ cd /tmp/automake
+ ./configure
+ make install -j
+ cd
+ rm -rf /tmp/automake
+
+ # Fix aclocal search path
+ echo /usr/share/aclocal >/usr/local/share/aclocal/dirlist
+fi
+
+# maven
+if [ -z "$(which mvn)" ]; then
+ maven_version=3.9.2
+ maven_install_dir=/opt/maven-$maven_version
+ if [ -d /opt/maven-$maven_version ]; then
+ echo "Failed to install maven: ${maven_install_dir} is exists" >&2
+ exit 1
+ fi
+
+ cd /tmp
+ wget
https://archive.apache.org/dist/maven/maven-3/$maven_version/binaries/apache-maven-$maven_version-bin.tar.gz
+ tar -xvf apache-maven-$maven_version-bin.tar.gz
+ rm -f apache-maven-$maven_version-bin.tar.gz
+ mv apache-maven-$maven_version "${maven_install_dir}"
+ ln -s "${maven_install_dir}/bin/mvn" /usr/local/bin/mvn
+fi
diff --git a/tools/gluten-te/centos/centos-8-deps.sh
b/tools/gluten-te/centos/centos-8-deps.sh
index f76db6df5e..65a5fd0545 100755
--- a/tools/gluten-te/centos/centos-8-deps.sh
+++ b/tools/gluten-te/centos/centos-8-deps.sh
@@ -16,13 +16,12 @@
set -ex
-sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
-# The connection to vault.centos.org in CI is unstable
-# sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-*
-minorver=8.5.2111
-sed -i -e \
-
"s|^#baseurl=http://mirror.centos.org/\$contentdir/\$releasever|baseurl=https://mirrors.aliyun.com/centos-vault/$minorver|g"
\
- /etc/yum.repos.d/CentOS-*.repo
+# TODO, use CentOS 7 instead.
+echo " This script is out of date!"
+exit 1
+
+sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
+sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-*
dnf install -y epel-release sudo
yum -y update && yum clean all && yum install -y dnf-plugins-core
diff --git a/tools/gluten-te/centos/defaults.conf
b/tools/gluten-te/centos/defaults.conf
index 1213ff66d6..c39b396ba4 100755
--- a/tools/gluten-te/centos/defaults.conf
+++ b/tools/gluten-te/centos/defaults.conf
@@ -31,11 +31,14 @@ DEFAULT_HTTP_PROXY_PORT=
# If on, use maven mirror settings for PRC's network environment
DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
+# Whether to build Spark binaries in buildenv image
+DEFAULT_BUILD_SPARK_BINARIES=OFF
+
# Base operator system image used in build scripts.
DEFAULT_OS_IMAGE_NAME=centos
# Version ID of os image
-DEFAULT_OS_IMAGE_TAG=8
+DEFAULT_OS_IMAGE_TAG=7
# Set timezone name
DEFAULT_TIMEZONE=Asia/Shanghai
diff --git a/tools/gluten-te/centos/dockerfile-buildenv
b/tools/gluten-te/centos/dockerfile-buildenv
index 5bd2ed74a1..d3b9eb1eae 100755
--- a/tools/gluten-te/centos/dockerfile-buildenv
+++ b/tools/gluten-te/centos/dockerfile-buildenv
@@ -51,35 +51,22 @@ RUN /tmp/deps.sh \
&& dnf clean all \
&& rm -rf /var/cache/yum
-# Install deps from url
-ENV PATH="$PATH:/usr/lib/jvm/java-1.8.0-openjdk/bin"
-RUN wget
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
&& \
- tar -xvf apache-maven-3.8.8-bin.tar.gz && \
- rm apache-maven-3.8.8-bin.tar.gz && \
- mv apache-maven-3.8.8 /usr/lib/maven
+# Spark binaries
+WORKDIR /opt
+ARG BUILD_SPARK_BINARIES
# Build & install Spark 3.2.2
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
-RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C
spark322 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark322 && tar -xvf
spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1; fi
# Build & install Spark 3.3.1
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C
spark331 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark331 && tar -xvf
spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1; fi
# Build & install Spark 3.4.3
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark342 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C
spark342 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark343 && tar -xvf
spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1; fi
# Build & install Spark 3.5.1
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark351 && tar -xvf spark-3.5.1-bin-hadoop3.tgz -C
spark351 --strip-components=1
-
-ENV PATH="$PATH:/usr/lib/maven/bin"
-ENV
LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:/lib64:/lib
-
-# Velox setup scripts require sudo
-RUN yum -y install sudo \
- && yum clean all
-RUN echo '%wheel ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-
-COPY scripts/env.sh /env.sh
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark351 && tar -xvf
spark-3.5.1-bin-hadoop3.tgz -C spark351 --strip-components=1; fi
diff --git
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
similarity index 65%
copy from
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
copy to
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
index 28e955dac6..5574dd0d72 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
@@ -1,4 +1,4 @@
-# Utility for building C++ libs in container
+# Utility for building C++ libs in CentOS 7 (with glibc 2.17) container
The folder contains script code to build `libvelox.so` and `libgluten.so` in
docker container and for host use.
@@ -11,26 +11,23 @@ The folder contains script code to build `libvelox.so` and
`libgluten.so` in doc
## Usage
```sh
-# 1. Set the following envs in case you are behind http proxy.
+# 1. (Optional) Set the following envs in case you are behind http proxy.
export HTTP_PROXY_HOST=myproxy.example.com
export HTTP_PROXY_PORT=55555
-# 2. Set the following env to install Gluten's modified Arrow Jars on host.
-export MOUNT_MAVEN_CACHE=ON
-
-# 3. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Build the C++ libs in a centos 7 docker container.
# Note, this command could take much longer time to finish if it's never run
before.
# After the first run, the essential build environment will be cached in
docker builder.
#
# Additionally, changes to HTTP_PROXY_HOST / HTTP_PROXY_PORT could invalidate
the build cache
# either. For more details, please check docker file `dockerfile-buildenv`.
cd gluten/
-tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
+tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
-# 4. Check the built libs.
+# 3. Check the built libs.
ls -l cpp/build/releases/
-# 5. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
+# 4. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
# For example:
mvn clean install -P spark-3.4,backends-velox -DskipTests
```
\ No newline at end of file
diff --git a/tools/gluten-te/centos/scripts/env.sh
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
old mode 100644
new mode 100755
similarity index 75%
rename from tools/gluten-te/centos/scripts/env.sh
rename to
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
index 0782e97f8e..2648725ce0
--- a/tools/gluten-te/centos/scripts/env.sh
+++
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
@@ -13,12 +13,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# shellcheck disable=SC1091,SC2155
-if [ -f "/opt/rh/gcc-toolset-9/enable" ]; then
- . /opt/rh/gcc-toolset-9/enable
-elif [ -f "/opt/rh/devtoolset-9/enable" ]; then # CentOS 7
- . /opt/rh/devtoolset-9/enable
-fi
+set -ex
-export MAKEFLAGS="-j$(nproc)"
\ No newline at end of file
+BASEDIR=$(readlink -f $(dirname $0))
+
+$BASEDIR/run.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
similarity index 91%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
index 4d28d45211..51dbb93a8c 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -20,6 +20,8 @@ BASEDIR=$(readlink -f $(dirname $0))
TIMESTAMP=$(date +%s)
+# Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP
-v $BASEDIR/scripts:/opt/scripts"
BASH_ARGS="$*"
diff --git
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
similarity index 56%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 4d28d45211..f1f57e97c1 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -16,12 +16,37 @@
set -ex
-BASEDIR=$(readlink -f $(dirname $0))
+export NUM_THREADS=$(nproc)
+export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
-TIMESTAMP=$(date +%s)
+# Retry code copied from https://unix.stackexchange.com/a/137639.
+function fail {
+ echo $1 >&2
+ exit 1
+}
-export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP
-v $BASEDIR/scripts:/opt/scripts"
+function retry {
+ local n=1
+ local max=5
+ local delay=15
+ while true; do
+ "$@" && break || {
+ if [[ $n -lt $max ]]; then
+ ((n++))
+ echo "Command failed. Attempt $n/$max:"
+ sleep $delay;
+ else
+ fail "The command has failed after $n attempts."
+ fi
+ }
+ done
+}
-BASH_ARGS="$*"
+# FIXME: Works only in CentOS 7
+source /opt/rh/devtoolset-9/enable
-$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
+cd /opt/gluten
+
+BASH_ARGS=$@
+
+retry dev/builddeps-veloxbe.sh $BASH_ARGS
diff --git a/tools/gluten-te/ubuntu/buildenv.sh
b/tools/gluten-te/ubuntu/buildenv.sh
index e96a0faba9..4c552ba117 100755
--- a/tools/gluten-te/ubuntu/buildenv.sh
+++ b/tools/gluten-te/ubuntu/buildenv.sh
@@ -33,6 +33,9 @@ HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT}
# If on, use maven mirror settings for PRC's network environment
USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR}
+# Whether to build Spark binaries in buildenv image
+BUILD_SPARK_BINARIES=${BUILD_SPARK_BINARIES:-$DEFAULT_BUILD_SPARK_BINARIES}
+
# Set timezone name
TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}
@@ -54,6 +57,7 @@ BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS
--build-arg OS_IMAGE_NAM
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
HTTP_PROXY_HOST=$HTTP_PROXY_HOST"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
HTTP_PROXY_PORT=$HTTP_PROXY_PORT"
+BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg
BUILD_SPARK_BINARIES=$BUILD_SPARK_BINARIES"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f
$BASEDIR/dockerfile-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target
gluten-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t
$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
diff --git a/tools/gluten-te/ubuntu/defaults.conf
b/tools/gluten-te/ubuntu/defaults.conf
index 177c1073b9..393b23a473 100644
--- a/tools/gluten-te/ubuntu/defaults.conf
+++ b/tools/gluten-te/ubuntu/defaults.conf
@@ -31,6 +31,9 @@ DEFAULT_HTTP_PROXY_PORT=
# If on, use maven mirror settings for PRC's network environment
DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
+# Whether to build Spark binaries in buildenv image
+DEFAULT_BUILD_SPARK_BINARIES=OFF
+
# Base operator system image used in build scripts.
DEFAULT_OS_IMAGE_NAME=ubuntu
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv
b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 1449ed46a0..47c7b80d86 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -111,21 +111,25 @@ RUN cd /opt && wget
https://github.com/Kitware/CMake/releases/download/v3.28.3/c
RUN cmake --version
+# Spark binaries
+WORKDIR /opt
+ARG BUILD_SPARK_BINARIES
+
# Build & install Spark 3.2.2
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
-RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C
spark322 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark322 && tar -xvf
spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1; fi
# Build & install Spark 3.3.1
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C
spark331 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark331 && tar -xvf
spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1; fi
# Build & install Spark 3.4.3
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark343 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C
spark343 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark343 && tar -xvf
spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1; fi
# Build & install Spark 3.5.1
-RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark351 && tar -xvf spark-3.5.1-bin-hadoop3.tgz -C
spark351 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz;
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark351 && tar -xvf
spark-3.5.1-bin-hadoop3.tgz -C spark351 --strip-components=1; fi
# Prepare entry command
COPY scripts/cmd.sh /root/.cmd.sh
diff --git
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 28e955dac6..8660265e35 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -1,4 +1,4 @@
-# Utility for building C++ libs in container
+# Utility for building C++ libs in Ubuntu 20.04 (with glibc 2.31) container
The folder contains script code to build `libvelox.so` and `libgluten.so` in
docker container and for host use.
@@ -11,14 +11,11 @@ The folder contains script code to build `libvelox.so` and
`libgluten.so` in doc
## Usage
```sh
-# 1. Set the following envs in case you are behind http proxy.
+# 1. (Optional) Set the following envs in case you are behind http proxy.
export HTTP_PROXY_HOST=myproxy.example.com
export HTTP_PROXY_PORT=55555
-# 2. Set the following env to install Gluten's modified Arrow Jars on host.
-export MOUNT_MAVEN_CACHE=ON
-
-# 3. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Build the C++ libs in a ubuntu 20.04 docker container.
# Note, this command could take much longer time to finish if it's never run
before.
# After the first run, the essential build environment will be cached in
docker builder.
#
@@ -27,10 +24,10 @@ export MOUNT_MAVEN_CACHE=ON
cd gluten/
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
-# 4. Check the built libs.
+# 3. Check the built libs.
ls -l cpp/build/releases/
-# 5. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
+# 4. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
# For example:
mvn clean install -P spark-3.4,backends-velox -DskipTests
```
\ No newline at end of file
diff --git
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 4d28d45211..51dbb93a8c 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -20,6 +20,8 @@ BASEDIR=$(readlink -f $(dirname $0))
TIMESTAMP=$(date +%s)
+# Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP
-v $BASEDIR/scripts:/opt/scripts"
BASH_ARGS="$*"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]