This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 072ba7e669 [GLUTEN-7535][VL] Containerized build within CentOS 7 image (#7538)
072ba7e669 is described below

commit 072ba7e6690d514a7575a54615507840d098980e
Author: Hongze Zhang <[email protected]>
AuthorDate: Wed Oct 16 10:03:06 2024 +0800

    [GLUTEN-7535][VL] Containerized build within CentOS 7 image (#7538)
    
    Closes #7535
---
 tools/gluten-te/centos/buildenv.sh                 |   4 +
 tools/gluten-te/centos/centos-7-deps.sh            | 108 +++++++++++++++------
 tools/gluten-te/centos/centos-8-deps.sh            |  13 ++-
 tools/gluten-te/centos/defaults.conf               |   5 +-
 tools/gluten-te/centos/dockerfile-buildenv         |  35 +++----
 .../buildhere-veloxbe-portable-libs/README.md      |  15 ++-
 .../run-default.sh}                                |  13 +--
 .../buildhere-veloxbe-portable-libs/run.sh         |   2 +
 .../scripts/all.sh}                                |  35 ++++++-
 tools/gluten-te/ubuntu/buildenv.sh                 |   4 +
 tools/gluten-te/ubuntu/defaults.conf               |   3 +
 tools/gluten-te/ubuntu/dockerfile-buildenv         |  20 ++--
 .../buildhere-veloxbe-portable-libs/README.md      |  13 +--
 .../buildhere-veloxbe-portable-libs/run.sh         |   2 +
 14 files changed, 172 insertions(+), 100 deletions(-)

diff --git a/tools/gluten-te/centos/buildenv.sh 
b/tools/gluten-te/centos/buildenv.sh
index 91e90618bd..0806d58e18 100755
--- a/tools/gluten-te/centos/buildenv.sh
+++ b/tools/gluten-te/centos/buildenv.sh
@@ -33,6 +33,9 @@ HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT}
 # If on, use maven mirror settings for PRC's network environment
 USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR}
 
+# Whether to build Spark binaries in buildenv image
+BUILD_SPARK_BINARIES=${BUILD_SPARK_BINARIES:-$DEFAULT_BUILD_SPARK_BINARIES}
+
 # Set timezone name
 TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}
 
@@ -55,6 +58,7 @@ BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS 
--build-arg OS_IMAGE_NAM
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_HOST=$HTTP_PROXY_HOST"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_PORT=$HTTP_PROXY_PORT"
+BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
BUILD_SPARK_BINARIES=$BUILD_SPARK_BINARIES"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f 
$BASEDIR/dockerfile-buildenv"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target 
gluten-buildenv"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t 
$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
diff --git a/tools/gluten-te/centos/centos-7-deps.sh 
b/tools/gluten-te/centos/centos-7-deps.sh
index 4971efc945..3b23c03382 100755
--- a/tools/gluten-te/centos/centos-7-deps.sh
+++ b/tools/gluten-te/centos/centos-7-deps.sh
@@ -16,34 +16,82 @@
 
 set -ex
 
-#CENTOS_MIRROR_URL=https://mirrors.edge.kernel.org/centos
-#CENTOS_MIRROR_GPGKEY="${CENTOS_MIRROR_URL}/RPM-GPG-KEY-CentOS-7"
-#
-#cp /etc/yum.repos.d/CentOS-Base.repo /tmp/CentOS-Base.repo
-#sed -i "/^mirrorlist/d;s/^\#baseurl=/baseurl=/" /tmp/CentOS-Base.repo
-#sed -i "s|^gpgkey=.*$|gpgkey=${CENTOS_MIRROR_GPGKEY}|" /tmp/CentOS-Base.repo
-#sed -i "s|http://mirror.centos.org/centos|${CENTOS_MIRROR_URL}|" 
/tmp/CentOS-Base.repo
-#rm /etc/yum.repos.d/*
-#mv /tmp/CentOS-Base.repo /etc/yum.repos.d/
-
-sed -e 's|^mirrorlist=|#mirrorlist=|g' \
-         -e 
's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.ustc.edu.cn/centos|g'
 \
-         -i.bak \
-         /etc/yum.repos.d/CentOS-Base.repo
-
-# Disable fastestmirror
-sed -i "s/enabled=1/enabled=0/" /etc/yum/pluginconf.d/fastestmirror.conf 
-
-yum -y install epel-release centos-release-scl
+sed -i -e "s/enabled=1/enabled=0/" /etc/yum/pluginconf.d/fastestmirror.conf
+sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
+sed -i -e 
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" 
/etc/yum.repos.d/CentOS-*
+yum install -y centos-release-scl
+rm -f /etc/yum.repos.d/CentOS-SCLo-scl.repo
+sed -i \
+  -e 's/^mirrorlist/#mirrorlist/' \
+  -e 's/^#baseurl/baseurl/' \
+  -e 's/mirror\.centos\.org/vault.centos.org/' \
+  /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo
+
+yum -y install epel-release
 yum -y install \
-    git \
-    dnf \
-    cmake3 \
-    ccache \
-    devtoolset-9 \
-    java-1.8.0-openjdk \
-    java-1.8.0-openjdk-devel \
-    ninja-build \
-    wget
-
-ln -s /usr/bin/cmake3 /usr/local/bin/cmake
+  wget curl tar zip unzip which patch sudo \
+  ninja-build perl-IPC-Cmd autoconf autoconf-archive automake libtool \
+  devtoolset-9 python3 pip dnf \
+  bison \
+  java-1.8.0-openjdk java-1.8.0-openjdk-devel
+
+# Link c++ to the one in devtoolset.
+ln -s /opt/rh/devtoolset-9/root/usr/bin/c++ /usr/bin/c++
+
+pip3 install --upgrade pip
+
+# cmake >= 3.28.3
+pip3 install cmake==3.28.3
+
+# git >= 2.7.4
+if [[ "$(git --version)" != "git version 2."* ]]; then
+  [ -f /etc/yum.repos.d/ius.repo ] || yum -y install 
https://repo.ius.io/ius-release-el7.rpm
+  yum -y remove git
+  yum -y install git236
+fi
+
+# flex>=2.6.0
+if [[ "$(PATH="/usr/local/bin:$PATH" flex --version 2>&1)" != "flex 2.6."* ]]; 
then
+  yum -y install gettext-devel
+  
FLEX_URL="https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz";
+  mkdir -p /tmp/flex
+  wget -q --max-redirect 3 -O - "${FLEX_URL}" | tar -xz -C /tmp/flex 
--strip-components=1
+  cd /tmp/flex
+  ./autogen.sh
+  ./configure
+  make install
+  cd
+  rm -rf /tmp/flex
+fi
+
+# automake>=1.14
+installed_automake_version="$(aclocal --version | sed -En "1s/^.* 
([1-9\.]*)$/\1/p")"
+if [ "$(semver "$installed_automake_version")" -lt "$(semver 1.14)" ]; then
+  mkdir -p /tmp/automake
+  wget -O - http://ftp.gnu.org/gnu/automake/automake-1.16.5.tar.xz | tar -x 
--xz -C /tmp/automake --strip-components=1
+  cd /tmp/automake
+  ./configure
+  make install -j
+  cd
+  rm -rf /tmp/automake
+
+  # Fix aclocal search path
+  echo /usr/share/aclocal >/usr/local/share/aclocal/dirlist
+fi
+
+# maven
+if [ -z "$(which mvn)" ]; then
+  maven_version=3.9.2
+  maven_install_dir=/opt/maven-$maven_version
+  if [ -d /opt/maven-$maven_version ]; then
+    echo "Failed to install maven: ${maven_install_dir} is exists" >&2
+    exit 1
+  fi
+
+  cd /tmp
+  wget 
https://archive.apache.org/dist/maven/maven-3/$maven_version/binaries/apache-maven-$maven_version-bin.tar.gz
+  tar -xvf apache-maven-$maven_version-bin.tar.gz
+  rm -f apache-maven-$maven_version-bin.tar.gz
+  mv apache-maven-$maven_version "${maven_install_dir}"
+  ln -s "${maven_install_dir}/bin/mvn" /usr/local/bin/mvn
+fi
diff --git a/tools/gluten-te/centos/centos-8-deps.sh 
b/tools/gluten-te/centos/centos-8-deps.sh
index f76db6df5e..65a5fd0545 100755
--- a/tools/gluten-te/centos/centos-8-deps.sh
+++ b/tools/gluten-te/centos/centos-8-deps.sh
@@ -16,13 +16,12 @@
 
 set -ex
 
-sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* 
-# The connection to vault.centos.org in CI is unstable
-# sed -i -e 
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" 
/etc/yum.repos.d/CentOS-*
-minorver=8.5.2111
-sed -i -e \
-  
"s|^#baseurl=http://mirror.centos.org/\$contentdir/\$releasever|baseurl=https://mirrors.aliyun.com/centos-vault/$minorver|g"
 \
-  /etc/yum.repos.d/CentOS-*.repo
+# TODO, use CentOS 7 instead.
+echo " This script is out of date!"
+exit 1
+
+sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
+sed -i -e 
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" 
/etc/yum.repos.d/CentOS-*
 
 dnf install -y epel-release sudo
 yum -y update && yum clean all && yum install -y dnf-plugins-core
diff --git a/tools/gluten-te/centos/defaults.conf 
b/tools/gluten-te/centos/defaults.conf
index 1213ff66d6..c39b396ba4 100755
--- a/tools/gluten-te/centos/defaults.conf
+++ b/tools/gluten-te/centos/defaults.conf
@@ -31,11 +31,14 @@ DEFAULT_HTTP_PROXY_PORT=
 # If on, use maven mirror settings for PRC's network environment
 DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
 
+# Whether to build Spark binaries in buildenv image
+DEFAULT_BUILD_SPARK_BINARIES=OFF
+
 # Base operator system image used in build scripts.
 DEFAULT_OS_IMAGE_NAME=centos
 
 # Version ID of os image
-DEFAULT_OS_IMAGE_TAG=8
+DEFAULT_OS_IMAGE_TAG=7
 
 # Set timezone name
 DEFAULT_TIMEZONE=Asia/Shanghai
diff --git a/tools/gluten-te/centos/dockerfile-buildenv 
b/tools/gluten-te/centos/dockerfile-buildenv
index 5bd2ed74a1..d3b9eb1eae 100755
--- a/tools/gluten-te/centos/dockerfile-buildenv
+++ b/tools/gluten-te/centos/dockerfile-buildenv
@@ -51,35 +51,22 @@ RUN /tmp/deps.sh \
   && dnf clean all \
   && rm -rf /var/cache/yum
 
-# Install deps from url
-ENV PATH="$PATH:/usr/lib/jvm/java-1.8.0-openjdk/bin"
-RUN wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
 && \
-    tar -xvf apache-maven-3.8.8-bin.tar.gz && \
-    rm apache-maven-3.8.8-bin.tar.gz && \
-    mv apache-maven-3.8.8 /usr/lib/maven
+# Spark binaries
+WORKDIR /opt
+ARG BUILD_SPARK_BINARIES
 
 # Build & install Spark 3.2.2
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
-RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C 
spark322 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz;
 fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark322 && tar -xvf 
spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1; fi
 
 # Build & install Spark 3.3.1
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C 
spark331 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget 
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark331 && tar -xvf 
spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1; fi
 
 # Build & install Spark 3.4.3
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark342 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C 
spark342 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then 
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark343 && tar -xvf 
spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1; fi
 
 # Build & install Spark 3.5.1
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark351 && tar -xvf spark-3.5.1-bin-hadoop3.tgz -C 
spark351 --strip-components=1
-
-ENV PATH="$PATH:/usr/lib/maven/bin"
-ENV 
LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:/lib64:/lib
-
-# Velox setup scripts require sudo
-RUN yum -y install sudo \
-    && yum clean all
-RUN echo '%wheel ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
-
-COPY scripts/env.sh /env.sh
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark351 && tar -xvf 
spark-3.5.1-bin-hadoop3.tgz -C spark351 --strip-components=1; fi
diff --git 
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
similarity index 65%
copy from 
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
copy to 
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
index 28e955dac6..5574dd0d72 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/README.md
@@ -1,4 +1,4 @@
-# Utility for building C++ libs in container
+# Utility for building C++ libs in CentOS 7 (with glibc 2.17) container
 
 The folder contains script code to build `libvelox.so` and `libgluten.so` in 
docker container and for host use.
 
@@ -11,26 +11,23 @@ The folder contains script code to build `libvelox.so` and 
`libgluten.so` in doc
 ## Usage
 
 ```sh
-# 1. Set the following envs in case you are behind http proxy.
+# 1. (Optional) Set the following envs in case you are behind http proxy.
 export HTTP_PROXY_HOST=myproxy.example.com
 export HTTP_PROXY_PORT=55555
 
-# 2. Set the following env to install Gluten's modified Arrow Jars on host.
-export MOUNT_MAVEN_CACHE=ON
-
-# 3. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Build the C++ libs in a CentOS 7 docker container.
 # Note, this command could take much longer time to finish if it's never run 
before.
 # After the first run, the essential build environment will be cached in 
docker builder.
 #
 # Additionally, changes to HTTP_PROXY_HOST / HTTP_PROXY_PORT could invalidate 
the build cache
 # either. For more details, please check docker file `dockerfile-buildenv`.
 cd gluten/
-tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
+tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
 
-# 4. Check the built libs.
+# 3. Check the built libs.
 ls -l cpp/build/releases/
 
-# 5. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
+# 4. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
 # For example:
 mvn clean install -P spark-3.4,backends-velox -DskipTests
 ```
\ No newline at end of file
diff --git a/tools/gluten-te/centos/scripts/env.sh 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
old mode 100644
new mode 100755
similarity index 75%
rename from tools/gluten-te/centos/scripts/env.sh
rename to 
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
index 0782e97f8e..2648725ce0
--- a/tools/gluten-te/centos/scripts/env.sh
+++ 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run-default.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/bash
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -13,12 +13,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# shellcheck disable=SC1091,SC2155
 
-if [ -f "/opt/rh/gcc-toolset-9/enable" ]; then
-    . /opt/rh/gcc-toolset-9/enable
-elif [ -f "/opt/rh/devtoolset-9/enable" ]; then # CentOS 7
-    . /opt/rh/devtoolset-9/enable
-fi
+set -ex
 
-export MAKEFLAGS="-j$(nproc)"
\ No newline at end of file
+BASEDIR=$(readlink -f $(dirname $0))
+
+$BASEDIR/run.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF 
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git 
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
similarity index 91%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
index 4d28d45211..51dbb93a8c 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -20,6 +20,8 @@ BASEDIR=$(readlink -f $(dirname $0))
 
 TIMESTAMP=$(date +%s)
 
+# Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
 export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP 
-v $BASEDIR/scripts:/opt/scripts"
 
 BASH_ARGS="$*"
diff --git 
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
similarity index 56%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to 
tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 4d28d45211..f1f57e97c1 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ 
b/tools/gluten-te/centos/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -16,12 +16,37 @@
 
 set -ex
 
-BASEDIR=$(readlink -f $(dirname $0))
+export NUM_THREADS=$(nproc)
+export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
 
-TIMESTAMP=$(date +%s)
+# Retry code copied from https://unix.stackexchange.com/a/137639.
+function fail {
+  echo $1 >&2
+  exit 1
+}
 
-export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP 
-v $BASEDIR/scripts:/opt/scripts"
+function retry {
+  local n=1
+  local max=5
+  local delay=15
+  while true; do
+    "$@" && break || {
+      if [[ $n -lt $max ]]; then
+        ((n++))
+        echo "Command failed. Attempt $n/$max:"
+        sleep $delay;
+      else
+        fail "The command has failed after $n attempts."
+      fi
+    }
+  done
+}
 
-BASH_ARGS="$*"
+# FIXME: Works only in CentOS 7
+source /opt/rh/devtoolset-9/enable
 
-$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
+cd /opt/gluten
+
+BASH_ARGS=$@
+
+retry dev/builddeps-veloxbe.sh $BASH_ARGS
diff --git a/tools/gluten-te/ubuntu/buildenv.sh 
b/tools/gluten-te/ubuntu/buildenv.sh
index e96a0faba9..4c552ba117 100755
--- a/tools/gluten-te/ubuntu/buildenv.sh
+++ b/tools/gluten-te/ubuntu/buildenv.sh
@@ -33,6 +33,9 @@ HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT}
 # If on, use maven mirror settings for PRC's network environment
 USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR}
 
+# Whether to build Spark binaries in buildenv image
+BUILD_SPARK_BINARIES=${BUILD_SPARK_BINARIES:-$DEFAULT_BUILD_SPARK_BINARIES}
+
 # Set timezone name
 TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}
 
@@ -54,6 +57,7 @@ BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS 
--build-arg OS_IMAGE_NAM
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_HOST=$HTTP_PROXY_HOST"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_PORT=$HTTP_PROXY_PORT"
+BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
BUILD_SPARK_BINARIES=$BUILD_SPARK_BINARIES"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f 
$BASEDIR/dockerfile-buildenv"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target 
gluten-buildenv"
 BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t 
$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
diff --git a/tools/gluten-te/ubuntu/defaults.conf 
b/tools/gluten-te/ubuntu/defaults.conf
index 177c1073b9..393b23a473 100644
--- a/tools/gluten-te/ubuntu/defaults.conf
+++ b/tools/gluten-te/ubuntu/defaults.conf
@@ -31,6 +31,9 @@ DEFAULT_HTTP_PROXY_PORT=
 # If on, use maven mirror settings for PRC's network environment
 DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
 
+# Whether to build Spark binaries in buildenv image
+DEFAULT_BUILD_SPARK_BINARIES=OFF
+
 # Base operator system image used in build scripts.
 DEFAULT_OS_IMAGE_NAME=ubuntu
 
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv 
b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 1449ed46a0..47c7b80d86 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -111,21 +111,25 @@ RUN cd /opt && wget 
https://github.com/Kitware/CMake/releases/download/v3.28.3/c
 
 RUN cmake --version
 
+# Spark binaries
+WORKDIR /opt
+ARG BUILD_SPARK_BINARIES
+
 # Build & install Spark 3.2.2
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
-RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C 
spark322 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz;
 fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark322 && tar -xvf 
spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1; fi
 
 # Build & install Spark 3.3.1
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C 
spark331 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then wget 
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark331 && tar -xvf 
spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1; fi
 
 # Build & install Spark 3.4.3
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark343 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C 
spark343 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then 
https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark343 && tar -xvf 
spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1; fi
 
 # Build & install Spark 3.5.1
-RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark351 && tar -xvf spark-3.5.1-bin-hadoop3.tgz -C 
spark351 --strip-components=1
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz; 
fi
+RUN if [ "$BUILD_SPARK_BINARIES" = "ON" ]; then mkdir spark351 && tar -xvf 
spark-3.5.1-bin-hadoop3.tgz -C spark351 --strip-components=1; fi
 
 # Prepare entry command
 COPY scripts/cmd.sh /root/.cmd.sh
diff --git 
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md 
b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 28e955dac6..8660265e35 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -1,4 +1,4 @@
-# Utility for building C++ libs in container
+# Utility for building C++ libs in Ubuntu 20.04 (with glibc 2.31) container
 
 The folder contains script code to build `libvelox.so` and `libgluten.so` in 
docker container and for host use.
 
@@ -11,14 +11,11 @@ The folder contains script code to build `libvelox.so` and 
`libgluten.so` in doc
 ## Usage
 
 ```sh
-# 1. Set the following envs in case you are behind http proxy.
+# 1. (Optional) Set the following envs in case you are behind http proxy.
 export HTTP_PROXY_HOST=myproxy.example.com
 export HTTP_PROXY_PORT=55555
 
-# 2. Set the following env to install Gluten's modified Arrow Jars on host.
-export MOUNT_MAVEN_CACHE=ON
-
-# 3. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Build the C++ libs in an Ubuntu 20.04 docker container.
 # Note, this command could take much longer time to finish if it's never run 
before.
 # After the first run, the essential build environment will be cached in 
docker builder.
 #
@@ -27,10 +24,10 @@ export MOUNT_MAVEN_CACHE=ON
 cd gluten/
 tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
 
-# 4. Check the built libs.
+# 3. Check the built libs.
 ls -l cpp/build/releases/
 
-# 5. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
+# 4. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
 # For example:
 mvn clean install -P spark-3.4,backends-velox -DskipTests
 ```
\ No newline at end of file
diff --git 
a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh 
b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 4d28d45211..51dbb93a8c 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -20,6 +20,8 @@ BASEDIR=$(readlink -f $(dirname $0))
 
 TIMESTAMP=$(date +%s)
 
+# Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
 export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP 
-v $BASEDIR/scripts:/opt/scripts"
 
 BASH_ARGS="$*"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to