This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a71e609a3 [VL] Minor improvements on gluten-it / gluten-te toolchains (#6476)
a71e609a3 is described below
commit a71e609a34507983532eabac19a83c5bf4c2246e
Author: Hongze Zhang <[email protected]>
AuthorDate: Thu Jul 18 09:12:26 2024 +0800
[VL] Minor improvements on gluten-it / gluten-te toolchains (#6476)
---
.../org/apache/gluten/integration/command/SparkRunModes.java | 3 +++
tools/gluten-it/pom.xml | 6 +++++-
tools/gluten-it/sbin/gluten-it.sh | 8 ++++++++
tools/gluten-it/spark-home/jars | 1 +
tools/gluten-te/centos/shared.sh | 7 +++++++
tools/gluten-te/ubuntu/dockerfile-buildenv | 10 +++++++++-
.../ubuntu/examples/buildhere-veloxbe-portable-libs/README.md | 9 ++++++---
.../buildhere-veloxbe-portable-libs/{run.sh => run-default.sh} | 6 +-----
.../ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh | 4 +++-
.../examples/buildhere-veloxbe-portable-libs/scripts/all.sh | 8 +++++---
tools/gluten-te/ubuntu/shared.sh | 7 +++++++
11 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
index cfd3848d8..56ef68db9 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
@@ -129,6 +129,9 @@ public final class SparkRunModes {
if (!System.getenv().containsKey("SPARK_HOME")) {
throw new IllegalArgumentException("SPARK_HOME not set! Please use
--local if there is no local Spark build");
}
+ if (!System.getenv().containsKey("SPARK_SCALA_VERSION")) {
+ throw new IllegalArgumentException("SPARK_SCALA_VERSION not set!
Please set it first or use --local instead. Example: export
SPARK_SCALA_VERSION=2.12");
+ }
return String.format("local-cluster[%d,%d,%d]", lcWorkers,
lcWorkerCores, Utils.byteStringAsMb(lcWorkerMem));
}
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index c092a0ebb..a65324f27 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -17,7 +17,7 @@
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
- <scala.library.version>2.12.15</scala.library.version>
+ <scala.library.version>2.12.17</scala.library.version>
<spark.version>3.4.2</spark.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.major.version>3</spark.major.version>
@@ -144,24 +144,28 @@
</activation>
<properties>
<spark.version>3.2.2</spark.version>
+ <scala.library.version>2.12.15</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.3</id>
<properties>
<spark.version>3.3.1</spark.version>
+ <scala.library.version>2.12.15</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.4</id>
<properties>
<spark.version>3.4.2</spark.version>
+ <scala.library.version>2.12.17</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.5</id>
<properties>
<spark.version>3.5.1</spark.version>
+ <scala.library.version>2.12.18</scala.library.version>
</properties>
</profile>
<profile>
diff --git a/tools/gluten-it/sbin/gluten-it.sh b/tools/gluten-it/sbin/gluten-it.sh
index fda117417..b21038ccd 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,6 +28,14 @@ fi
JAR_PATH=$LIB_DIR/*
+EMBEDDED_SPARK_HOME=$BASEDIR/../spark-home
+
+export SPARK_HOME=${SPARK_HOME:-$EMBEDDED_SPARK_HOME}
+export SPARK_SCALA_VERSION=${SPARK_SCALA_VERSION:-'2.12'}
+
+echo "SPARK_HOME set at [$SPARK_HOME]."
+echo "SPARK_SCALA_VERSION set at [$SPARK_SCALA_VERSION]."
+
$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
diff --git a/tools/gluten-it/spark-home/jars b/tools/gluten-it/spark-home/jars
new file mode 120000
index 000000000..2939305ca
--- /dev/null
+++ b/tools/gluten-it/spark-home/jars
@@ -0,0 +1 @@
+../package/target/lib
\ No newline at end of file
diff --git a/tools/gluten-te/centos/shared.sh b/tools/gluten-te/centos/shared.sh
index d14b35bf9..0253c16ce 100755
--- a/tools/gluten-te/centos/shared.sh
+++ b/tools/gluten-te/centos/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain
+# Validate envs
+if [ -z "$HOME" ]
+then
+ echo 'Environment variable $HOME not found. Aborting.'
+ exit 1
+fi
+
# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 41fc20239..e520fd295 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -65,7 +65,7 @@ RUN cat /root/.m2/settings.xml
## APT dependencies
# Update, then install essentials
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo
locales wget tar tzdata git ccache cmake ninja-build build-essential
llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev
libboost-all-dev libcurl4-openssl-dev
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo
locales wget tar tzdata git ccache ninja-build build-essential llvm-11-dev
clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev
libboost-all-dev libcurl4-openssl-dev curl zip unzip tar pkg-config
autoconf-archive bison flex
# install HBM dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y
autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
@@ -103,6 +103,14 @@ RUN set-login-env "LANG=en_US.UTF-8"
RUN set-login-env "LANGUAGE=en_US:en"
RUN set-login-env "LC_ALL=en_US.UTF-8"
+# Install CMake
+RUN cd /opt && wget
https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.sh
\
+ && mkdir cmake \
+ && bash cmake-3.28.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake \
+ && ln -s /opt/cmake/bin/cmake /usr/bin/cmake
+
+RUN cmake --version
+
# Build & install Spark 3.2.2
RUN cd /opt && wget
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C
spark322 --strip-components=1
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 27e97467d..cd76e74e7 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -15,7 +15,10 @@ The folder contains script code to build `libvelox.so` and
`libgluten.so` in doc
export HTTP_PROXY_HOST=myproxy.example.com
export HTTP_PROXY_PORT=55555
-# 2. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
+
+# 3. Build the C++ libs in a ubuntu 20.04 docker container.
# Note, this command could take much longer time to finish if it's never run
before.
# After the first run, the essential build environment will be cached in
docker builder.
#
@@ -24,10 +27,10 @@ export HTTP_PROXY_PORT=55555
cd gluten/
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
-# 3. Check the built libs.
+# 4. Check the built libs.
ls -l cpp/build/releases/
-# 4. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
+# 5. If you intend to build Gluten's bundled jar, continue running subsequent
Maven commands.
# For example:
mvn clean install -P spark-3.4,backends-velox -DskipTests
```
\ No newline at end of file
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
similarity index 81%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
index 8a0f71bbc..2648725ce 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
@@ -18,8 +18,4 @@ set -ex
BASEDIR=$(readlink -f $(dirname $0))
-TIMESTAMP=$(date +%s)
-
-export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP
-v $BASEDIR/scripts:/opt/scripts"
-
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+$BASEDIR/run.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 8a0f71bbc..4d28d4521 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -22,4 +22,6 @@ TIMESTAMP=$(date +%s)
export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP
-v $BASEDIR/scripts:/opt/scripts"
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+BASH_ARGS="$*"
+
+$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 267423552..18dd92a34 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -44,6 +44,8 @@ function retry {
cd /opt/gluten
retry apt-get update
-retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison
flex
-retry source ./dev/vcpkg/env.sh
-retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
+retry apt-get install -y --dry-run # We now have all essentials installed in
image.
+
+BASH_ARGS=$@
+
+retry dev/builddeps-veloxbe.sh $BASH_ARGS
diff --git a/tools/gluten-te/ubuntu/shared.sh b/tools/gluten-te/ubuntu/shared.sh
index d14b35bf9..0253c16ce 100755
--- a/tools/gluten-te/ubuntu/shared.sh
+++ b/tools/gluten-te/ubuntu/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain
+# Validate envs
+if [ -z "$HOME" ]
+then
+ echo 'Environment variable $HOME not found. Aborting.'
+ exit 1
+fi
+
# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]