This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a71e609a3 [VL] Minor improvements on gluten-it / gluten-te toolchains (#6476)
a71e609a3 is described below

commit a71e609a34507983532eabac19a83c5bf4c2246e
Author: Hongze Zhang <[email protected]>
AuthorDate: Thu Jul 18 09:12:26 2024 +0800

    [VL] Minor improvements on gluten-it / gluten-te toolchains (#6476)
---
 .../org/apache/gluten/integration/command/SparkRunModes.java   |  3 +++
 tools/gluten-it/pom.xml                                        |  6 +++++-
 tools/gluten-it/sbin/gluten-it.sh                              |  8 ++++++++
 tools/gluten-it/spark-home/jars                                |  1 +
 tools/gluten-te/centos/shared.sh                               |  7 +++++++
 tools/gluten-te/ubuntu/dockerfile-buildenv                     | 10 +++++++++-
 .../ubuntu/examples/buildhere-veloxbe-portable-libs/README.md  |  9 ++++++---
 .../buildhere-veloxbe-portable-libs/{run.sh => run-default.sh} |  6 +-----
 .../ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh     |  4 +++-
 .../examples/buildhere-veloxbe-portable-libs/scripts/all.sh    |  8 +++++---
 tools/gluten-te/ubuntu/shared.sh                               |  7 +++++++
 11 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
index cfd3848d8..56ef68db9 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
@@ -129,6 +129,9 @@ public final class SparkRunModes {
       if (!System.getenv().containsKey("SPARK_HOME")) {
         throw new IllegalArgumentException("SPARK_HOME not set! Please use 
--local if there is no local Spark build");
       }
+      if (!System.getenv().containsKey("SPARK_SCALA_VERSION")) {
+        throw new IllegalArgumentException("SPARK_SCALA_VERSION not set! 
Please set it first or use --local instead. Example: export 
SPARK_SCALA_VERSION=2.12");
+      }
       return String.format("local-cluster[%d,%d,%d]", lcWorkers, 
lcWorkerCores, Utils.byteStringAsMb(lcWorkerMem));
     }
 
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index c092a0ebb..a65324f27 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -17,7 +17,7 @@
     <java.version>1.8</java.version>
     <maven.compiler.source>${java.version}</maven.compiler.source>
     <maven.compiler.target>${java.version}</maven.compiler.target>
-    <scala.library.version>2.12.15</scala.library.version>
+    <scala.library.version>2.12.17</scala.library.version>
     <spark.version>3.4.2</spark.version>
     <scala.binary.version>2.12</scala.binary.version>
     <spark.major.version>3</spark.major.version>
@@ -144,24 +144,28 @@
       </activation>
       <properties>
         <spark.version>3.2.2</spark.version>
+        <scala.library.version>2.12.15</scala.library.version>
       </properties>
     </profile>
     <profile>
       <id>spark-3.3</id>
       <properties>
         <spark.version>3.3.1</spark.version>
+        <scala.library.version>2.12.15</scala.library.version>
       </properties>
     </profile>
     <profile>
       <id>spark-3.4</id>
       <properties>
         <spark.version>3.4.2</spark.version>
+        <scala.library.version>2.12.17</scala.library.version>
       </properties>
     </profile>
     <profile>
       <id>spark-3.5</id>
       <properties>
         <spark.version>3.5.1</spark.version>
+        <scala.library.version>2.12.18</scala.library.version>
       </properties>
     </profile>
     <profile>
diff --git a/tools/gluten-it/sbin/gluten-it.sh b/tools/gluten-it/sbin/gluten-it.sh
index fda117417..b21038ccd 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,6 +28,14 @@ fi
 
 JAR_PATH=$LIB_DIR/*
 
+EMBEDDED_SPARK_HOME=$BASEDIR/../spark-home
+
+export SPARK_HOME=${SPARK_HOME:-$EMBEDDED_SPARK_HOME}
+export SPARK_SCALA_VERSION=${SPARK_SCALA_VERSION:-'2.12'}
+
+echo "SPARK_HOME set at [$SPARK_HOME]."
+echo "SPARK_SCALA_VERSION set at [$SPARK_SCALA_VERSION]."
+
 $JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
     -XX:+IgnoreUnrecognizedVMOptions \
     --add-opens=java.base/java.lang=ALL-UNNAMED \
diff --git a/tools/gluten-it/spark-home/jars b/tools/gluten-it/spark-home/jars
new file mode 120000
index 000000000..2939305ca
--- /dev/null
+++ b/tools/gluten-it/spark-home/jars
@@ -0,0 +1 @@
+../package/target/lib
\ No newline at end of file
diff --git a/tools/gluten-te/centos/shared.sh b/tools/gluten-te/centos/shared.sh
index d14b35bf9..0253c16ce 100755
--- a/tools/gluten-te/centos/shared.sh
+++ b/tools/gluten-te/centos/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
 export DOCKER_BUILDKIT=1
 export BUILDKIT_PROGRESS=plain
 
+# Validate envs
+if [ -z "$HOME" ]
+then
+  echo 'Environment variable $HOME not found. Aborting.'
+  exit 1
+fi
+
 # Set operating system
 OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
 
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 41fc20239..e520fd295 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -65,7 +65,7 @@ RUN cat /root/.m2/settings.xml
 ## APT dependencies
 
 # Update, then install essentials
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo 
locales wget tar tzdata git ccache cmake ninja-build build-essential 
llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev 
libboost-all-dev libcurl4-openssl-dev
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo 
locales wget tar tzdata git ccache ninja-build build-essential llvm-11-dev 
clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev 
libboost-all-dev libcurl4-openssl-dev curl zip unzip tar pkg-config 
autoconf-archive bison flex
 
 # install HBM dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y 
autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
@@ -103,6 +103,14 @@ RUN set-login-env "LANG=en_US.UTF-8"
 RUN set-login-env "LANGUAGE=en_US:en"
 RUN set-login-env "LC_ALL=en_US.UTF-8"
 
+# Install CMake
+RUN cd /opt && wget https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.sh \
+    && mkdir cmake \
+    && bash cmake-3.28.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake \
+    && ln -s /opt/cmake/bin/cmake /usr/bin/cmake
+
+RUN cmake --version
+
 # Build & install Spark 3.2.2
 RUN cd /opt && wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
 RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C 
spark322 --strip-components=1
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 27e97467d..cd76e74e7 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -15,7 +15,10 @@ The folder contains script code to build `libvelox.so` and 
`libgluten.so` in doc
 export HTTP_PROXY_HOST=myproxy.example.com
 export HTTP_PROXY_PORT=55555
 
-# 2. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Set the following env to install Gluten's modified Arrow Jars on host.
+export MOUNT_MAVEN_CACHE=ON
+
+# 3. Build the C++ libs in a ubuntu 20.04 docker container.
 # Note, this command could take much longer time to finish if it's never run 
before.
 # After the first run, the essential build environment will be cached in 
docker builder.
 #
@@ -24,10 +27,10 @@ export HTTP_PROXY_PORT=55555
 cd gluten/
 tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
 
-# 3. Check the built libs.
+# 4. Check the built libs.
 ls -l cpp/build/releases/
 
-# 4. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
+# 5. If you intend to build Gluten's bundled jar, continue running subsequent 
Maven commands.
 # For example:
 mvn clean install -P spark-3.4,backends-velox -DskipTests
 ```
\ No newline at end of file
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
similarity index 81%
copy from tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
copy to tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
index 8a0f71bbc..2648725ce 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
@@ -18,8 +18,4 @@ set -ex
 
 BASEDIR=$(readlink -f $(dirname $0))
 
-TIMESTAMP=$(date +%s)
-
-export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP 
-v $BASEDIR/scripts:/opt/scripts"
-
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+$BASEDIR/run.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF 
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 8a0f71bbc..4d28d4521 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -22,4 +22,6 @@ TIMESTAMP=$(date +%s)
 
 export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP 
-v $BASEDIR/scripts:/opt/scripts"
 
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+BASH_ARGS="$*"
+
+$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 267423552..18dd92a34 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -44,6 +44,8 @@ function retry {
 
 cd /opt/gluten
 retry apt-get update
-retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison 
flex
-retry source ./dev/vcpkg/env.sh
-retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF 
--enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
+retry apt-get install -y --dry-run # We now have all essentials installed in 
image.
+
+BASH_ARGS=$@
+
+retry dev/builddeps-veloxbe.sh $BASH_ARGS
diff --git a/tools/gluten-te/ubuntu/shared.sh b/tools/gluten-te/ubuntu/shared.sh
index d14b35bf9..0253c16ce 100755
--- a/tools/gluten-te/ubuntu/shared.sh
+++ b/tools/gluten-te/ubuntu/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
 export DOCKER_BUILDKIT=1
 export BUILDKIT_PROGRESS=plain
 
+# Validate envs
+if [ -z "$HOME" ]
+then
+  echo 'Environment variable $HOME not found. Aborting.'
+  exit 1
+fi
+
 # Set operating system
 OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to