This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d793744abd [CORE] Use build/mvn wrapper for scheduled jobs and in
Dockerfiles (#11515)
d793744abd is described below
commit d793744abd2d04d1e9561bcae70263f84cefc86e
Author: Kent Yao <[email protected]>
AuthorDate: Wed Feb 4 12:36:55 2026 +0800
[CORE] Use build/mvn wrapper for scheduled jobs and in Dockerfiles (#11515)
* [CORE] Use build/mvn wrapper and remove setup-helper.sh
- Remove SETUP env var and setup-helper.sh dependency from workflows
- Replace raw mvn commands with ./build/mvn wrapper
- Remove manual Maven installation (MAVEN_HOME/PATH exports)
- Remove wget from yum install since Maven download is no longer needed
- Rename 'Setup java and maven' steps to 'Setup java'
- Update all Dockerfiles to use ./build/mvn:
- Dockerfile.centos7-static-build
- Dockerfile.centos7-gcc13-static-build
- Dockerfile.centos8-static-build
- Dockerfile.centos8-gcc13-static-build
- Dockerfile.centos8-dynamic-build
- Dockerfile.centos9-static-build
- Dockerfile.centos9-dynamic-build
- dev/docker/cudf/Dockerfile (removed system maven install)
- Remove inline Maven download/install code from dynamic-build Dockerfiles
- Delete .github/workflows/util/setup-helper.sh (no longer needed)
* [CORE] Fix MVN_CMD path for subdirectory execution
* [CORE] Use absolute path for build/mvn in openeuler jobs
* [CORE] Inline Hadoop/HDFS setup in velox_backend_x86.yml
* [GLUTEN-11515][CORE] Fix install-resources.sh to allow sourcing for
function definitions
Move the install_hadoop and setup_hdfs functions ahead of the case statement,
and wrap the case statement in a BASH_SOURCE check, so that the script can be
sourced to access its functions without immediately running the Spark
installation dispatch (the case statement).
---
.github/workflows/build_bundle_package.yml | 8 +-
.github/workflows/docker_image.yml | 2 +-
.github/workflows/flink.yml | 9 +-
.github/workflows/scala_code_format.yml | 3 -
...all-spark-resources.sh => install-resources.sh} | 155 +++++++++++++++------
.github/workflows/util/setup-helper.sh | 131 -----------------
.github/workflows/velox_backend_enhanced.yml | 2 +-
.github/workflows/velox_backend_x86.yml | 15 +-
.github/workflows/velox_nightly.yml | 75 ++++------
.github/workflows/velox_weekly.yml | 15 +-
dev/docker/Dockerfile.centos7-gcc13-static-build | 4 +-
dev/docker/Dockerfile.centos7-static-build | 4 +-
dev/docker/Dockerfile.centos8-dynamic-build | 10 +-
dev/docker/Dockerfile.centos8-gcc13-static-build | 7 +-
dev/docker/Dockerfile.centos8-static-build | 5 +-
dev/docker/Dockerfile.centos9-dynamic-build | 10 +-
dev/docker/Dockerfile.centos9-static-build | 5 +-
dev/docker/cudf/Dockerfile | 3 +-
18 files changed, 181 insertions(+), 282 deletions(-)
diff --git a/.github/workflows/build_bundle_package.yml
b/.github/workflows/build_bundle_package.yml
index 55634c2dfc..171d12d514 100644
--- a/.github/workflows/build_bundle_package.yml
+++ b/.github/workflows/build_bundle_package.yml
@@ -18,7 +18,6 @@ name: Build bundle package
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
CCACHE_DIR: "${{ github.workspace }}/.ccache"
- SETUP: 'bash .github/workflows/util/setup-helper.sh'
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{
github.workflow }}
@@ -95,16 +94,15 @@ jobs:
with:
name: velox-arrow-jar-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
sed -i
's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g'
/etc/yum.repos.d/CentOS-* && \
- yum update -y && yum install -y java-1.8.0-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-1.8.0-openjdk-devel
- name: Build for Spark ${{ github.event.inputs.spark }}
run: |
cd $GITHUB_WORKSPACE/ && \
- mvn clean install -P${{ github.event.inputs.spark }}
-Dhadoop.version=${{ github.event.inputs.hadoop }} -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -P${{ github.event.inputs.spark }}
-Dhadoop.version=${{ github.event.inputs.hadoop }} -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
diff --git a/.github/workflows/docker_image.yml
b/.github/workflows/docker_image.yml
index 5080e83a90..1d707fcdc6 100644
--- a/.github/workflows/docker_image.yml
+++ b/.github/workflows/docker_image.yml
@@ -21,7 +21,7 @@ on:
- main
paths:
- '.github/workflows/docker_image.yml'
- - '.github/workflows/util/install-spark-resources.sh'
+ - '.github/workflows/util/install-resources.sh'
- 'dev/docker/*'
- 'dev/docker/cudf/*'
schedule:
diff --git a/.github/workflows/flink.yml b/.github/workflows/flink.yml
index cd758ae614..77180e0cb6 100644
--- a/.github/workflows/flink.yml
+++ b/.github/workflows/flink.yml
@@ -19,6 +19,7 @@ on:
pull_request:
paths:
- '.github/workflows/flink.yml'
+ - 'build/mvn'
- 'gluten-flink/**'
env:
@@ -63,16 +64,16 @@ jobs:
git clone -b gluten-0530 https://github.com/bigo-sg/velox4j.git
cd velox4j && git reset --hard
288d181a1b05c47f1f17339eb498dd6375f7aec8
git apply $GITHUB_WORKSPACE/gluten-flink/patches/fix-velox4j.patch
- mvn clean install -DskipTests -Dgpg.skip -Dspotless.skip=true
+ $GITHUB_WORKSPACE/build/mvn clean install -DskipTests -Dgpg.skip
-Dspotless.skip=true
cd ..
git clone https://github.com/nexmark/nexmark.git
cd nexmark
- mvn clean install -DskipTests
+ $GITHUB_WORKSPACE/build/mvn clean install -DskipTests
- name: Build Gluten Flink
run: |
cd $GITHUB_WORKSPACE/gluten-flink
- mvn clean package -Dmaven.test.skip=true
+ $GITHUB_WORKSPACE/build/mvn clean package -Dmaven.test.skip=true
- name: Run Unit Tests
run: |
cd $GITHUB_WORKSPACE/gluten-flink
- mvn test
+ $GITHUB_WORKSPACE/build/mvn test
diff --git a/.github/workflows/scala_code_format.yml
b/.github/workflows/scala_code_format.yml
index 9f1fc711bb..dbaa623dda 100644
--- a/.github/workflows/scala_code_format.yml
+++ b/.github/workflows/scala_code_format.yml
@@ -37,9 +37,6 @@ concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{
github.workflow }}
cancel-in-progress: true
-env:
- SETUP: 'bash .github/workflows/util/setup-helper.sh'
-
jobs:
scala-format-check:
diff --git a/.github/workflows/util/install-spark-resources.sh
b/.github/workflows/util/install-resources.sh
similarity index 53%
rename from .github/workflows/util/install-spark-resources.sh
rename to .github/workflows/util/install-resources.sh
index 627fd70a6a..0a5240d132 100755
--- a/.github/workflows/util/install-spark-resources.sh
+++ b/.github/workflows/util/install-resources.sh
@@ -16,9 +16,83 @@
# Download Spark resources, required by some Spark UTs. The resource path
should be set
# for spark.test.home in mvn test.
+#
+# This file can be:
+# 1. Executed directly: ./install-resources.sh <spark-version> [install-dir]
+# 2. Sourced to use functions: source install-resources.sh; install_hadoop;
setup_hdfs
set -e
+# Install Hadoop binary
+function install_hadoop() {
+ echo "Installing Hadoop..."
+
+ apt-get update -y
+ apt-get install -y curl tar gzip
+
+ local HADOOP_VERSION=3.3.6
+ curl -fsSL -o hadoop.tgz
"https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
+ tar -xzf hadoop.tgz --no-same-owner --no-same-permissions
+ rm -f hadoop.tgz
+
+ export HADOOP_HOME="$PWD/hadoop-${HADOOP_VERSION}"
+ export PATH="$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH"
+ export LD_LIBRARY_PATH="$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH"
+
+ if [ -n "$GITHUB_ENV" ]; then
+ echo "HADOOP_HOME=$HADOOP_HOME" >> $GITHUB_ENV
+ echo "LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH" >>
$GITHUB_ENV
+ echo "$HADOOP_HOME/bin" >> $GITHUB_PATH
+ fi
+}
+
+# Setup HDFS namenode and datanode
+function setup_hdfs() {
+ export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
+
+ cat > "$HADOOP_CONF_DIR/core-site.xml" <<'EOF'
+<configuration>
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://localhost:9000</value>
+ </property>
+</configuration>
+EOF
+
+ cat > "$HADOOP_CONF_DIR/hdfs-site.xml" <<'EOF'
+<configuration>
+ <property><name>dfs.replication</name><value>1</value></property>
+
<property><name>dfs.namenode.rpc-address</name><value>localhost:9000</value></property>
+
<property><name>dfs.namenode.http-address</name><value>localhost:9870</value></property>
+
<property><name>dfs.datanode.address</name><value>localhost:9866</value></property>
+
<property><name>dfs.datanode.http.address</name><value>localhost:9864</value></property>
+ <property><name>dfs.permissions.enabled</name><value>false</value></property>
+</configuration>
+EOF
+
+ HDFS_TMP="${RUNNER_TEMP:-/tmp}/hdfs"
+ mkdir -p "$HDFS_TMP/nn" "$HDFS_TMP/dn"
+
+ perl -0777 -i -pe 's#</configuration># <property>\n
<name>dfs.namenode.name.dir</name>\n <value>file:'"$HDFS_TMP"'/nn</value>\n
</property>\n <property>\n <name>dfs.datanode.data.dir</name>\n
<value>file:'"$HDFS_TMP"'/dn</value>\n </property>\n</configuration>#s' \
+ "$HADOOP_CONF_DIR/hdfs-site.xml"
+
+ if [ -n "${GITHUB_ENV:-}" ]; then
+ echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR" >> "$GITHUB_ENV"
+ echo "HADOOP_HOME=$HADOOP_HOME" >> "$GITHUB_ENV"
+ fi
+
+ "$HADOOP_HOME/bin/hdfs" namenode -format -force -nonInteractive
+ "$HADOOP_HOME/sbin/hadoop-daemon.sh" start namenode
+ "$HADOOP_HOME/sbin/hadoop-daemon.sh" start datanode
+
+ for i in {1..60}; do
+ "$HADOOP_HOME/bin/hdfs" dfs -ls / >/dev/null 2>&1 && break
+ sleep 1
+ done
+
+ "$HADOOP_HOME/bin/hdfs" dfs -ls /
+}
+
# Installs Spark binary and source releases with:
# 1 - spark version
# 2 - hadoop version
@@ -85,42 +159,45 @@ function install_spark() {
rm -rf "${local_source}"
}
-INSTALL_DIR=${2:-/opt/}
-mkdir -p ${INSTALL_DIR}
-
-case "$1" in
-3.3)
- # Spark-3.3
- cd ${INSTALL_DIR} && \
- install_spark "3.3.1" "3" "2.12"
- ;;
-3.4)
- # Spark-3.4
- cd ${INSTALL_DIR} && \
- install_spark "3.4.4" "3" "2.12"
- ;;
-3.5)
- # Spark-3.5
- cd ${INSTALL_DIR} && \
- install_spark "3.5.5" "3" "2.12"
- ;;
-3.5-scala2.13)
- # Spark-3.5, scala 2.13
- cd ${INSTALL_DIR} && \
- install_spark "3.5.5" "3" "2.13"
- ;;
-4.0)
- # Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13
suffix
- cd ${INSTALL_DIR} && \
- install_spark "4.0.1" "3" "2.12"
- ;;
-4.1)
- # Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13
suffix
- cd ${INSTALL_DIR} && \
- install_spark "4.1.1" "3" "2.12"
- ;;
-*)
- echo "Spark version is expected to be specified."
- exit 1
- ;;
-esac
+# Only run install_spark when script is executed directly (not sourced)
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+ INSTALL_DIR=${2:-/opt/}
+ mkdir -p ${INSTALL_DIR}
+
+ case "$1" in
+ 3.3)
+ # Spark-3.3
+ cd ${INSTALL_DIR} && \
+ install_spark "3.3.1" "3" "2.12"
+ ;;
+ 3.4)
+ # Spark-3.4
+ cd ${INSTALL_DIR} && \
+ install_spark "3.4.4" "3" "2.12"
+ ;;
+ 3.5)
+ # Spark-3.5
+ cd ${INSTALL_DIR} && \
+ install_spark "3.5.5" "3" "2.12"
+ ;;
+ 3.5-scala2.13)
+ # Spark-3.5, scala 2.13
+ cd ${INSTALL_DIR} && \
+ install_spark "3.5.5" "3" "2.13"
+ ;;
+ 4.0)
+ # Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have
2.13 suffix
+ cd ${INSTALL_DIR} && \
+ install_spark "4.0.1" "3" "2.12"
+ ;;
+ 4.1)
+ # Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have
2.13 suffix
+ cd ${INSTALL_DIR} && \
+ install_spark "4.1.1" "3" "2.12"
+ ;;
+ *)
+ echo "Spark version is expected to be specified."
+ exit 1
+ ;;
+ esac
+fi
diff --git a/.github/workflows/util/setup-helper.sh
b/.github/workflows/util/setup-helper.sh
deleted file mode 100644
index 338ae057c8..0000000000
--- a/.github/workflows/util/setup-helper.sh
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-function install_maven {
- (
- local maven_version="3.9.12"
- local local_binary="apache-maven-${maven_version}-bin.tar.gz"
- local mirror_host="https://www.apache.org/dyn/closer.lua"
- local
url="${mirror_host}/maven/maven-3/${maven_version}/binaries/${local_binary}?action=download"
- cd /opt/
- wget -nv -O ${local_binary} ${url}
- tar -xvf ${local_binary} && mv apache-maven-${maven_version} /usr/lib/maven
- )
- export PATH=/usr/lib/maven/bin:$PATH
- if [ -n "$GITHUB_ENV" ]; then
- echo "PATH=/usr/lib/maven/bin:$PATH" >> $GITHUB_ENV
- else
- echo "Warning: GITHUB_ENV is not set. Skipping environment variable
export."
- fi
-}
-
-function install_hadoop {
- echo "Installing Hadoop..."
-
- apt-get update -y
- apt-get install -y curl tar gzip
-
- local HADOOP_VERSION=3.3.6
- curl -fsSL -o hadoop.tgz
"https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
- tar -xzf hadoop.tgz --no-same-owner --no-same-permissions
-
- export HADOOP_HOME="$PWD/hadoop-${HADOOP_VERSION}"
- export PATH="$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH"
-
- export LD_LIBRARY_PATH="$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH"
-
- if [ -n "$GITHUB_ENV" ]; then
- echo "HADOOP_HOME=$HADOOP_HOME" >> $GITHUB_ENV
- echo "LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH" >>
$GITHUB_ENV
- echo "$HADOOP_HOME/bin" >> $GITHUB_PATH
- fi
-}
-
-function setup_hdfs {
- export HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop"
-
- cat > "$HADOOP_CONF_DIR/core-site.xml" <<'EOF'
-<configuration>
- <property>
- <name>fs.defaultFS</name>
- <value>hdfs://localhost:9000</value>
- </property>
-</configuration>
-EOF
-
- cat > "$HADOOP_CONF_DIR/hdfs-site.xml" <<'EOF'
-<configuration>
- <property>
- <name>dfs.replication</name>
- <value>1</value>
- </property>
-
- <property>
- <name>dfs.namenode.rpc-address</name>
- <value>localhost:9000</value>
- </property>
-
- <property>
- <name>dfs.namenode.http-address</name>
- <value>localhost:9870</value>
- </property>
-
- <property>
- <name>dfs.datanode.address</name>
- <value>localhost:9866</value>
- </property>
-
- <property>
- <name>dfs.datanode.http.address</name>
- <value>localhost:9864</value>
- </property>
-
- <property>
- <name>dfs.permissions.enabled</name>
- <value>false</value>
- </property>
-</configuration>
-EOF
-
- export HDFS_TMP="${RUNNER_TEMP:-/tmp}/hdfs"
- mkdir -p "$HDFS_TMP/nn" "$HDFS_TMP/dn"
-
- perl -0777 -i -pe 's#</configuration># <property>\n
<name>dfs.namenode.name.dir</name>\n <value>file:'"$HDFS_TMP"'/nn</value>\n
</property>\n <property>\n <name>dfs.datanode.data.dir</name>\n
<value>file:'"$HDFS_TMP"'/dn</value>\n </property>\n</configuration>#s' \
- "$HADOOP_CONF_DIR/hdfs-site.xml"
-
- if [ -n "${GITHUB_ENV:-}" ]; then
- echo "HADOOP_CONF_DIR=$HADOOP_CONF_DIR" >> "$GITHUB_ENV"
- echo "HADOOP_HOME=$HADOOP_HOME" >> "$GITHUB_ENV"
- fi
-
- "$HADOOP_HOME/bin/hdfs" namenode -format -force -nonInteractive
- "$HADOOP_HOME/sbin/hadoop-daemon.sh" start namenode
- "$HADOOP_HOME/sbin/hadoop-daemon.sh" start datanode
-
- for i in {1..60}; do
- "$HADOOP_HOME/bin/hdfs" dfs -ls / >/dev/null 2>&1 && break
- sleep 1
- done
-
- "$HADOOP_HOME/bin/hdfs" dfs -ls /
-}
-for cmd in "$@"
-do
- echo "Running: $cmd"
- "$cmd"
-done
diff --git a/.github/workflows/velox_backend_enhanced.yml
b/.github/workflows/velox_backend_enhanced.yml
index 0e6efa5100..2337993db3 100644
--- a/.github/workflows/velox_backend_enhanced.yml
+++ b/.github/workflows/velox_backend_enhanced.yml
@@ -229,7 +229,7 @@ jobs:
- name: Prepare Spark Resources for Spark 3.5.5
run: |
rm -rf /opt/shims/spark35
- bash .github/workflows/util/install-spark-resources.sh 3.5
+ bash .github/workflows/util/install-resources.sh 3.5
- name: Build and Run unit test for Spark 3.5.5 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
diff --git a/.github/workflows/velox_backend_x86.yml
b/.github/workflows/velox_backend_x86.yml
index 459efb730d..12e9e610fe 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -19,7 +19,7 @@ on:
pull_request:
paths:
- '.github/workflows/velox_backend_x86.yml'
- - '.github/workflows/util/install-spark-resources.sh' #TODO remove after
image update
+ - '.github/workflows/util/install-resources.sh' #TODO remove after image
update
- 'pom.xml'
- 'backends-velox/**'
- 'gluten-uniffle/**'
@@ -177,9 +177,12 @@ jobs:
ls -l
/root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/
- name: Install Hadoop & Setup HDFS
if: matrix.os == 'ubuntu:22.04' && matrix.spark == 'spark-3.5' &&
matrix.java == 'java-8'
+ shell: bash
run: |
export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
- bash .github/workflows/util/setup-helper.sh install_hadoop setup_hdfs
+ source .github/workflows/util/install-resources.sh
+ install_hadoop
+ setup_hdfs
- name: Build and run TPC-H / TPC-DS
run: |
cd $GITHUB_WORKSPACE/
@@ -1339,7 +1342,7 @@ jobs:
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image
update
run: |
rm -rf /opt/shims/spark40
- bash .github/workflows/util/install-spark-resources.sh 4.0
+ bash .github/workflows/util/install-resources.sh 4.0
mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12
/opt/shims/spark40/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.0.0 with scala-2.13 (other
tests)
run: |
@@ -1389,7 +1392,7 @@ jobs:
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image
update
run: |
rm -rf /opt/shims/spark40
- bash .github/workflows/util/install-spark-resources.sh 4.0
+ bash .github/workflows/util/install-resources.sh 4.0
mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12
/opt/shims/spark40/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.0 (slow tests)
run: |
@@ -1445,7 +1448,7 @@ jobs:
- name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image
update
run: |
rm -rf /opt/shims/spark41
- bash .github/workflows/util/install-spark-resources.sh 4.1
+ bash .github/workflows/util/install-resources.sh 4.1
mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12
/opt/shims/spark41/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.1.0 with scala-2.13 (other
tests)
run: |
@@ -1495,7 +1498,7 @@ jobs:
- name: Prepare Spark Resources for Spark 4.1.0 #TODO remove after image
update
run: |
rm -rf /opt/shims/spark41
- bash .github/workflows/util/install-spark-resources.sh 4.1
+ bash .github/workflows/util/install-resources.sh 4.1
mv /opt/shims/spark41/spark_home/assembly/target/scala-2.12
/opt/shims/spark41/spark_home/assembly/target/scala-2.13
- name: Build and Run unit test for Spark 4.0 (slow tests)
run: |
diff --git a/.github/workflows/velox_nightly.yml
b/.github/workflows/velox_nightly.yml
index f2022f0a75..2b45b40f0f 100644
--- a/.github/workflows/velox_nightly.yml
+++ b/.github/workflows/velox_nightly.yml
@@ -27,7 +27,6 @@ on:
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
CCACHE_DIR: "${{ github.workspace }}/.ccache"
- SETUP: 'bash .github/workflows/util/setup-helper.sh'
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{
github.workflow }}
@@ -86,23 +85,20 @@ jobs:
with:
name: velox-arrow-jar-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
sed -i
's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g'
/etc/yum.repos.d/CentOS-* && \
- yum update -y && yum install -y java-1.8.0-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-1.8.0-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-3.3 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.4 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.5 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.3 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.4 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.5 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
@@ -127,23 +123,20 @@ jobs:
with:
name: velox-arrow-jar-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
sed -i
's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g'
/etc/yum.repos.d/CentOS-* && \
- yum update -y && yum install -y java-17-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-17-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn
-Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn
-Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox
-Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests
-Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox
-Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests
-Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
@@ -168,20 +161,17 @@ jobs:
with:
name: velox-arrow-jar-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
- yum update -y && yum install -y java-21-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-21-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
@@ -245,23 +235,20 @@ jobs:
with:
name: velox-arrow-jar-centos-8-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
sed -i
's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g'
/etc/yum.repos.d/CentOS-* && \
- yum update -y && yum install -y java-1.8.0-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-1.8.0-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-3.3 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.4 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.5 -Pbackends-velox -Pceleborn -Puniffle
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.3 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.4 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.5 -Pbackends-velox -Pceleborn
-Puniffle -DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
@@ -286,23 +273,20 @@ jobs:
with:
name: velox-arrow-jar-centos-8-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
sed -i
's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g'
/etc/yum.repos.d/CentOS-* && \
- yum update -y && yum install -y java-17-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-17-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox -Pceleborn
-Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox -Pceleborn
-Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.4 -Pjava-17 -Pbackends-velox
-Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests
-Dmaven.source.skip
+ ./build/mvn clean install -Pspark-3.5 -Pjava-17 -Pbackends-velox
-Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon -DskipTests
-Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-17
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
@@ -327,20 +311,17 @@ jobs:
with:
name: velox-arrow-jar-centos-8-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- - name: Setup java and maven
+ - name: Setup java
run: |
- yum update -y && yum install -y java-21-openjdk-devel wget
- $SETUP install_maven
+ yum update -y && yum install -y java-21-openjdk-devel
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Build package for Spark
run: |
cd $GITHUB_WORKSPACE/ && \
- export MAVEN_HOME=/usr/lib/maven && \
- export PATH=${PATH}:${MAVEN_HOME}/bin && \
- mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.0 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
+ ./build/mvn clean install -Pspark-4.1 -Pscala-2.13 -Pjava-21
-Pbackends-velox -Pceleborn -Puniffle -Piceberg -Phudi -Pdelta -Ppaimon
-DskipTests -Dmaven.source.skip
- name: Upload bundle package
uses: actions/upload-artifact@v4
with:
diff --git a/.github/workflows/velox_weekly.yml
b/.github/workflows/velox_weekly.yml
index e598c52d6b..3ae0139671 100644
--- a/.github/workflows/velox_weekly.yml
+++ b/.github/workflows/velox_weekly.yml
@@ -19,13 +19,12 @@ on:
pull_request:
paths:
- '.github/workflows/velox_weekly.yml'
+ - 'build/mvn'
schedule:
- cron: '0 20 * * 0'
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
- MVN_CMD: 'mvn -ntp'
- SETUP: 'source .github/workflows/util/setup-helper.sh'
TPCH_TEST: "env GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare
--local --preset=velox --benchmark-type=h --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1"
INSTALL_PREFIX: /usr/local
@@ -67,7 +66,6 @@ jobs:
if [ ${{ github.event_name }} = "pull_request" ]; then
git fetch origin ${{ github.ref }}:pr_branch && git checkout
pr_branch
fi
- $SETUP install_maven
./dev/package.sh --spark_version=3.5
build-on-centos:
@@ -109,7 +107,6 @@ jobs:
yum install -y java-1.8.0-openjdk-devel patch wget git perl
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
export PATH=$JAVA_HOME/bin:$PATH
- $SETUP install_maven
cd $GITHUB_WORKSPACE/ && ./dev/package.sh --spark_version=3.5
@@ -163,8 +160,7 @@ jobs:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
- dnf update -y && dnf install -y java-1.8.0-openjdk-devel sudo patch
wget git perl
- $SETUP install_maven
+ dnf update -y && dnf install -y java-1.8.0-openjdk-devel sudo patch
git perl
echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV
- name: Build
run: |
@@ -172,7 +168,7 @@ jobs:
cd $GITHUB_WORKSPACE/
./dev/package.sh --spark_version=3.5
cd $GITHUB_WORKSPACE/tools/gluten-it
- $MVN_CMD clean install -Pspark-3.5
+ $GITHUB_WORKSPACE/build/mvn -ntp clean install -Pspark-3.5
cp
$GITHUB_WORKSPACE/package/target/thirdparty-lib/gluten-thirdparty-lib-*.jar
package/target/lib/
- name: Run TPC-H
run: |
@@ -193,8 +189,7 @@ jobs:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
- dnf update -y && dnf install -y java-1.8.0-openjdk-devel sudo patch
wget git perl
- $SETUP install_maven
+ dnf update -y && dnf install -y java-1.8.0-openjdk-devel sudo patch
git perl
echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV
- name: Build
run: |
@@ -203,7 +198,7 @@ jobs:
./dev/vcpkg/setup-build-depends.sh
./dev/package-vcpkg.sh --build_arrow=ON --spark_version=3.5
cd $GITHUB_WORKSPACE/tools/gluten-it
- $MVN_CMD clean install -Pspark-3.5
+ $GITHUB_WORKSPACE/build/mvn -ntp clean install -Pspark-3.5
- name: Run TPC-H
run: |
echo "JAVA_HOME: $JAVA_HOME"
diff --git a/dev/docker/Dockerfile.centos7-gcc13-static-build
b/dev/docker/Dockerfile.centos7-gcc13-static-build
index 7704d691dc..89ac3f5012 100644
--- a/dev/docker/Dockerfile.centos7-gcc13-static-build
+++ b/dev/docker/Dockerfile.centos7-gcc13-static-build
@@ -34,7 +34,7 @@ RUN set -ex; \
-e 's/^#baseurl/baseurl/' \
-e 's/mirror\.centos\.org/vault.centos.org/' \
/etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \
- yum install -y git patch wget sudo java-1.8.0-openjdk-devel ccache; \
+ yum install -y git patch sudo java-1.8.0-openjdk-devel ccache; \
git clone --depth=1 https://github.com/apache/incubator-gluten
/opt/gluten; \
echo "check_certificate = off" >> ~/.wgetrc; \
cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh; \
@@ -45,7 +45,7 @@ RUN set -ex; \
cd /opt/gluten; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --enable_s3=ON --enable_gcs=ON \
--enable_hdfs=ON --enable_abfs=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
dnf clean all; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos7-static-build b/dev/docker/Dockerfile.centos7-static-build
index e015dc5178..dcf6124a0b 100644
--- a/dev/docker/Dockerfile.centos7-static-build
+++ b/dev/docker/Dockerfile.centos7-static-build
@@ -34,7 +34,7 @@ RUN set -ex; \
-e 's/^#baseurl/baseurl/' \
-e 's/mirror\.centos\.org/vault.centos.org/' \
/etc/yum.repos.d/CentOS-SCLo-scl-rh.repo; \
- yum install -y git patch wget sudo java-1.8.0-openjdk-devel ccache; \
+ yum install -y git patch sudo java-1.8.0-openjdk-devel ccache; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
echo "check_certificate = off" >> ~/.wgetrc; \
cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh; \
@@ -45,7 +45,7 @@ RUN set -ex; \
source /opt/rh/rh-git227/enable; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --enable_s3=ON --enable_gcs=ON \
--enable_hdfs=ON --enable_abfs=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
dnf clean all; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos8-dynamic-build b/dev/docker/Dockerfile.centos8-dynamic-build
index 34c06b2beb..b6597bb108 100644
--- a/dev/docker/Dockerfile.centos8-dynamic-build
+++ b/dev/docker/Dockerfile.centos8-dynamic-build
@@ -22,7 +22,6 @@ ARG JAVA_VERSION=1.8.0
ENV JAVA_HOME=/usr/lib/jvm/java-${JAVA_VERSION}-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-ENV PATH=${PATH}:/usr/lib/maven/bin
RUN set -ex; \
@@ -32,14 +31,7 @@ RUN set -ex; \
dnf install -y --setopt=install_weak_deps=False gcc-toolset-11; \
echo "check_certificate = off" >> ~/.wgetrc; \
yum install -y java-${JAVA_VERSION}-openjdk-devel patch wget git perl; \
- maven_version=3.9.12; \
- local_binary="apache-maven-${maven_version}-bin.tar.gz"; \
mirror_host="https://www.apache.org/dyn/closer.lua"; \
- url="${mirror_host}/maven/maven-3/${maven_version}/binaries/${local_binary}?action=download"; \
- wget -nv -O ${local_binary} ${url}; \
- tar -xvf ${local_binary}; \
- mv apache-maven-${maven_version} /usr/lib/maven; \
- rm -rf ${local_binary}; \
wget -nv ${mirror_host}/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz?action=download -O /opt/apache-celeborn-0.5.4-bin.tgz; \
wget -nv ${mirror_host}/celeborn/celeborn-0.6.1/apache-celeborn-0.6.1-bin.tgz?action=download -O /opt/apache-celeborn-0.6.1-bin.tgz; \
wget -nv ${mirror_host}/uniffle/0.10.0/apache-uniffle-0.10.0-bin.tar.gz?action=download -O /opt/apache-uniffle-0.10.0-bin.tar.gz; \
@@ -57,7 +49,7 @@ RUN set -ex; \
cd /opt/gluten; \
source /opt/rh/gcc-toolset-11/enable; \
./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
dnf clean all; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos8-gcc13-static-build b/dev/docker/Dockerfile.centos8-gcc13-static-build
index 2a79f99fb9..1e92be37fd 100644
--- a/dev/docker/Dockerfile.centos8-gcc13-static-build
+++ b/dev/docker/Dockerfile.centos8-gcc13-static-build
@@ -18,7 +18,6 @@ FROM inteldpo/gluten-ci-images:centos-8_gcc13
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-ENV PATH=${PATH}:/usr/lib/maven/bin
# An actual path used for vcpkg cache.
ENV VCPKG_PATH=/var/cache/vcpkg
@@ -28,15 +27,13 @@ ENV VCPKG_BINARY_SOURCES=clear;files,${VCPKG_PATH},readwrite
RUN set -ex; \
yum update -y && yum install -y epel-release sudo dnf && yum install -y ccache; \
echo "check_certificate = off" >> ~/.wgetrc; \
- yum install -y java-1.8.0-openjdk-devel patch wget git perl python3 automake libtool flex; \
+ yum install -y java-1.8.0-openjdk-devel patch git perl python3 automake libtool flex; \
dnf -y --enablerepo=powertools install autoconf-archive ninja-build; \
pip3 install --upgrade pip; \
pip3 install cmake; \
rpm -qa | grep tzdata; \
dnf clean all; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
- cd /opt/gluten; bash .github/workflows/util/setup-helper.sh install_maven; \
- export PATH=/usr/lib/maven/bin:$PATH; \
mkdir -p ${VCPKG_PATH}; \
echo "Build arrow, then install the native libs to system paths and jar package to .m2/ directory."; \
if [ "$(uname -m)" = "aarch64" ]; then \
@@ -46,6 +43,6 @@ RUN set -ex; \
cd /opt/gluten; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --enable_s3=ON --enable_gcs=ON \
--enable_hdfs=ON --enable_abfs=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos8-static-build b/dev/docker/Dockerfile.centos8-static-build
index 3007462126..40884d82dd 100644
--- a/dev/docker/Dockerfile.centos8-static-build
+++ b/dev/docker/Dockerfile.centos8-static-build
@@ -18,7 +18,6 @@ FROM centos:8
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-ENV PATH=${PATH}:/usr/lib/maven/bin
# An actual path used for vcpkg cache.
ENV VCPKG_PATH=/var/cache/vcpkg
@@ -31,7 +30,7 @@ RUN set -ex; \
yum update -y && yum install -y epel-release sudo dnf && yum install -y ccache; \
dnf install -y --setopt=install_weak_deps=False gcc-toolset-11; \
echo "check_certificate = off" >> ~/.wgetrc; \
- yum install -y java-1.8.0-openjdk-devel patch wget git perl; \
+ yum install -y java-1.8.0-openjdk-devel patch git perl; \
rpm -qa | grep tzdata; \
dnf clean all; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
@@ -47,6 +46,6 @@ RUN set -ex; \
export SSL_VERIFY=false; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --enable_s3=ON --enable_gcs=ON \
--enable_hdfs=ON --enable_abfs=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos9-dynamic-build b/dev/docker/Dockerfile.centos9-dynamic-build
index c4e74d6b7c..9e9b8c485d 100644
--- a/dev/docker/Dockerfile.centos9-dynamic-build
+++ b/dev/docker/Dockerfile.centos9-dynamic-build
@@ -22,7 +22,6 @@ ARG JAVA_VERSION=1.8.0
ENV JAVA_HOME=/usr/lib/jvm/java-${JAVA_VERSION}-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-ENV PATH=${PATH}:/usr/lib/maven/bin
RUN set -ex; \
@@ -30,14 +29,7 @@ RUN set -ex; \
dnf install -y --setopt=install_weak_deps=False gcc-toolset-12 gcc-toolset-13; \
echo "check_certificate = off" >> ~/.wgetrc; \
yum install -y java-${JAVA_VERSION}-openjdk-devel patch wget git perl; \
- maven_version=3.9.12; \
- local_binary="apache-maven-${maven_version}-bin.tar.gz"; \
mirror_host="https://www.apache.org/dyn/closer.lua"; \
- url="${mirror_host}/maven/maven-3/${maven_version}/binaries/${local_binary}?action=download"; \
- wget -nv -O ${local_binary} ${url}; \
- tar -xvf ${local_binary}; \
- mv apache-maven-${maven_version} /usr/lib/maven; \
- rm -rf ${local_binary}; \
wget -nv ${mirror_host}/celeborn/celeborn-0.5.4/apache-celeborn-0.5.4-bin.tgz?action=download -O /opt/apache-celeborn-0.5.4-bin.tgz; \
wget -nv ${mirror_host}/celeborn/celeborn-0.6.1/apache-celeborn-0.6.1-bin.tgz?action=download -O /opt/apache-celeborn-0.6.1-bin.tgz; \
wget -nv ${mirror_host}/uniffle/0.10.0/apache-uniffle-0.10.0-bin.tar.gz?action=download -O /opt/apache-uniffle-0.10.0-bin.tar.gz; \
@@ -55,7 +47,7 @@ RUN set -ex; \
cd /opt/gluten; \
source /opt/rh/gcc-toolset-12/enable; \
./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
dnf clean all; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/Dockerfile.centos9-static-build b/dev/docker/Dockerfile.centos9-static-build
index 3e8ff26309..43e7cac088 100644
--- a/dev/docker/Dockerfile.centos9-static-build
+++ b/dev/docker/Dockerfile.centos9-static-build
@@ -18,7 +18,6 @@ FROM quay.io/centos/centos:stream9
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk
ENV PATH=$JAVA_HOME/bin:$PATH
-ENV PATH=${PATH}:/usr/lib/maven/bin
# An actual path used for vcpkg cache.
ENV VCPKG_PATH=/var/cache/vcpkg
@@ -29,7 +28,7 @@ RUN set -ex; \
yum update -y && yum install -y epel-release sudo dnf && yum install -y ccache; \
dnf install -y --setopt=install_weak_deps=False gcc-toolset-12 gcc-toolset-13; \
echo "check_certificate = off" >> ~/.wgetrc; \
- yum install -y java-17-openjdk-devel patch wget git perl; \
+ yum install -y java-17-openjdk-devel patch git perl; \
dnf clean all; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
cd /opt/gluten && bash ./dev/vcpkg/setup-build-depends.sh; \
@@ -43,6 +42,6 @@ RUN set -ex; \
cd /opt/gluten; \
bash ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --enable_s3=ON --enable_gcs=ON \
--enable_hdfs=ON --enable_abfs=ON build_arrow; \
- mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
+ ./build/mvn dependency:go-offline -Pbackends-velox -Piceberg -Pdelta -Pspark-3.5 -DskipTests; \
rm -rf /opt/gluten; \
rm -rf /root/.cache/ccache;
diff --git a/dev/docker/cudf/Dockerfile b/dev/docker/cudf/Dockerfile
index f8b05e75fb..41598046ac 100644
--- a/dev/docker/cudf/Dockerfile
+++ b/dev/docker/cudf/Dockerfile
@@ -20,13 +20,12 @@ ENV CUDA_ARCHITECTURES=70
ENV LD_LIBRARY_PATH=/opt/gluten/ep/build-velox/build/velox_ep/_build/release/_deps/curl-build/lib:$LD_LIBRARY_PATH
-RUN yum install -y sudo patch maven perl && \
+RUN yum install -y sudo patch perl && \
dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1; \
dnf autoremove -y && dnf clean all; \
rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14 /opt/rh/gcc-toolset-12; \
ln -sf /usr/local/bin/cmake /usr/bin && \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten && \
- cd /opt/gluten/.github/workflows/util/ && \
cd /opt/gluten && \
source /opt/rh/gcc-toolset-14/enable && \
bash ./dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=ON --spark_version=3.4 --build_tests=ON --build_benchmarks=ON --enable_gpu=ON && \
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]