This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4e35abc48a [GLUTEN-11302][VL] Fix Gluten-GPU build by bumping to
CUDA-13.1 (#11275)
4e35abc48a is described below
commit 4e35abc48ac5b7f3549d959dbbf72b6d5aee9edc
Author: Yuan <[email protected]>
AuthorDate: Thu Dec 18 21:55:00 2025 +0800
[GLUTEN-11302][VL] Fix Gluten-GPU build by bumping to CUDA-13.1 (#11275)
Velox upgraded to CUDF-25.12; this patch fixes the Gluten-GPU build by:
- switching to gcc-14
- bumping to cuda-toolkit-13.1
The new cuda-toolkit-13.1 requires more disk space, so this patch also
modifies the GHA workflows to free up disk space on the runner first
---------
Signed-off-by: Yuan <[email protected]>
---
.github/workflows/velox_backend_cache.yml | 100 +++++++-----------------------
.github/workflows/velox_backend_x86.yml | 30 ++++++---
cpp/velox/CMakeLists.txt | 3 +-
dev/docker/cudf/Dockerfile | 5 +-
ep/build-velox/src/build-velox.sh | 4 +-
5 files changed, 53 insertions(+), 89 deletions(-)
diff --git a/.github/workflows/velox_backend_cache.yml
b/.github/workflows/velox_backend_cache.yml
index eb060c3a90..8d0a3da0c9 100644
--- a/.github/workflows/velox_backend_cache.yml
+++ b/.github/workflows/velox_backend_cache.yml
@@ -145,9 +145,13 @@ jobs:
strategy:
matrix:
os: [ ubuntu-22.04 ]
- container: apache/gluten:centos-9-jdk8-cudf
steps:
- - uses: actions/checkout@v2
+ - name: "node-cleanup" # by default the free runner does not have enough
disk space
+ run: |
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
/opt/hostedtoolcache/CodeQL
+ sudo docker image prune --all --force
+ sudo docker builder prune -a
+ - uses: actions/checkout@v4
- name: Get Ccache
uses: actions/cache/restore@v3
with:
@@ -157,14 +161,26 @@ jobs:
ccache-centos9-release-shared-${{runner.arch}}
- name: Build Gluten shared libraries
run: |
- export CCACHE_MAXSIZE=1G
- dnf autoremove -y
+ docker run -v $GITHUB_WORKSPACE:/work -w /work
apache/gluten:centos-9-jdk8-cudf bash -c "
+ rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build
script later
df -a
- rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build
script later
+ dnf autoremove -y && dnf clean all
+ dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1
+ ls -l /usr/local/
source /opt/rh/gcc-toolset-12/enable
+
+ export CMAKE_BUILD_PARALLEL_LEVEL=4
export NUM_THREADS=4
- bash dev/builddeps-veloxbe.sh --run_setup_script=OFF
--build_arrow=OFF --build_tests=OFF --build_benchmarks=ON --enable_gpu=ON #
TODO: re-enable tests with more disk space
+ export CCACHE_MAXSIZE=1G
+ export CCACHE_DIR=/work/.ccache
+ mkdir -p /work/.ccache
+
+ cd /work
+ bash dev/builddeps-veloxbe.sh --run_setup_script=OFF
--build_arrow=OFF --build_tests=ON --build_benchmarks=ON --enable_gpu=ON #
TODO: re-enable tests with more disk space
rm -rf ep/build-velox/build/velox_ep
+ mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
+ ccache -s
+ "
- name: Save Ccache
if: always()
uses: actions/cache/save@v3
@@ -172,75 +188,3 @@ jobs:
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos9-release-shared-${{runner.arch}}-${{github.sha}}
-
- # ccache-native-lib-ubuntu-velox-ut:
- # runs-on: ubuntu-22.04
- # env:
- # CCACHE_DIR: "${{ github.workspace }}/.ccache"
- # container: ghcr.io/facebookincubator/velox-dev:amd64-ubuntu-22.04-avx
- # steps:
- # - uses: actions/checkout@v2
- # - name: Get Ccache
- # uses: actions/cache/restore@v3
- # with:
- # path: '${{ env.CCACHE_DIR }}'
- # key: ccache-ubuntu-release-default
- # - name: Ensure Cache Dirs Exists
- # working-directory: ${{ github.workspace }}
- # run: |
- # mkdir -p '${{ env.CCACHE_DIR }}'
- # - name: Build Gluten native libraries
- # run: |
- # rm -rf /opt/miniconda-for-velox/
- # cd ep/build-velox/src && \
- # ./get-velox.sh
- # cd ../build/velox_ep/
- # make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON
-DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
-
- # - name: CCache after
- # run: |
- # ccache -vs
-
- # - uses: actions/cache/save@v3
- # with:
- # path: '${{ env.CCACHE_DIR }}'
- # key: ccache-ubuntu-release-default
-# ccache-native-lib-centos-velox-ut:
-# runs-on: ubuntu-22.04
-# env:
-# CCACHE_DIR: "${{ github.workspace }}/.ccache"
-# container: ghcr.io/facebookincubator/velox-dev:centos8
-# steps:
-# - uses: actions/checkout@v2
-# - name: Setup java and maven
-# run: |
-# yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
-# wget
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-# tar -xvf apache-maven-3.8.8-bin.tar.gz
-# mv apache-maven-3.8.8 /usr/lib/maven
-# - name: Get Ccache
-# uses: actions/cache/restore@v3
-# with:
-# path: '${{ env.CCACHE_DIR }}'
-# key: ccache-centos-release-default
-# - name: Ensure Cache Dirs Exists
-# working-directory: ${{ github.workspace }}
-# run: |
-# mkdir -p '${{ env.CCACHE_DIR }}'
-# - name: Build Gluten native libraries
-# run: |
-# rm -rf /opt/miniconda-for-velox/
-# cd ep/build-velox/src && \
-# ./get-velox.sh
-# cd ../build/velox_ep/
-# source /opt/rh/gcc-toolset-9/enable
-# make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON
-DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
-#
-# - name: CCache after
-# run: |
-# ccache -s
-#
-# - uses: actions/cache/save@v3
-# with:
-# path: '${{ env.CCACHE_DIR }}'
-# key: ccache-centos-release-default
diff --git a/.github/workflows/velox_backend_x86.yml
b/.github/workflows/velox_backend_x86.yml
index 5618aba857..1bcf1f97ef 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -1314,9 +1314,15 @@ jobs:
build-cudf-centos-9:
runs-on: ubuntu-22.04
- container: apache/gluten:centos-9-jdk8-cudf
steps:
- - uses: actions/checkout@v2
+ - name: "node-cleanup" # by default the free runner does not have enough
disk space
+ run: |
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
/opt/hostedtoolcache/CodeQL
+ sudo docker image prune --all --force
+ sudo docker builder prune -a
+ - run: df -h | sort -k 5 -nr # check disk space for debug
+
+ - uses: actions/checkout@v4
- name: Get Ccache
uses: actions/cache/restore@v4
with:
@@ -1326,15 +1332,25 @@ jobs:
ccache-centos9-release-shared-${{runner.arch}}
- name: Build Gluten native libraries
run: |
- rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build
script later
- dnf autoremove -y
+ docker run -v $GITHUB_WORKSPACE:/work -w /work
apache/gluten:centos-9-jdk8-cudf bash -c "
+ rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12 # hack to use gcc 14, should upgrade in Velox build
script later
df -a
+ dnf autoremove -y && dnf clean all
+ dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1
+ ls -l /usr/local/
source /opt/rh/gcc-toolset-12/enable
+
+ export CMAKE_BUILD_PARALLEL_LEVEL=4
export NUM_THREADS=4
- # bash dev/builddeps-veloxbe.sh --run_setup_script=OFF
--build_arrow=OFF --build_tests=OFF --build_benchmarks=ON --enable_gpu=ON #
TODO: re-enable tests with more disk space
- # rm -rf ep/build-velox/build/velox_ep
- # mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
+ export CCACHE_DIR=/work/.ccache
+ mkdir -p /work/.ccache
+
+ cd /work
+ bash dev/builddeps-veloxbe.sh --run_setup_script=OFF
--build_arrow=OFF --build_tests=ON --build_benchmarks=ON --enable_gpu=ON #
TODO: re-enable tests with more disk space
+ rm -rf ep/build-velox/build/velox_ep
+ mvn clean package -Pbackends-velox -Pspark-3.4 -DskipTests
ccache -s
+ "
spark-test-spark40:
needs: build-native-lib-centos-7
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index e389113c4e..9547cde0e2 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -439,6 +439,7 @@ if(ENABLE_GPU)
${VELOX_BUILD_PATH}/_deps/nvtx3-src/c/include
${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/include
${VELOX_BUILD_PATH}/_deps/rapids_logger-src/include
+ /usr/local/cuda/include/cccl
/usr/local/cuda/include)
target_compile_definitions(
@@ -458,7 +459,7 @@ if(ENABLE_GPU)
${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/lib64/libnvcomp.so
${VELOX_BUILD_PATH}/_deps/nvcomp_proprietary_binary-src/lib64/libnvcomp_cpu.so
${VELOX_BUILD_PATH}/_deps/rapids_logger-build/librapids_logger.so
- /usr/local/cuda-12.8/lib64/libcudart.so.12)
+ /usr/local/cuda/lib64/libcudart.so)
endif()
add_custom_command(
diff --git a/dev/docker/cudf/Dockerfile b/dev/docker/cudf/Dockerfile
index 42258a69c9..c900c632ea 100644
--- a/dev/docker/cudf/Dockerfile
+++ b/dev/docker/cudf/Dockerfile
@@ -28,7 +28,10 @@ ENV CUDA_ARCHITECTURES=70
WORKDIR /opt/gluten
-RUN rm -rf /opt/rh/gcc-toolset-12 && cp -r /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12; \
+RUN rm -rf /opt/rh/gcc-toolset-12 && ln -s /opt/rh/gcc-toolset-14
/opt/rh/gcc-toolset-12; \
+ dnf remove -y cuda-toolkit-12* && dnf install -y cuda-toolkit-13-1; \
+ dnf autoremove -y && dnf clean all; \
+ source /opt/rh/gcc-toolset-12/enable; \
bash ./dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=ON
--spark_version=3.4 --build_tests=ON --build_benchmarks=ON --enable_gpu=ON &&
rm -rf /opt/gluten
# You can try the data in folder
backends-velox/src/test/resources/tpch-data-parquet
diff --git a/ep/build-velox/src/build-velox.sh
b/ep/build-velox/src/build-velox.sh
index 3e0f6be4fb..ac62f8fc27 100755
--- a/ep/build-velox/src/build-velox.sh
+++ b/ep/build-velox/src/build-velox.sh
@@ -134,8 +134,8 @@ function compile {
if [ $ENABLE_GPU == "ON" ]; then
# the cuda default options are for Centos9 image from Meta
echo "enable GPU support."
- COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_GPU=ON
-DVELOX_ENABLE_CUDF=ON -DCMAKE_CUDA_ARCHITECTURES=70 \
- -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.8/bin/nvcc"
+ COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_GPU=ON
-DVELOX_ENABLE_CUDF=ON -DCMAKE_CUDA_ARCHITECTURES=75 \
+ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc"
fi
if [ -n "${GLUTEN_VCPKG_ENABLED:-}" ]; then
COMPILE_OPTION="$COMPILE_OPTION -DVELOX_GFLAGS_TYPE=static"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]