This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch branch-1.2
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/branch-1.2 by this push:
     new cdc058cda6 [VL] Port CI changes to branch-1.2 and pick simdjson related fix (#7314)
cdc058cda6 is described below

commit cdc058cda6fc5c76daabd41b6c2e762f5dfb61fe
Author: PHILO-HE <[email protected]>
AuthorDate: Tue Sep 24 10:21:17 2024 +0800

    [VL] Port CI changes to branch-1.2 and pick simdjson related fix (#7314)
---
 .github/workflows/util/install_spark_resources.sh  |  93 ++++
 .github/workflows/util/setup_helper.sh             |  32 ++
 .../{velox_docker.yml => velox_backend.yml}        | 554 ++++++++-------------
 dev/ci-velox-buildshared-centos-8.sh               |   7 +
 dev/ci-velox-buildstatic-centos-7.sh               |   8 +
 dev/vcpkg/ports/simdjson/vcpkg.json                |   3 +-
 6 files changed, 345 insertions(+), 352 deletions(-)

diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh
new file mode 100755
index 0000000000..e1645b170d
--- /dev/null
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Download Spark resources, required by some Spark UTs. The resource path should be set
+# for spark.test.home in mvn test.
+
+set -e
+
+INSTALL_DIR=$GITHUB_WORKSPACE
+case "$1" in
+3.2)
+    # Spark-3.2
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz && \
+    tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz spark-3.2.2-bin-hadoop3.2/jars/ && \
+    rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
+    tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark32/spark_home/ && \
+    mv sql shims/spark32/spark_home/
+    ;;
+3.3)
+    # Spark-3.3
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz spark-3.3.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.3.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
+    tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark33/spark_home/ && \
+    mv sql shims/spark33/spark_home/
+    ;;
+3.4)
+    # Spark-3.4
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz spark-3.4.2-bin-hadoop3/jars/ && \
+    rm -rf spark-3.4.2-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
+    tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark34/spark_home/ && \
+    mv sql shims/spark34/spark_home/
+    ;;
+3.5)
+    # Spark-3.5
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
+    tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark35/spark_home/ && \
+    mv sql shims/spark35/spark_home/
+    ;;
+3.5-scala2.13)
+    # Spark-3.5, scala 2.13
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
+    mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
+    tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark35/spark_home/ && \
+    mv sql shims/spark35/spark_home/
+    ;;
+*)
+    echo "Spark version is expected to be specified."
+    exit 1
+    ;;
+esac
diff --git a/.github/workflows/util/setup_helper.sh b/.github/workflows/util/setup_helper.sh
new file mode 100644
index 0000000000..8b41d83264
--- /dev/null
+++ b/.github/workflows/util/setup_helper.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+function install_maven {
+  (
+    cd /opt/
+    wget -nv https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+    tar -xvf apache-maven-3.8.8-bin.tar.gz && mv apache-maven-3.8.8 /usr/lib/maven
+  )
+  echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+}
+
+for cmd in "$@"
+do
+    echo "Running: $cmd"
+    "$cmd"
+done
diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_backend.yml
similarity index 66%
rename from .github/workflows/velox_docker.yml
rename to .github/workflows/velox_backend.yml
index a5778fc8d4..3b4cc56d45 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_backend.yml
@@ -18,7 +18,7 @@ name: Velox backend Github Runner
 on:
   pull_request:
     paths:
-      - '.github/workflows/velox_docker.yml'
+      - '.github/workflows/velox_backend.yml'
       - 'pom.xml'
       - 'backends-velox/**'
       - 'gluten-uniffle/**'
@@ -27,13 +27,14 @@ on:
       - 'gluten-celeborn/velox/**'
       - 'gluten-ras/**'
       - 'gluten-core/**'
-      - 'gluten-data/**'
+      - 'gluten-substrait/**'
+      - 'gluten-arrow/**'
       - 'gluten-delta/**'
       - 'gluten-iceberg/**'
+      - 'gluten-hudi/**'
       - 'gluten-ut/**'
       - 'shims/**'
       - 'tools/gluten-it/**'
-      - 'tools/gluten-te/**'
       - 'ep/build-velox/**'
       - 'cpp/*'
       - 'cpp/CMake/**'
@@ -44,6 +45,8 @@ on:
 env:
   ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
   MVN_CMD: 'mvn -ntp'
+  WGET_CMD: 'wget -nv'
+  SETUP: 'bash .github/workflows/util/setup_helper.sh'
 
 concurrency:
   group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -52,34 +55,33 @@ concurrency:
 jobs:
   build-native-lib-centos-7:
     runs-on: ubuntu-20.04
-    container: apache/gluten:gluten-vcpkg-builder_2024_08_05 # centos7 with dependencies installed
+    container: apache/gluten:vcpkg-centos-7
     steps:
       - uses: actions/checkout@v2
       - name: Generate cache key
         run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
       - name: Cache
         id: cache
         uses: actions/cache/restore@v3
         with:
           path: |
             ./cpp/build/releases/
-            ~/.m2/repository/org/apache/arrow/
-          key: cache-velox-build-${{ hashFiles('./cache-key') }}
-      - name: Build Gluten Velox third party
+          key: cache-velox-build-centos-7-${{ hashFiles('./cache-key') }}
+      - name: Build Gluten native libraries
         if: ${{ steps.cache.outputs.cache-hit != 'true' }}
         run: |
-          source dev/ci-velox-buildstatic.sh
-      - name: Upload Artifact Native
-        uses: actions/upload-artifact@v3
+          df -a
+          cd $GITHUB_WORKSPACE/
+          bash dev/ci-velox-buildstatic-centos-7.sh
+      - uses: actions/upload-artifact@v3
         with:
-          path: ./cpp/build/releases/
           name: velox-native-lib-centos-7-${{github.sha}}
-      - name: Upload Artifact Arrow Jar
-        uses: actions/upload-artifact@v3
+          path: ./cpp/build/releases/
+      - uses: actions/upload-artifact@v3
         with:
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
-          name: velox-arrow-jar-centos-7-${{github.sha}}
 
   run-tpc-test-ubuntu:
     needs: build-native-lib-centos-7
@@ -119,7 +121,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Setup tzdata
         run: |
@@ -139,7 +141,7 @@ jobs:
             apt remove openjdk-11* -y
           fi
           ls -l 
/root/.m2/repository/org/apache/arrow/arrow-dataset/15.0.0-gluten/
-      - name: Build and run TPCH/DS
+      - name: Build and run TPC-H / TPC-DS
         run: |
           cd $GITHUB_WORKSPACE/
           export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
@@ -190,7 +192,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -207,12 +209,9 @@ jobs:
           else
             yum update -y && yum install -y java-1.8.0-openjdk-devel wget
           fi
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
+          $SETUP install_maven
       - name: Set environment variables
         run: |
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
           if [ "${{ matrix.java }}" = "java-17" ]; then
             echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk" >> $GITHUB_ENV
           elif [ "${{ matrix.java }}" = "java-11" ]; then
@@ -246,6 +245,48 @@ jobs:
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
             --extra-conf=spark.gluten.ras.enabled=true 
 
+  run-tpc-test-ubuntu-iothreads:
+    needs: build-native-lib-centos-7
+    strategy:
+      fail-fast: false
+      matrix:
+        spark: [ "spark-3.5" ]
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Download All Native Artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: velox-native-lib-centos-7-${{github.sha}}
+          path: ./cpp/build/releases/
+      - name: Download All Arrow Jar Artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: arrow-jars-centos-7-${{github.sha}}
+          path: /home/runner/.m2/repository/org/apache/arrow/
+      - name: Setup java and maven
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y openjdk-8-jdk maven
+      - name: Set environment variables
+        run: |
+          echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
+      - name: Build for Spark ${{ matrix.spark }}
+        run: |
+          cd $GITHUB_WORKSPACE/ 
+          $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox 
-DskipTests
+          cd $GITHUB_WORKSPACE/tools/gluten-it
+          $MVN_CMD clean install -P${{ matrix.spark }}
+      - name: Build and run TPC-H / TPC-DS
+        run: |
+          cd $GITHUB_WORKSPACE/tools/gluten-it
+          GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
+            --local --preset=velox --benchmark-type=h --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=16
+          GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
+            --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=16
+
   run-tpc-test-ubuntu-oom:
     needs: build-native-lib-centos-7
     strategy:
@@ -275,7 +316,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /home/runner/.m2/repository/org/apache/arrow/
       - name: Setup java and maven
         run: |
@@ -296,7 +337,7 @@ jobs:
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
             -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
             -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
             -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
@@ -308,55 +349,59 @@ jobs:
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
             
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1
 \
             -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
             -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
             
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
             -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
       - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q95 low 
memory, memory isolation on
+        continue-on-error: true
         run: |
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
             
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1
 \
             -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
             -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
             
-d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
-            
-d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 || true
+            -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5
       - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory
         run: |
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 
\
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
             -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
             -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
             
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
 \
             
-d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
 \
             
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
-      - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low 
memory, memory isolation on # Disabled as error 
https://gist.github.com/zhztheplayer/abd5e83ccdc48730678ae7ebae479fcc
+      - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory, 
memory isolation on
         run: |
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 
\
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
             
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1
 \
             -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
             
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
 \
             
-d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
 \
-            
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
 || true
+            
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
       - name: TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory
         run: |
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
-            --skip-data-gen -m=OffHeapExecutionMemory \
+            -m=OffHeapExecutionMemory \
+            --extra-conf=spark.gluten.sql.columnar.backend.velox.IOThreads=0 \
             -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
             
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1
 \
             -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-            -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g || true
+            -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g \
+            
-d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \
+            -d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0
 
   run-tpc-test-ubuntu-randomkill:
     needs: build-native-lib-centos-7
@@ -387,7 +432,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /home/runner/.m2/repository/org/apache/arrow/
       - name: Setup java and maven
         run: |
@@ -408,59 +453,7 @@ jobs:
           cd tools/gluten-it \
           && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
-            --skip-data-gen  --random-kill-tasks --no-session-reuse
-
-  # run-tpc-test-ubuntu-sf30:
-  #   needs: build-native-lib-centos-7
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       spark: [ "spark-3.4" ]
-  #       shard: [ "1/4", "2/4", "3/4", "4/4" ]
-  #   runs-on: ubuntu-20.04
-  #   steps:
-  #     - name: Maximize build disk space
-  #       shell: bash
-  #       run: |
-  #         df -h
-  #         set -euo pipefail
-  #         echo "Removing unwanted software... "
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /opt/ghc
-  #         sudo rm -rf /opt/hostedtoolcache/CodeQL
-  #         sudo docker image prune --all --force > /dev/null
-  #         df -h
-  #     - uses: actions/checkout@v2
-  #     - name: Download All Artifacts
-  #       uses: actions/download-artifact@v3
-  #       with:
-  #         name: velox-native-lib-centos-7-${{github.sha}}
-  #         path: ./cpp/build/releases
-  #     - name: Setup java and maven
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install -y openjdk-8-jdk maven
-  #     - name: Set environment variables
-  #       run: |
-  #         echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
-  #     - name: Build for Spark ${{ matrix.spark }}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE/ 
-  #         $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox 
-DskipTests
-  #         cd $GITHUB_WORKSPACE/tools/gluten-it
-  #         $MVN_CMD clean install -P${{ matrix.spark }}
-  #         GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local 
--benchmark-type=h -s=30.0 --threads=12
-  #         GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local 
--benchmark-type=ds -s=30.0 --threads=12
-  #     - name: TPC-H / TPC-DS SF30.0 Parquet local ${{ matrix.spark }}
-  #       run: |
-  #         cd tools/gluten-it \
-  #         && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
-  #           --local --preset=velox --benchmark-type=h --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
-  #           --skip-data-gen --shard=${{ matrix.shard }} \
-  #         && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
-  #           --local --preset=velox --benchmark-type=ds --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
-  #           --skip-data-gen --shard=${{ matrix.shard }}
+            --random-kill-tasks --no-session-reuse
 
   run-tpc-test-centos8-uniffle:
     needs: build-native-lib-centos-7
@@ -480,7 +473,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -489,9 +482,7 @@ jobs:
       - name: Setup java and maven
         run: |
           yum update -y && yum install -y java-1.8.0-openjdk-devel wget git
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
+          $SETUP install_maven
       - name: Build for Spark ${{ matrix.spark }}
         run: |
           cd $GITHUB_WORKSPACE/ && \
@@ -500,8 +491,6 @@ jobs:
           $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox 
-Puniffle -DskipTests
       - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with uniffle 
0.8.0
         run: |
-          export MAVEN_HOME=/usr/lib/maven && \
-          export PATH=${PATH}:${MAVEN_HOME}/bin && \
           export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
           cd /opt && \
           git clone -b branch-0.8 
https://github.com/apache/incubator-uniffle.git && \
@@ -511,9 +500,9 @@ jobs:
           sed -i '226d' 
./server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java && \
           $MVN_CMD clean install -Phadoop2.8 -DskipTests
           cd /opt && \
-          wget -nv 
https://archive.apache.org/dist/incubator/uniffle/0.8.0/apache-uniffle-0.8.0-incubating-bin.tar.gz
 && \
+          ${WGET_CMD} 
https://archive.apache.org/dist/incubator/uniffle/0.8.0/apache-uniffle-0.8.0-incubating-bin.tar.gz
 && \
           tar xzf apache-uniffle-0.8.0-incubating-bin.tar.gz -C /opt/ && mv 
/opt/rss-0.8.0-hadoop2.8 /opt/uniffle && \
-          wget -nv 
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz 
&& \
+          ${WGET_CMD} 
https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz 
&& \
           tar xzf hadoop-2.8.5.tar.gz -C /opt/
           rm -f /opt/uniffle/jars/server/shuffle-server-0.8.0-SNAPSHOT.jar
           cp 
/opt/incubator-uniffle/server/target/shuffle-server-0.8.1-SNAPSHOT.jar 
/opt/uniffle/jars/server/
@@ -546,7 +535,7 @@ jobs:
       - name: Download All Arrow Jar Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-arrow-jar-centos-7-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Setup tzdata
         run: |
@@ -571,7 +560,7 @@ jobs:
           fi
           echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
           cd /opt && mkdir -p celeborn && \
-          wget https://archive.apache.org/dist/celeborn/${{ matrix.celeborn 
}}/apache-${{ matrix.celeborn }}-bin.tgz && \
+          ${WGET_CMD} https://archive.apache.org/dist/celeborn/${{ 
matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz && \
           tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn 
--strip-components=1 && cd celeborn && \
           mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
           bash -c "echo -e 
'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g'
 > ./conf/celeborn-env.sh" && \
@@ -583,88 +572,21 @@ jobs:
           GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox-with-celeborn --benchmark-type=ds 
--error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1
 
-  build-native-lib-centos-8:
-    runs-on: ubuntu-20.04
-    container: ghcr.io/facebookincubator/velox-dev:centos8
-    steps:
-      - uses: actions/checkout@v2
-      - name: Generate cache key
-        run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', 
'./github/workflows/*') }} > cache-key
-      - name: Cache
-        id: cache
-        uses: actions/cache/restore@v3
-        with:
-          path: |
-            ./cpp/build/releases/
-            /root/.m2/repository/org/apache/arrow/
-          key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }}
-      - name: Update mirror list
-        run: |
-          sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* 
|| true
-          sed -i -e 
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" 
/etc/yum.repos.d/CentOS-* || true
-      - name: Setup build dependency
-        if: ${{ steps.cache.outputs.cache-hit != 'true' }}
-        run: |
-          yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          # Required by building arrow java.
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz && mv apache-maven-3.8.8 
/usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
-      - name: Build Gluten Velox third party
-        if: ${{ steps.cache.outputs.cache-hit != 'true' }}
-        run: |
-          source /opt/rh/gcc-toolset-9/enable
-          ./dev/builddeps-veloxbe.sh --run_setup_script=OFF 
--enable_ep_cache=OFF --build_tests=ON \
-              --build_examples=ON --build_benchmarks=ON --build_protobuf=ON
-      - name: Gluten CPP Test
-        run: |
-          cd ./cpp/build && \
-          ctest -V
-      - uses: actions/upload-artifact@v3
-        with:
-          name: velox-native-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/releases/
-      - uses: actions/upload-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
-      - uses: actions/upload-artifact@v3
-        with:
-          name: benchmark-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/benchmarks/
-      - uses: actions/upload-artifact@v3
-        with:
-          name: arrow-jars-centos-8-${{github.sha}}
-          path: /root/.m2/repository/org/apache/arrow/
-
   run-spark-test-spark32:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
       CCACHE_DIR: "${{ github.workspace }}/.ccache"
     steps:
       - uses: actions/checkout@v2
-      - name: Download All Artifacts
-        uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
-      - name: Download UDF Example Lib
-        uses: actions/download-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
-      - name: Download Benchmark
-        uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v3
         with:
-          name: benchmark-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/benchmarks/
-      - name: Download Arrow Jars
-        uses: actions/download-artifact@v3
-        with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -673,10 +595,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -688,17 +607,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.2.2 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz 
spark-3.2.2-bin-hadoop3.2/jars/ && \
-          rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.2.2.tar.gz 
spark-3.2.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark32/spark_home/ && \
-          mv sql shims/spark32/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.2
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -708,23 +617,18 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox 
-Pceleborn -Piceberg -Pdelta 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" 
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 && \
-          $MVN_CMD test -Pspark-3.2 -Pbackends-velox -Piceberg -Pdelta 
-DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+          $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg \
+          -Pdelta -Phudi -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
         if: failure()
         uses: actions/upload-artifact@v4
         with:
           name: golden-files-spark32
           path: /tmp/tpch-approved-plan/**
-      - name: Gluten CPP Benchmark Test
-        run: |
-          # This test depends on example.json generated by the above mvn test.
-          cd $GITHUB_WORKSPACE/cpp/build/velox/benchmarks && \
-          sudo chmod +x ./generic_benchmark && \
-          ./generic_benchmark --run-example --with-shuffle --threads 1 
--iterations 1
 
   run-spark-test-spark32-slow:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -734,12 +638,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -748,10 +652,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -763,18 +664,15 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.2.2 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.2.2.tar.gz 
spark-3.2.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark32/spark_home/ && \
-          mv sql shims/spark32/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.2
       - name: Build and run unit test for Spark 3.2.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox 
-Pceleborn -Piceberg -Pdelta 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" 
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
   run-spark-test-spark33:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -784,17 +682,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
-      - name: Download UDF Example Lib
-        uses: actions/download-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -803,10 +696,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -818,17 +708,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.3.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz 
spark-3.3.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.3.1-bin-hadoop3.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.3.1.tar.gz 
spark-3.3.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark33/spark_home/ && \
-          mv sql shims/spark33/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.3
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -838,8 +718,9 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.3 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" 
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 && \
-          $MVN_CMD test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta 
-DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
         if: failure()
         uses: actions/upload-artifact@v4
@@ -849,7 +730,7 @@ jobs:
 
 
   run-spark-test-spark33-slow:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -859,12 +740,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -873,10 +754,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -888,18 +766,16 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.3.1 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.3.1.tar.gz 
spark-3.3.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark33/spark_home/ && \
-          mv sql shims/spark33/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.3
       - name: Build and Run unit test for Spark 3.3.1 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.3 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" 
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
+          -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
   run-spark-test-spark34:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -909,17 +785,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
-      - name: Download UDF Example Lib
-        uses: actions/download-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -928,10 +799,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -943,17 +811,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.4.2 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz 
spark-3.4.2-bin-hadoop3/jars/ && \
-          rm -rf spark-3.4.2-bin-hadoop3.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.4.2.tar.gz 
spark-3.4.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark34/spark_home/ && \
-          mv sql shims/spark34/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.4
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -963,8 +821,9 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.4 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" 
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 && \
-          $MVN_CMD test -Pspark-3.4 -Pbackends-velox -Piceberg -Pdelta 
-DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
         if: failure()
         uses: actions/upload-artifact@v4
@@ -974,7 +833,7 @@ jobs:
 
 
   run-spark-test-spark34-slow:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -984,12 +843,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -998,10 +857,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -1013,18 +869,16 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.4.2 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.4.2.tar.gz 
spark-3.4.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark34/spark_home/ && \
-          mv sql shims/spark34/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.4
       - name: Build and Run unit test for Spark 3.4.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.4 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" 
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
+          -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
   run-spark-test-spark35:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -1034,17 +888,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
-      - name: Download UDF Example Lib
-        uses: actions/download-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -1053,10 +902,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -1068,17 +914,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz 
spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.5.1.tar.gz 
spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.5
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -1088,8 +924,9 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.5 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" 
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 && \
-          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
         if: failure()
         uses: actions/upload-artifact@v4
@@ -1098,7 +935,7 @@ jobs:
           path: /tmp/tpch-approved-plan/**
 
   run-spark-test-spark35-scala213:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -1108,17 +945,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
-      - name: Download UDF Example Lib
-        uses: actions/download-artifact@v3
-        with:
-          name: udf-example-lib-centos-8-${{github.sha}}
-          path: ./cpp/build/velox/udf/examples/
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -1127,10 +959,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -1142,17 +971,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz 
spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.13 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.13 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.5.1.tar.gz 
spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.5-scala2.13
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -1162,11 +981,12 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.13
-          $MVN_CMD clean install -Pspark-3.5 -Pscala-2.13 -Pbackends-velox 
-Pceleborn -Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" 
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 && \
-          $MVN_CMD test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Piceberg 
-Pdelta -DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+          $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Pceleborn -Piceberg \
+          -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
+          -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 
   run-spark-test-spark35-slow:
-    needs: build-native-lib-centos-8
+    needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: ghcr.io/facebookincubator/velox-dev:centos8
     env:
@@ -1176,12 +996,12 @@ jobs:
       - name: Download All Artifacts
         uses: actions/download-artifact@v3
         with:
-          name: velox-native-lib-centos-8-${{github.sha}}
+          name: velox-native-lib-centos-7-${{github.sha}}
           path: ./cpp/build/releases
       - name: Download Arrow Jars
         uses: actions/download-artifact@v3
         with:
-          name: arrow-jars-centos-8-${{github.sha}}
+          name: arrow-jars-centos-7-${{github.sha}}
           path: /root/.m2/repository/org/apache/arrow/
       - name: Update mirror list
         run: |
@@ -1190,10 +1010,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-          tar -xvf apache-maven-3.8.8-bin.tar.gz
-          mv apache-maven-3.8.8 /usr/lib/maven
-          echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
+          $SETUP install_maven
       - name: Get Ccache
         uses: actions/cache/restore@v3
         with:
@@ -1205,18 +1022,55 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget 
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz 
&& \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz 
spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          mv jars 
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz 
&& \
-          tar --strip-components=1 -xf v3.5.1.tar.gz 
spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.5
       - name: Build and Run unit test for Spark 3.5.1 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.5 -Pbackends-velox -Pceleborn 
-Piceberg -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" 
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
+          -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+
+  run-cpp-test-udf-test:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:centos8
+    steps:
+      - uses: actions/checkout@v2
+      - name: Generate cache key
+        run: |
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
+      - name: Cache
+        id: cache
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ./cpp/build/releases/
+            ./cpp/build/velox/udf/examples/
+            ./cpp/build/velox/benchmarks/
+            /root/.m2/repository/org/apache/arrow/
+          key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }}
+      - name: Setup java and maven
+        run: |
+          sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
+          sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y
+          $SETUP install_maven
+      - name: Build Gluten native libraries
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: |
+          df -a
+          bash dev/ci-velox-buildshared-centos-8.sh
+      - name: Run CPP unit test
+        run: |
+          cd ./cpp/build && ctest -V
+      - name: Run CPP benchmark test
+        run: |
+          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \
+          -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip
+          # This test depends on example.json generated by the above mvn test.
+          cd cpp/build/velox/benchmarks && sudo chmod +x ./generic_benchmark
+          ./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1
+      - name: Run UDF test
+        run: |
+          # Depends on --build_examples=ON.
+          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None \
+          -DtagsToInclude=org.apache.gluten.tags.UDFTest
diff --git a/dev/ci-velox-buildshared-centos-8.sh b/dev/ci-velox-buildshared-centos-8.sh
new file mode 100755
index 0000000000..b6b0cda02d
--- /dev/null
+++ b/dev/ci-velox-buildshared-centos-8.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -e
+
+source /opt/rh/gcc-toolset-9/enable
+./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON \
+    --build_examples=ON --build_benchmarks=ON --build_protobuf=ON
diff --git a/dev/ci-velox-buildstatic-centos-7.sh b/dev/ci-velox-buildstatic-centos-7.sh
new file mode 100755
index 0000000000..3272de95d9
--- /dev/null
+++ b/dev/ci-velox-buildstatic-centos-7.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+source /opt/rh/devtoolset-9/enable
+export NUM_THREADS=4
+./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
+                           --build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git a/dev/vcpkg/ports/simdjson/vcpkg.json b/dev/vcpkg/ports/simdjson/vcpkg.json
index 6e46382e42..2b74be554c 100644
--- a/dev/vcpkg/ports/simdjson/vcpkg.json
+++ b/dev/vcpkg/ports/simdjson/vcpkg.json
@@ -17,8 +17,7 @@
   "default-features": [
     "deprecated",
     "exceptions",
-    "threads",
-    "utf8-validation"
+    "threads"
   ],
   "features": {
     "deprecated": {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to