This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new fbd066a8e [GLUTEN-6893][VL] Fix wrong github workflows path for hashing and minor code refactor (#6952)
fbd066a8e is described below

commit fbd066a8e665d1dd93b70e4cb7babeffce2b7e49
Author: PHILO-HE <[email protected]>
AuthorDate: Wed Aug 21 22:27:30 2024 +0800

    [GLUTEN-6893][VL] Fix wrong github workflows path for hashing and minor code refactor (#6952)
---
 .github/workflows/util/install_spark_resources.sh  |  90 +++++++
 .../{velox_docker.yml => velox_backend.yml}        | 272 ++++++---------------
 ...ox_docker_cache.yml => velox_backend_cache.yml} |   4 +-
 3 files changed, 167 insertions(+), 199 deletions(-)
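
The substantive fix: both workflows' cache-key steps hashed
'./github/workflows/*', which is missing the leading dot, so hashFiles()
matched no files and edits to the workflow files never invalidated the cached
native build. The commit also deduplicates the per-job Spark download steps
into a shared helper script and renames the workflows from velox_docker* to
velox_backend*. A quick check from the repository root shows why the old glob
was inert (a sketch, not part of the commit):

    # Old pattern: the directory does not exist, so the glob matches nothing
    # and hashFiles('./github/workflows/*') contributes an empty string.
    ls -d ./github/workflows/* 2>/dev/null || echo "old pattern matches nothing"
    # Fixed pattern resolves the real workflow files.
    ls -d ./.github/workflows/*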

diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh
new file mode 100755
index 000000000..242952d60
--- /dev/null
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Download Spark resources, required by some Spark UTs. The resource path should be set
+# for spark.test.home in mvn test.
+INSTALL_DIR=$GITHUB_WORKSPACE
+case "$1" in
+3.2)
+    # Spark-3.2
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz && \
+    tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz spark-3.2.2-bin-hadoop3.2/jars/ && \
+    rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark32/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
+    tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark32/spark_home/ && \
+    mv sql shims/spark32/spark_home/
+    ;;
+3.3)
+    # Spark-3.3
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz spark-3.3.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.3.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark33/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
+    tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark33/spark_home/ && \
+    mv sql shims/spark33/spark_home/
+    ;;
+3.4)
+    # Spark-3.4
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz spark-3.4.2-bin-hadoop3/jars/ && \
+    rm -rf spark-3.4.2-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
+    tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark34/spark_home/ && \
+    mv sql shims/spark34/spark_home/
+    ;;
+3.5)
+    # Spark-3.5
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \
+    mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
+    tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark35/spark_home/ && \
+    mv sql shims/spark35/spark_home/
+    ;;
+3.5-scala2.13)
+    # Spark-3.5, scala 2.13
+    cd ${INSTALL_DIR} && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
+    rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+    mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
+    mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
+    tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
+    mkdir -p shims/spark35/spark_home/ && \
+    mv sql shims/spark35/spark_home/
+    ;;
+*)
+    echo "Spark version is expected to be specified."
+    exit 1
+    ;;
+esac
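
The jobs below invoke this helper with the Spark version as its only argument.
A minimal local run (a sketch; it assumes GITHUB_WORKSPACE points at the
repository root, as it does on a GitHub runner):

    # Stages Spark 3.2.2 jars and SQL test resources under
    # shims/spark32/spark_home/, the path later passed as spark.test.home.
    export GITHUB_WORKSPACE=$(pwd)
    bash .github/workflows/util/install_spark_resources.sh 3.2
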
diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_backend.yml
similarity index 81%
rename from .github/workflows/velox_docker.yml
rename to .github/workflows/velox_backend.yml
index d16981db5..34ceb6d46 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_backend.yml
@@ -18,7 +18,7 @@ name: Velox backend Github Runner
 on:
   pull_request:
     paths:
-      - '.github/workflows/velox_docker.yml'
+      - '.github/workflows/velox_backend.yml'
       - 'pom.xml'
       - 'backends-velox/**'
       - 'gluten-uniffle/**'
@@ -43,6 +43,7 @@ on:
 env:
   ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
   MVN_CMD: 'mvn -ntp'
+  WGET_CMD: 'wget -nv'
 
 concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
@@ -56,7 +57,7 @@ jobs:
       - uses: actions/checkout@v2
       - name: Generate cache key
         run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
       - name: Cache
         id: cache
         uses: actions/cache/restore@v3
@@ -205,7 +206,7 @@ jobs:
           else
             yum update -y && yum install -y java-1.8.0-openjdk-devel wget
           fi
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
       - name: Set environment variables
@@ -408,58 +409,6 @@ jobs:
            --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
             --data-gen=skip  --random-kill-tasks --no-session-reuse
 
-  # run-tpc-test-ubuntu-sf30:
-  #   needs: build-native-lib-centos-7
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       spark: [ "spark-3.4" ]
-  #       shard: [ "1/4", "2/4", "3/4", "4/4" ]
-  #   runs-on: ubuntu-20.04
-  #   steps:
-  #     - name: Maximize build disk space
-  #       shell: bash
-  #       run: |
-  #         df -h
-  #         set -euo pipefail
-  #         echo "Removing unwanted software... "
-  #         sudo rm -rf /usr/share/dotnet
-  #         sudo rm -rf /usr/local/lib/android
-  #         sudo rm -rf /opt/ghc
-  #         sudo rm -rf /opt/hostedtoolcache/CodeQL
-  #         sudo docker image prune --all --force > /dev/null
-  #         df -h
-  #     - uses: actions/checkout@v2
-  #     - name: Download All Artifacts
-  #       uses: actions/download-artifact@v2
-  #       with:
-  #         name: velox-native-lib-centos-7-${{github.sha}}
-  #         path: ./cpp/build/releases
-  #     - name: Setup java and maven
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install -y openjdk-8-jdk maven
-  #     - name: Set environment variables
-  #       run: |
-  #         echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV
-  #     - name: Build for Spark ${{ matrix.spark }}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE/ 
-  #         $MVN_CMD clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
-  #         cd $GITHUB_WORKSPACE/tools/gluten-it
-  #         $MVN_CMD clean install -P${{ matrix.spark }}
-  #         GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=h -s=30.0 --threads=12
-  #         GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12
-  #     - name: TPC-H / TPC-DS SF30.0 Parquet local ${{ matrix.spark }}
-  #       run: |
-  #         cd tools/gluten-it \
-  #         && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
-  #           --local --preset=velox --benchmark-type=h --error-on-memleak -s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
-  #           --data-gen=skip --shard=${{ matrix.shard }} \
-  #         && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
-  #           --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
-  #           --data-gen=skip --shard=${{ matrix.shard }}
-
   run-tpc-test-centos8-uniffle:
     needs: build-native-lib-centos-7
     strategy:
@@ -487,7 +436,7 @@ jobs:
       - name: Setup java and maven
         run: |
           yum update -y && yum install -y java-1.8.0-openjdk-devel wget git
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
       - name: Build for Uniffle 0.9.0
@@ -500,9 +449,9 @@ jobs:
           cd incubator-uniffle && \
           $MVN_CMD clean install -Phadoop2.8,spark3 -DskipTests
           cd /opt && \
-          wget -nv https://archive.apache.org/dist/incubator/uniffle/0.9.0/apache-uniffle-0.9.0-incubating-bin.tar.gz && \
+          ${WGET_CMD} https://archive.apache.org/dist/incubator/uniffle/0.9.0/apache-uniffle-0.9.0-incubating-bin.tar.gz && \
           tar xzf apache-uniffle-0.9.0-incubating-bin.tar.gz -C /opt/ && mv /opt/rss-0.9.0-hadoop2.8 /opt/uniffle && \
-          wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz && \
+          ${WGET_CMD} https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz && \
           tar xzf hadoop-2.8.5.tar.gz -C /opt/
           rm -rf /opt/incubator-uniffle
           cd /opt/uniffle && mkdir shuffle_data && \
@@ -570,7 +519,7 @@ jobs:
           fi
           echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
           cd /opt && mkdir -p celeborn && \
-          wget https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz && \
+          ${WGET_CMD} https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz && \
           tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
           mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
           bash -c "echo -e 'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g' > ./conf/celeborn-env.sh" && \
@@ -582,45 +531,6 @@ jobs:
           GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
            --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1
 
-  run-cpp-test-udf-test:
-    runs-on: ubuntu-20.04
-    container: ghcr.io/facebookincubator/velox-dev:centos8
-    steps:
-      - uses: actions/checkout@v2
-      - name: Generate cache key
-        run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key
-      - name: Cache
-        id: cache
-        uses: actions/cache/restore@v3
-        with:
-          path: |
-            ./cpp/build/releases/
-            ./cpp/build/velox/udf/examples/
-            ./cpp/build/velox/benchmarks/
-            /root/.m2/repository/org/apache/arrow/
-          key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }}
-      - name: Build Gluten native libraries
-        if: steps.cache.outputs.cache-hit != 'true'
-        run: |
-          df -a
-          bash dev/ci-velox-buildshared-centos-8.sh
-      - name: Run CPP unit test
-        run: |
-          cd ./cpp/build && ctest -V
-      - name: Run CPP benchmark test
-        run: |
-          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \
-          -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip
-          # This test depends on example.json generated by the above mvn test.
-          cd cpp/build/velox/benchmarks && sudo chmod +x ./generic_benchmark
-          ./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1
-      - name: Run UDF test
-        run: |
-          # Depends on --build_example=ON.
-          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None \
-          -DtagsToInclude=org.apache.gluten.tags.UDFTest
-
   run-spark-test-spark32:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
@@ -644,7 +554,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -659,17 +569,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.2.2 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz && \
-          tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz spark-3.2.2-bin-hadoop3.2/jars/ && \
-          rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
-          tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark32/spark_home/ && \
-          mv sql shims/spark32/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.2
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -679,7 +579,7 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg \
+          $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg \
           -Pdelta -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" \
           -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
@@ -714,7 +614,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -729,15 +629,12 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.2.2 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
-          tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark32/spark_home/ && \
-          mv sql shims/spark32/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.2
       - name: Build and run unit test for Spark 3.2.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg -Pdelta -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+          $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg -Pdelta \
+          -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
   run-spark-test-spark33:
     needs: build-native-lib-centos-7
@@ -764,7 +661,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -779,17 +676,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.3.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz && \
-          tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz spark-3.3.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.3.1-bin-hadoop3.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
-          tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark33/spark_home/ && \
-          mv sql shims/spark33/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.3
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -799,7 +686,7 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
           -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
@@ -835,7 +722,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -850,15 +737,11 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.3.1 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
-          tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark33/spark_home/ && \
-          mv sql shims/spark33/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.3
       - name: Build and Run unit test for Spark 3.3.1 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
@@ -887,7 +770,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -902,17 +785,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.4.2 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz && \
-          tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz spark-3.4.2-bin-hadoop3/jars/ && \
-          rm -rf spark-3.4.2-bin-hadoop3.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
-          tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark34/spark_home/ && \
-          mv sql shims/spark34/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.4
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -922,7 +795,7 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
           -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
@@ -958,7 +831,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -973,15 +846,11 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.4.2 (slow tests)
         run: |
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
-          tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark34/spark_home/ && \
-          mv sql shims/spark34/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.4
       - name: Build and Run unit test for Spark 3.4.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
 
@@ -1010,7 +879,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -1025,17 +894,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
-          tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.5
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -1045,7 +904,7 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean install -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
           -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload golden files
@@ -1080,7 +939,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -1095,17 +954,7 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.13 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.13 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
-          tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/ && \
+          bash .github/workflows/util/install_spark_resources.sh 3.5-scala2.13
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
@@ -1115,7 +964,7 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.13
-          $MVN_CMD clean install -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Pceleborn -Piceberg \
+          $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Pceleborn -Piceberg \
           -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
           -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 
@@ -1144,7 +993,7 @@ jobs:
       - name: Setup build dependency
         run: |
           yum install sudo patch java-1.8.0-openjdk-devel wget -y
-          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
           tar -xvf apache-maven-3.8.8-bin.tar.gz
           mv apache-maven-3.8.8 /usr/lib/maven
           echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV
@@ -1159,20 +1008,49 @@ jobs:
           mkdir -p '${{ env.CCACHE_DIR }}'
       - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
         run: |
-          cd $GITHUB_WORKSPACE/ && \
-          wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
-          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
-          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
-          mkdir -p $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          mv jars $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
-          cd $GITHUB_WORKSPACE// && \
-          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
-          tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/  && \
-          mkdir -p shims/spark35/spark_home/ && \
-          mv sql shims/spark35/spark_home/
+          bash .github/workflows/util/install_spark_resources.sh 3.5
       - name: Build and Run unit test for Spark 3.5.1 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean install -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
+          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+
+  run-cpp-test-udf-test:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:centos8
+    steps:
+      - uses: actions/checkout@v2
+      - name: Generate cache key
+        run: |
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
+      - name: Cache
+        id: cache
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            ./cpp/build/releases/
+            ./cpp/build/velox/udf/examples/
+            ./cpp/build/velox/benchmarks/
+            /root/.m2/repository/org/apache/arrow/
+          key: cache-velox-build-centos-8-${{ hashFiles('./cache-key') }}
+      - name: Build Gluten native libraries
+        if: steps.cache.outputs.cache-hit != 'true'
+        run: |
+          df -a
+          bash dev/ci-velox-buildshared-centos-8.sh
+      - name: Run CPP unit test
+        run: |
+          cd ./cpp/build && ctest -V
+      - name: Run CPP benchmark test
+        run: |
+          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -pl backends-velox -am \
+          -DtagsToInclude="org.apache.gluten.tags.GenerateExample" -Dtest=none -DfailIfNoTests=false -Dexec.skip
+          # This test depends on example.json generated by the above mvn test.
+          cd cpp/build/velox/benchmarks && sudo chmod +x ./generic_benchmark
+          ./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1
+      - name: Run UDF test
+        run: |
+          # Depends on --build_example=ON.
+          $MVN_CMD test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None \
+          -DtagsToInclude=org.apache.gluten.tags.UDFTest
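
Two refactors recur throughout the workflow above: downloads now go through the
new WGET_CMD variable ('wget -nv') defined once in the workflow env, and the
Spark unit-test jobs run Maven's test phase instead of install, presumably
because nothing downstream consumes artifacts from the local repository.
Condensed from the job steps (a sketch, with most profiles elided):

    # WGET_CMD and MVN_CMD mirror the workflow-level env entries.
    WGET_CMD='wget -nv'
    MVN_CMD='mvn -ntp'
    ${WGET_CMD} https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
    # UT jobs now stop at `clean test` and point spark.test.home at the
    # Spark home staged by install_spark_resources.sh.
    $MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox \
      -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE/shims/spark32/spark_home/"
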
diff --git a/.github/workflows/velox_docker_cache.yml b/.github/workflows/velox_backend_cache.yml
similarity index 98%
rename from .github/workflows/velox_docker_cache.yml
rename to .github/workflows/velox_backend_cache.yml
index 1aca4d6bd..0cb922f65 100644
--- a/.github/workflows/velox_docker_cache.yml
+++ b/.github/workflows/velox_backend_cache.yml
@@ -35,7 +35,7 @@ jobs:
       - uses: actions/checkout@v2
       - name: Generate cache key
         run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
       - name: Check existing caches
         id: check-cache
         uses: actions/cache/restore@v3
@@ -65,7 +65,7 @@ jobs:
       - uses: actions/checkout@v2
       - name: Generate cache key
         run: |
-          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key
+          echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './.github/workflows/*') }} > cache-key
       - name: Check existing caches
         id: check-cache
         uses: actions/cache/restore@v3
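
With the path fixed in both files, velox_backend.yml and velox_backend_cache.yml
derive their cache keys from the same inputs, so caches saved by one workflow
are found by the other. A rough local analogue of the key input (a sketch only;
the real hashFiles() hashes each matched file with SHA-256 and then hashes the
set):

    # Approximate the cache-key input from the same file sets the workflows hash.
    { find ep/build-velox/src dev -type f; \
      find cpp .github/workflows -maxdepth 1 -type f; } | sort \
      | xargs sha256sum | sha256sum | cut -d' ' -f1 > cache-key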

