This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a5ad4a4f1e [VL][CI] Change to use JDK-17 for Spark 3.3/3.4/3.5 tests 
(#9209)
a5ad4a4f1e is described below

commit a5ad4a4f1ea59777b58d65df0adddc3d90125704
Author: PHILO-HE <[email protected]>
AuthorDate: Mon Apr 7 17:03:16 2025 +0800

    [VL][CI] Change to use JDK-17 for Spark 3.3/3.4/3.5 tests (#9209)
---
 .github/workflows/velox_backend.yml                | 276 +++++++--------------
 docs/developers/NewToGluten.md                     |  18 +-
 .../gluten/utils/velox/VeloxTestSettings.scala     |   9 +-
 3 files changed, 105 insertions(+), 198 deletions(-)

diff --git a/.github/workflows/velox_backend.yml 
b/.github/workflows/velox_backend.yml
index d403b75e24..f208303064 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -114,7 +114,7 @@ jobs:
           path: .m2/repository/org/apache/arrow/
           if-no-files-found: error
 
-  run-tpc-test-ubuntu:
+  tpc-test-ubuntu:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -185,7 +185,7 @@ jobs:
           && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
 
-  run-tpc-test-centos8:
+  tpc-test-centos8:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -277,7 +277,7 @@ jobs:
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
             --extra-conf=spark.gluten.ras.enabled=true 
 
-  run-tpc-test-centos7:
+  tpc-test-centos7:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -343,7 +343,7 @@ jobs:
               --extra-conf=spark.gluten.ras.enabled=true 
           "
 
-  run-tpc-test-ubuntu-oom:
+  tpc-test-ubuntu-oom:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -458,7 +458,7 @@ jobs:
             
-d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \
             -d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0
 
-  run-tpc-test-ubuntu-randomkill:
+  tpc-test-ubuntu-randomkill:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -510,7 +510,7 @@ jobs:
             --local --preset=velox --benchmark-type=ds --error-on-memleak 
-s=30.0  --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 
\
             --data-gen=skip  --random-kill-tasks --no-session-reuse
 
-  run-tpc-test-centos8-uniffle:
+  tpc-test-centos8-uniffle:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -561,7 +561,7 @@ jobs:
           GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
             --local --preset=velox-with-uniffle --benchmark-type=h 
--error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
 
-  run-tpc-test-ubuntu-2204-celeborn:
+  tpc-test-ubuntu-2204-celeborn:
     needs: build-native-lib-centos-7
     strategy:
       fail-fast: false
@@ -615,7 +615,7 @@ jobs:
             --extra-conf=spark.celeborn.push.sortMemory.threshold=8m 
--benchmark-type=ds --error-on-memleak \
             --off-heap-size=10g -s=1.0 --threads=8 --iterations=1
 
-  run-spark-test-spark32:
+  spark-test-spark32:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -647,7 +647,7 @@ jobs:
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark32
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -659,10 +659,10 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: golden-files-spark32
+          name: ${{ github.job }}-golden-files
           path: /tmp/tpch-approved-plan/**
 
-  run-spark-test-spark32-slow:
+  spark-test-spark32-slow:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -687,7 +687,7 @@ jobs:
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark32-slow
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -696,7 +696,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark33:
+  spark-test-spark33:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -723,14 +723,18 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta 
-Phudi -Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Phudi -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
           
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark33
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -742,11 +746,10 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: golden-files-spark33
+          name: ${{ github.job }}-golden-files
           path: /tmp/tpch-approved-plan/**
 
-
-  run-spark-test-spark33-slow:
+  spark-test-spark33-slow:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -766,14 +769,18 @@ jobs:
       - name: Build and Run unit test for Spark 3.3.1 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta 
-Phudi -Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Phudi -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark33-slow
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -782,7 +789,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark34:
+  spark-test-spark34:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -814,67 +821,15 @@ jobs:
           export PATH=$JAVA_HOME/bin:$PATH
           java -version
           export SPARK_HOME=/opt/shims/spark34/spark_home/
-          ls -l /opt/shims/spark34/spark_home/
+          ls -l $SPARK_HOME
           $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Phudi -Pspark-ut \
           
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 \
-          -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ 
${EXTRA_FLAGS}" 
-      - name: Upload test report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-report-spark34-jdk17
-          path: '**/surefire-reports/TEST-*.xml'
-      - name: Upload unit tests log files
-        if: ${{ !success() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ github.job }}-test-log
-          path: "**/target/*.log"
-      - name: Upload golden files
-        if: failure()
-        uses: actions/upload-artifact@v4
-        with:
-          name: golden-files-spark34
-          path: /tmp/tpch-approved-plan/**
-
-  run-spark-test-spark34-jdk8:
-    needs: build-native-lib-centos-7
-    runs-on: ubuntu-20.04
-    container: apache/gluten:centos-8-jdk8
-    steps:
-      - uses: actions/checkout@v2
-      - name: Download All Artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: velox-native-lib-centos-7-${{github.sha}}
-          path: ./cpp/build/releases
-      - name: Download Arrow Jars
-        uses: actions/download-artifact@v4
-        with:
-          name: arrow-jars-centos-7-${{github.sha}}
-          path: /root/.m2/repository/org/apache/arrow/
-      - name: Prepare spark.test.home for Spark 3.4.4 (other tests)
-        run: |
-          dnf module -y install python39 && \
-          alternatives --set python3 /usr/bin/python3.9 && \
-          pip3 install setuptools==77.0.3 && \
-          pip3 install pyspark==3.4.4 cython && \
-          pip3 install pandas pyarrow
-      - name: Build and Run unit test for Spark 3.4.4 (other tests)
-        run: |
-          cd $GITHUB_WORKSPACE/
-          export SPARK_SCALA_VERSION=2.12
-          export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
-          export SPARK_HOME=/opt/shims/spark34/spark_home/
-          ls -l /opt/shims/spark34/spark_home/
-          $MVN_CMD clean test -Pspark-3.4 -Pjava-8 -Pbackends-velox -Pdelta 
-Phudi -Pspark-ut \
-          
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
 \
-          -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" 
+          -DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark34
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -886,10 +841,10 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: golden-files-spark34-jdk8
+          name: ${{ github.job }}-golden-files
           path: /tmp/tpch-approved-plan/**
 
-  run-spark-test-spark34-slow:
+  spark-test-spark34-slow:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -908,101 +863,20 @@ jobs:
       - name: Build and Run unit test for Spark 3.4.4 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          export SPARK_HOME=/opt/shims/spark34/spark_home/
           yum install -y java-17-openjdk-devel
           export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
           export PATH=$JAVA_HOME/bin:$PATH
           java -version
-          ls -l /opt/shims/spark34/spark_home/
-          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut -Phudi \
-          -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
-          -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/ 
${EXTRA_FLAGS}" 
-      - name: Upload test report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-report-spark34-slow-jdk17
-          path: '**/surefire-reports/TEST-*.xml'
-      - name: Upload unit tests log files
-        if: ${{ !success() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ github.job }}-test-log
-          path: "**/target/*.log"
-
-  run-spark-test-spark34-slow-jdk8:
-    needs: build-native-lib-centos-7
-    runs-on: ubuntu-20.04
-    container: apache/gluten:centos-8-jdk8
-    steps:
-      - uses: actions/checkout@v2
-      - name: Download All Artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: velox-native-lib-centos-7-${{github.sha}}
-          path: ./cpp/build/releases
-      - name: Download Arrow Jars
-        uses: actions/download-artifact@v4
-        with:
-          name: arrow-jars-centos-7-${{github.sha}}
-          path: /root/.m2/repository/org/apache/arrow/
-      - name: Build and Run unit test for Spark 3.4.4 (slow tests)
-        run: |
-          cd $GITHUB_WORKSPACE/
-          export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
           export SPARK_HOME=/opt/shims/spark34/spark_home/
-          ls -l /opt/shims/spark34/spark_home/
-          $MVN_CMD clean test -Pspark-3.4 -Pjava-8 -Pbackends-velox -Pdelta 
-Pspark-ut -Phudi \
+          ls -l $SPARK_HOME
+          $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut -Phudi \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
-          -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" 
-      - name: Upload test report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: test-report-spark34-slow-jdk8
-          path: '**/surefire-reports/TEST-*.xml'
-      - name: Upload unit tests log files
-        if: ${{ !success() }}
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ github.job }}-test-log
-          path: "**/target/*.log"
-
-  run-spark-test-spark35:
-    needs: build-native-lib-centos-7
-    runs-on: ubuntu-20.04
-    container: apache/gluten:centos-8-jdk8
-    steps:
-      - uses: actions/checkout@v2
-      - name: Download All Artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: velox-native-lib-centos-7-${{github.sha}}
-          path: ./cpp/build/releases
-      - name: Download Arrow Jars
-        uses: actions/download-artifact@v4
-        with:
-          name: arrow-jars-centos-7-${{github.sha}}
-          path: /root/.m2/repository/org/apache/arrow/
-      - name: Prepare
-        run: |
-          dnf module -y install python39 && \
-          alternatives --set python3 /usr/bin/python3.9 && \
-          pip3 install setuptools==77.0.3 && \
-          pip3 install pyspark==3.5.2 cython && \
-          pip3 install pandas pyarrow
-      - name: Build and Run unit test for Spark 3.5.2 (other tests)
-        run: |
-          cd $GITHUB_WORKSPACE/
-          export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Phudi -Pspark-ut \
-          -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
-          
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
+          -DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1010,14 +884,8 @@ jobs:
         with:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
-      - name: Upload golden files
-        if: failure()
-        uses: actions/upload-artifact@v4
-        with:
-          name: golden-files-spark35
-          path: /tmp/tpch-approved-plan/**
 
-  run-spark-test-spark35-jdk17:
+  spark-test-spark35:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1055,7 +923,7 @@ jobs:
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-jdk17
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1067,10 +935,10 @@ jobs:
         if: failure()
         uses: actions/upload-artifact@v4
         with:
-          name: golden-files-spark35
+          name: ${{ github.job }}-golden-files
           path: /tmp/tpch-approved-plan/**
 
-  run-spark-test-spark35-scala213:
+  spark-test-spark35-scala213:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1097,14 +965,18 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.13
-          $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox 
-Piceberg \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pjava-17 
-Pbackends-velox -Piceberg \
           -Pdelta -Pspark-ut 
-DargLine="-Dspark.test.home=/opt/shims/spark35-scala-2.13/spark_home/" \
           
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-scala213
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1113,7 +985,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark35-slow:
+  spark-test-spark35-slow:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1132,14 +1004,18 @@ jobs:
       - name: Build and Run unit test for Spark 3.5.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Phudi -Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Phudi -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
       - name: Upload test report
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-slow
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1148,7 +1024,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark35-ras:
+  spark-test-spark35-ras:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1175,13 +1051,17 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ 
-Dspark.gluten.ras.enabled=true" \
           
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload test report
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-ras
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1190,7 +1070,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark35-slow-ras:
+  spark-test-spark35-slow-ras:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1209,13 +1089,17 @@ jobs:
       - name: Build and Run unit test for Spark 3.5.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ 
-Dspark.gluten.ras.enabled=true" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
       - name: Upload test report
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-slow-ras
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1224,7 +1108,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark35-smj:
+  spark-test-spark35-smj:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1251,13 +1135,17 @@ jobs:
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ 
-Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \
           
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
       - name: Upload test report
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-smj
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1266,7 +1154,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-spark-test-spark35-slow-smj:
+  spark-test-spark35-slow-smj:
     needs: build-native-lib-centos-7
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
@@ -1285,13 +1173,17 @@ jobs:
       - name: Build and Run unit test for Spark 3.5.2 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
-          $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta 
-Pspark-ut \
+          yum install -y java-17-openjdk-devel
+          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+          export PATH=$JAVA_HOME/bin:$PATH
+          java -version
+          $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg 
-Pdelta -Pspark-ut \
           -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ 
-Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \
           -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
       - name: Upload test report
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-slow-smj
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
@@ -1300,7 +1192,7 @@ jobs:
           name: ${{ github.job }}-test-log
           path: "**/target/*.log"
 
-  run-cpp-test-udf-test:
+  cpp-test-udf-test:
     runs-on: ubuntu-20.04
     container: apache/gluten:centos-8-jdk8
     steps:
@@ -1343,7 +1235,7 @@ jobs:
       - name: Upload test report
         uses: actions/upload-artifact@v4
         with:
-          name: test-report-spark35-udf
+          name: ${{ github.job }}-report
           path: '**/surefire-reports/TEST-*.xml'
       - name: Upload unit tests log files
         if: ${{ !success() }}
diff --git a/docs/developers/NewToGluten.md b/docs/developers/NewToGluten.md
index 26418a45e5..da4e1fbdb8 100644
--- a/docs/developers/NewToGluten.md
+++ b/docs/developers/NewToGluten.md
@@ -10,15 +10,23 @@ Help users to debug and test with gluten.
 
 Now gluten supports Ubuntu20.04, Ubuntu22.04, centos8, centos7 and macOS.
 
-## OpenJDK 8
+## JDK
 
-### Environment Setting
+Currently, Gluten supports JDK 8 for Spark 3.2/3.3/3.4/3.5. For Spark 3.3 and 
higher versions, Gluten
+supports JDK 11 and 17. Please note that since Spark 4.0, JDK 8 will no 
longer be supported. So we recommend
+that Velox backend users move to a higher JDK version now to ease the future 
migration to deploying Gluten
+with Spark 4.0. In addition, we may upgrade Arrow from 15.0.0 to a higher 
version, which would also require
+JDK 11 as the minimum version.
+
+### JDK 8
+
+#### Environment Setting
 
 For root user, the environment variables file is `/etc/profile`, it will take 
effect for all the users.
 
 For other user, you can set in `~/.bashrc`.
 
-### Guide for Ubuntu
+#### Guide for Ubuntu
 
 The default JDK version in ubuntu is java11, we need to set to java8.
 
@@ -41,9 +49,9 @@ export PATH="$PATH:$JAVA_HOME/bin"
 
 > Must set PATH with double quote in ubuntu.
 
-## OpenJDK 17
+### JDK 11/17
 
-By default, Gluten compiles package using JDK8. Enable maven profile by 
`-Pjava-17` to use JDK17 or `-Pjava-11` to use JDK 11, and please make sure 
your JAVA_HOME points to jdk17 or jdk11 respectively.
+By default, Gluten compiles package using JDK8. Enable maven profile by 
`-Pjava-17` to use JDK17 or `-Pjava-11` to use JDK 11, and please make sure 
your JAVA_HOME is set correctly.
 
 Apache Spark and Arrow requires setting java args 
`-Dio.netty.tryReflectionSetAccessible=true`, see 
[SPARK-29924](https://issues.apache.org/jira/browse/SPARK-29924) and 
[ARROW-6206](https://issues.apache.org/jira/browse/ARROW-6206).
 So please add following configs in `spark-defaults.conf`:
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index b010a6d615..8f0d562f95 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -650,7 +650,10 @@ class VeloxTestSettings extends BackendTestSettings {
       // for ObjectHashAggregateExec will fail.
       "SPARK-22223: ObjectHashAggregate should not introduce unnecessary 
shuffle",
       "SPARK-31620: agg with subquery (whole-stage-codegen = true)",
-      "SPARK-31620: agg with subquery (whole-stage-codegen = false)"
+      "SPARK-31620: agg with subquery (whole-stage-codegen = false)",
+      // The below test just verifies Spark's Scala code. The involved toString
+      // implementation has a different result on Java 17.
+      "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs 
should not fail"
should not fail"
     )
   enableSuite[GlutenDataFrameAsOfJoinSuite]
   enableSuite[GlutenDataFrameComplexTypeSuite]
@@ -739,6 +742,10 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite the following two tests in GlutenDatasetSuite.
     .exclude("dropDuplicates: columns with same column name")
     .exclude("groupBy.as")
+    // The below two tests just verify Spark's Scala code. The involved 
toString
+    // implementation has a different result on Java 17.
+    .exclude("Check RelationalGroupedDataset toString: Single data")
+    .exclude("Check RelationalGroupedDataset toString: over length schema ")
   enableSuite[GlutenDateFunctionsSuite]
     // The below two are replaced by two modified versions.
     .exclude("unix_timestamp")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to