This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a5ad4a4f1e [VL][CI] Change to use JDK-17 for Spark 3.3/3.4/3.5 tests
(#9209)
a5ad4a4f1e is described below
commit a5ad4a4f1ea59777b58d65df0adddc3d90125704
Author: PHILO-HE <[email protected]>
AuthorDate: Mon Apr 7 17:03:16 2025 +0800
[VL][CI] Change to use JDK-17 for Spark 3.3/3.4/3.5 tests (#9209)
---
.github/workflows/velox_backend.yml | 276 +++++++--------------
docs/developers/NewToGluten.md | 18 +-
.../gluten/utils/velox/VeloxTestSettings.scala | 9 +-
3 files changed, 105 insertions(+), 198 deletions(-)
diff --git a/.github/workflows/velox_backend.yml
b/.github/workflows/velox_backend.yml
index d403b75e24..f208303064 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -114,7 +114,7 @@ jobs:
path: .m2/repository/org/apache/arrow/
if-no-files-found: error
- run-tpc-test-ubuntu:
+ tpc-test-ubuntu:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -185,7 +185,7 @@ jobs:
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- run-tpc-test-centos8:
+ tpc-test-centos8:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -277,7 +277,7 @@ jobs:
--local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
--extra-conf=spark.gluten.ras.enabled=true
- run-tpc-test-centos7:
+ tpc-test-centos7:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -343,7 +343,7 @@ jobs:
--extra-conf=spark.gluten.ras.enabled=true
"
- run-tpc-test-ubuntu-oom:
+ tpc-test-ubuntu-oom:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -458,7 +458,7 @@ jobs:
-d=IO_THREADS:12,spark.gluten.sql.columnar.backend.velox.IOThreads=12 \
-d=IO_THREADS:0,spark.gluten.sql.columnar.backend.velox.IOThreads=0
- run-tpc-test-ubuntu-randomkill:
+ tpc-test-ubuntu-randomkill:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -510,7 +510,7 @@ jobs:
--local --preset=velox --benchmark-type=ds --error-on-memleak
-s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1
\
--data-gen=skip --random-kill-tasks --no-session-reuse
- run-tpc-test-centos8-uniffle:
+ tpc-test-centos8-uniffle:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -561,7 +561,7 @@ jobs:
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox-with-uniffle --benchmark-type=h
--error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- run-tpc-test-ubuntu-2204-celeborn:
+ tpc-test-ubuntu-2204-celeborn:
needs: build-native-lib-centos-7
strategy:
fail-fast: false
@@ -615,7 +615,7 @@ jobs:
--extra-conf=spark.celeborn.push.sortMemory.threshold=8m
--benchmark-type=ds --error-on-memleak \
--off-heap-size=10g -s=1.0 --threads=8 --iterations=1
- run-spark-test-spark32:
+ spark-test-spark32:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -647,7 +647,7 @@ jobs:
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark32
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -659,10 +659,10 @@ jobs:
if: failure()
uses: actions/upload-artifact@v4
with:
- name: golden-files-spark32
+ name: ${{ github.job }}-golden-files
path: /tmp/tpch-approved-plan/**
- run-spark-test-spark32-slow:
+ spark-test-spark32-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -687,7 +687,7 @@ jobs:
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark32-slow
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -696,7 +696,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark33:
+ spark-test-spark33:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -723,14 +723,18 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
- $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta
-Phudi -Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark33
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -742,11 +746,10 @@ jobs:
if: failure()
uses: actions/upload-artifact@v4
with:
- name: golden-files-spark33
+ name: ${{ github.job }}-golden-files
path: /tmp/tpch-approved-plan/**
-
- run-spark-test-spark33-slow:
+ spark-test-spark33-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -766,14 +769,18 @@ jobs:
- name: Build and Run unit test for Spark 3.3.1 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
- $MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta
-Phudi -Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.3 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark33-slow
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -782,7 +789,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark34:
+ spark-test-spark34:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -814,67 +821,15 @@ jobs:
export PATH=$JAVA_HOME/bin:$PATH
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
- ls -l /opt/shims/spark34/spark_home/
+ ls -l $SPARK_HOME
$MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Phudi -Pspark-ut \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
\
- -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/
${EXTRA_FLAGS}"
- - name: Upload test report
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: test-report-spark34-jdk17
- path: '**/surefire-reports/TEST-*.xml'
- - name: Upload unit tests log files
- if: ${{ !success() }}
- uses: actions/upload-artifact@v4
- with:
- name: ${{ github.job }}-test-log
- path: "**/target/*.log"
- - name: Upload golden files
- if: failure()
- uses: actions/upload-artifact@v4
- with:
- name: golden-files-spark34
- path: /tmp/tpch-approved-plan/**
-
- run-spark-test-spark34-jdk8:
- needs: build-native-lib-centos-7
- runs-on: ubuntu-20.04
- container: apache/gluten:centos-8-jdk8
- steps:
- - uses: actions/checkout@v2
- - name: Download All Artifacts
- uses: actions/download-artifact@v4
- with:
- name: velox-native-lib-centos-7-${{github.sha}}
- path: ./cpp/build/releases
- - name: Download Arrow Jars
- uses: actions/download-artifact@v4
- with:
- name: arrow-jars-centos-7-${{github.sha}}
- path: /root/.m2/repository/org/apache/arrow/
- - name: Prepare spark.test.home for Spark 3.4.4 (other tests)
- run: |
- dnf module -y install python39 && \
- alternatives --set python3 /usr/bin/python3.9 && \
- pip3 install setuptools==77.0.3 && \
- pip3 install pyspark==3.4.4 cython && \
- pip3 install pandas pyarrow
- - name: Build and Run unit test for Spark 3.4.4 (other tests)
- run: |
- cd $GITHUB_WORKSPACE/
- export SPARK_SCALA_VERSION=2.12
- export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
- export SPARK_HOME=/opt/shims/spark34/spark_home/
- ls -l /opt/shims/spark34/spark_home/
- $MVN_CMD clean test -Pspark-3.4 -Pjava-8 -Pbackends-velox -Pdelta
-Phudi -Pspark-ut \
-
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
\
- -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/"
+ -DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark34
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -886,10 +841,10 @@ jobs:
if: failure()
uses: actions/upload-artifact@v4
with:
- name: golden-files-spark34-jdk8
+ name: ${{ github.job }}-golden-files
path: /tmp/tpch-approved-plan/**
- run-spark-test-spark34-slow:
+ spark-test-spark34-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -908,101 +863,20 @@ jobs:
- name: Build and Run unit test for Spark 3.4.4 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
- export SPARK_HOME=/opt/shims/spark34/spark_home/
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
- ls -l /opt/shims/spark34/spark_home/
- $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut -Phudi \
- -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
- -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/
${EXTRA_FLAGS}"
- - name: Upload test report
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: test-report-spark34-slow-jdk17
- path: '**/surefire-reports/TEST-*.xml'
- - name: Upload unit tests log files
- if: ${{ !success() }}
- uses: actions/upload-artifact@v4
- with:
- name: ${{ github.job }}-test-log
- path: "**/target/*.log"
-
- run-spark-test-spark34-slow-jdk8:
- needs: build-native-lib-centos-7
- runs-on: ubuntu-20.04
- container: apache/gluten:centos-8-jdk8
- steps:
- - uses: actions/checkout@v2
- - name: Download All Artifacts
- uses: actions/download-artifact@v4
- with:
- name: velox-native-lib-centos-7-${{github.sha}}
- path: ./cpp/build/releases
- - name: Download Arrow Jars
- uses: actions/download-artifact@v4
- with:
- name: arrow-jars-centos-7-${{github.sha}}
- path: /root/.m2/repository/org/apache/arrow/
- - name: Build and Run unit test for Spark 3.4.4 (slow tests)
- run: |
- cd $GITHUB_WORKSPACE/
- export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export SPARK_HOME=/opt/shims/spark34/spark_home/
- ls -l /opt/shims/spark34/spark_home/
- $MVN_CMD clean test -Pspark-3.4 -Pjava-8 -Pbackends-velox -Pdelta
-Pspark-ut -Phudi \
+ ls -l $SPARK_HOME
+ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut -Phudi \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest \
- -DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/"
- - name: Upload test report
- if: always()
- uses: actions/upload-artifact@v4
- with:
- name: test-report-spark34-slow-jdk8
- path: '**/surefire-reports/TEST-*.xml'
- - name: Upload unit tests log files
- if: ${{ !success() }}
- uses: actions/upload-artifact@v4
- with:
- name: ${{ github.job }}-test-log
- path: "**/target/*.log"
-
- run-spark-test-spark35:
- needs: build-native-lib-centos-7
- runs-on: ubuntu-20.04
- container: apache/gluten:centos-8-jdk8
- steps:
- - uses: actions/checkout@v2
- - name: Download All Artifacts
- uses: actions/download-artifact@v4
- with:
- name: velox-native-lib-centos-7-${{github.sha}}
- path: ./cpp/build/releases
- - name: Download Arrow Jars
- uses: actions/download-artifact@v4
- with:
- name: arrow-jars-centos-7-${{github.sha}}
- path: /root/.m2/repository/org/apache/arrow/
- - name: Prepare
- run: |
- dnf module -y install python39 && \
- alternatives --set python3 /usr/bin/python3.9 && \
- pip3 install setuptools==77.0.3 && \
- pip3 install pyspark==3.5.2 cython && \
- pip3 install pandas pyarrow
- - name: Build and Run unit test for Spark 3.5.2 (other tests)
- run: |
- cd $GITHUB_WORKSPACE/
- export SPARK_SCALA_VERSION=2.12
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Phudi -Pspark-ut \
- -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
-
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
+ -DargLine="-Dspark.test.home=$SPARK_HOME ${EXTRA_FLAGS}"
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1010,14 +884,8 @@ jobs:
with:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- - name: Upload golden files
- if: failure()
- uses: actions/upload-artifact@v4
- with:
- name: golden-files-spark35
- path: /tmp/tpch-approved-plan/**
- run-spark-test-spark35-jdk17:
+ spark-test-spark35:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1055,7 +923,7 @@ jobs:
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-jdk17
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1067,10 +935,10 @@ jobs:
if: failure()
uses: actions/upload-artifact@v4
with:
- name: golden-files-spark35
+ name: ${{ github.job }}-golden-files
path: /tmp/tpch-approved-plan/**
- run-spark-test-spark35-scala213:
+ spark-test-spark35-scala213:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1097,14 +965,18 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.13
- $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox
-Piceberg \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pjava-17
-Pbackends-velox -Piceberg \
-Pdelta -Pspark-ut
-DargLine="-Dspark.test.home=/opt/shims/spark35-scala-2.13/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-scala213
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1113,7 +985,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark35-slow:
+ spark-test-spark35-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1132,14 +1004,18 @@ jobs:
- name: Build and Run unit test for Spark 3.5.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Phudi -Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Phudi -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-slow
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1148,7 +1024,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark35-ras:
+ spark-test-spark35-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1175,13 +1051,17 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/
-Dspark.gluten.ras.enabled=true" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-ras
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1190,7 +1070,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark35-slow-ras:
+ spark-test-spark35-slow-ras:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1209,13 +1089,17 @@ jobs:
- name: Build and Run unit test for Spark 3.5.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/
-Dspark.gluten.ras.enabled=true" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-slow-ras
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1224,7 +1108,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark35-smj:
+ spark-test-spark35-smj:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1251,13 +1135,17 @@ jobs:
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.12
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/
-Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
- name: Upload test report
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-smj
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1266,7 +1154,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-spark-test-spark35-slow-smj:
+ spark-test-spark35-slow-smj:
needs: build-native-lib-centos-7
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
@@ -1285,13 +1173,17 @@ jobs:
- name: Build and Run unit test for Spark 3.5.2 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
- $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-Pspark-ut \
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/
-Dspark.gluten.sql.columnar.forceShuffledHashJoin=false" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-slow-smj
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
@@ -1300,7 +1192,7 @@ jobs:
name: ${{ github.job }}-test-log
path: "**/target/*.log"
- run-cpp-test-udf-test:
+ cpp-test-udf-test:
runs-on: ubuntu-20.04
container: apache/gluten:centos-8-jdk8
steps:
@@ -1343,7 +1235,7 @@ jobs:
- name: Upload test report
uses: actions/upload-artifact@v4
with:
- name: test-report-spark35-udf
+ name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
diff --git a/docs/developers/NewToGluten.md b/docs/developers/NewToGluten.md
index 26418a45e5..da4e1fbdb8 100644
--- a/docs/developers/NewToGluten.md
+++ b/docs/developers/NewToGluten.md
@@ -10,15 +10,23 @@ Help users to debug and test with gluten.
Now gluten supports Ubuntu20.04, Ubuntu22.04, centos8, centos7 and macOS.
-## OpenJDK 8
+## JDK
-### Environment Setting
+Currently, Gluten supports JDK 8 for Spark 3.2/3.3/3.4/3.5. For Spark 3.3 and
higher versions, Gluten
+supports JDK 11 and 17. Please note that since Spark 4.0, JDK 8 will not be
supported. So we recommend that Velox
+backend users switch to a higher JDK version now to ease the migration for
deploying Gluten with Spark-4.0
+in the future. We may also upgrade Arrow from 15.0.0 to some higher
version, which also requires
+JDK 11 as the minimum version.
+
+### JDK 8
+
+#### Environment Setting
For root user, the environment variables file is `/etc/profile`, it will take
effect for all the users.
For other user, you can set in `~/.bashrc`.
-### Guide for Ubuntu
+#### Guide for Ubuntu
The default JDK version in ubuntu is java11, we need to set to java8.
@@ -41,9 +49,9 @@ export PATH="$PATH:$JAVA_HOME/bin"
> Must set PATH with double quote in ubuntu.
-## OpenJDK 17
+### JDK 11/17
-By default, Gluten compiles package using JDK8. Enable maven profile by
`-Pjava-17` to use JDK17 or `-Pjava-11` to use JDK 11, and please make sure
your JAVA_HOME points to jdk17 or jdk11 respectively.
+By default, Gluten compiles package using JDK8. Enable maven profile by
`-Pjava-17` to use JDK17 or `-Pjava-11` to use JDK 11, and please make sure
your JAVA_HOME is set correctly.
Apache Spark and Arrow requires setting java args
`-Dio.netty.tryReflectionSetAccessible=true`, see
[SPARK-29924](https://issues.apache.org/jira/browse/SPARK-29924) and
[ARROW-6206](https://issues.apache.org/jira/browse/ARROW-6206).
So please add following configs in `spark-defaults.conf`:
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index b010a6d615..8f0d562f95 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -650,7 +650,10 @@ class VeloxTestSettings extends BackendTestSettings {
// for ObjectHashAggregateExec will fail.
"SPARK-22223: ObjectHashAggregate should not introduce unnecessary
shuffle",
"SPARK-31620: agg with subquery (whole-stage-codegen = true)",
- "SPARK-31620: agg with subquery (whole-stage-codegen = false)"
+ "SPARK-31620: agg with subquery (whole-stage-codegen = false)",
+ // The below test just verifies Spark's scala code. The involved toString
+ // implementation has different result on Java 17.
+ "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs
should not fail"
)
enableSuite[GlutenDataFrameAsOfJoinSuite]
enableSuite[GlutenDataFrameComplexTypeSuite]
@@ -739,6 +742,10 @@ class VeloxTestSettings extends BackendTestSettings {
// Rewrite the following two tests in GlutenDatasetSuite.
.exclude("dropDuplicates: columns with same column name")
.exclude("groupBy.as")
+ // The below two tests just verify Spark's scala code. The involved
toString
+ // implementation has different result on Java 17.
+ .exclude("Check RelationalGroupedDataset toString: Single data")
+ .exclude("Check RelationalGroupedDataset toString: over length schema ")
enableSuite[GlutenDateFunctionsSuite]
// The below two are replaced by two modified versions.
.exclude("unix_timestamp")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]