This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 7942701c3 [CORE] Support JDK17 (#5120)
7942701c3 is described below
commit 7942701c3b67c72230f34286f837c3a6f13fd002
Author: Xiduo You <[email protected]>
AuthorDate: Wed Mar 27 11:10:11 2024 +0800
[CORE] Support JDK17 (#5120)
* Support JDK17
* address comment
---------
Co-authored-by: Kent Yao <[email protected]>
---
.github/workflows/velox_docker.yml | 114 ++++++++++++++++++-------------------
docs/developers/NewToGluten.md | 12 ++++
docs/get-started/Velox.md | 28 ++++-----
pom.xml | 47 ++++++++++-----
tools/gluten-it/pom.xml | 23 +++++++-
tools/gluten-it/sbin/gluten-it.sh | 21 ++++++-
6 files changed, 152 insertions(+), 93 deletions(-)
diff --git a/.github/workflows/velox_docker.yml
b/.github/workflows/velox_docker.yml
index f2b73e81d..6329750d2 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -73,6 +73,17 @@ jobs:
matrix:
os: ["ubuntu:20.04", "ubuntu:22.04"]
spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
+ java: [ "java-8", "java-17" ]
+ # Spark supports JDK17 since 3.3 and later, see
https://issues.apache.org/jira/browse/SPARK-33772
+ exclude:
+ - spark: spark-3.2
+ java: java-17
+ - spark: spark-3.4
+ java: java-17
+ - spark: spark-3.5
+ java: java-17
+ - os: ubuntu:22.04
+ java: java-17
runs-on: ubuntu-20.04
container: ${{ matrix.os }}
steps:
@@ -84,69 +95,45 @@ jobs:
path: ./cpp/build/releases
- name: Setup java and maven
run: |
- apt-get update && \
- apt-get install -y openjdk-8-jdk maven && \
+ if [ "${{ matrix.java }}" = "java-17" ]; then
+ apt-get update && apt-get install -y openjdk-17-jdk maven
+ else
+ apt-get update && apt-get install -y openjdk-8-jdk maven
+ fi
apt remove openjdk-11* -y
- - name: Build for Spark ${{ matrix.spark }}
- run: |
- cd $GITHUB_WORKSPACE/ && \
- mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
- - name: Build and run TPCH/DS ${{ matrix.spark }}
- run: |
- cd $GITHUB_WORKSPACE/tools/gluten-it && \
- mvn clean install -P${{ matrix.spark }} \
- && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
- --local --preset=velox --benchmark-type=h --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
- && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
- --local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
-
-
- run-tpc-test-centos7:
- needs: build-native-lib
- strategy:
- fail-fast: false
- matrix:
- spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
- runs-on: ubuntu-20.04
- container: centos:7
- steps:
- - uses: actions/checkout@v2
- - name: Download All Artifacts
- uses: actions/download-artifact@v2
- with:
- name: velox-native-lib-${{github.sha}}
- path: ./cpp/build/releases
- - name: Setup java and maven
- run: |
- yum update -y && yum install -y java-1.8.0-openjdk-devel wget
- wget
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
- tar -xvf apache-maven-3.8.8-bin.tar.gz
- mv apache-maven-3.8.8 /usr/lib/maven
- - name: Build for Spark ${{ matrix.spark }}
+ - name: Build and run TPCH/DS
run: |
cd $GITHUB_WORKSPACE/
- export MAVEN_HOME=/usr/lib/maven
- export PATH=${PATH}:${MAVEN_HOME}/bin
- mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
- - name: Build and run TPCH/DS ${{ matrix.spark }}
- run: |
- cd $GITHUB_WORKSPACE/tools/gluten-it
- export MAVEN_HOME=/usr/lib/maven
- export PATH=${PATH}:${MAVEN_HOME}/bin
- mvn clean install -P${{ matrix.spark }} \
+ export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64
+ echo "JAVA_HOME: $JAVA_HOME"
+ mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }}
-Pbackends-velox -DskipTests
+ cd $GITHUB_WORKSPACE/tools/gluten-it
+ mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
- run-tpc-test-centos8:
+ run-tpc-test-centos:
needs: build-native-lib
strategy:
fail-fast: false
matrix:
+ os: ["centos:7", "centos:8"]
spark: ["spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5"]
+ java: ["java-8", "java-17"]
+ # Spark supports JDK17 since 3.3 and later, see
https://issues.apache.org/jira/browse/SPARK-33772
+ exclude:
+ - spark: spark-3.2
+ java: java-17
+ - spark: spark-3.4
+ java: java-17
+ - spark: spark-3.5
+ java: java-17
+ - os: centos:7
+ java: java-17
runs-on: ubuntu-20.04
- container: centos:8
+ container: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
@@ -155,32 +142,39 @@ jobs:
name: velox-native-lib-${{github.sha}}
path: ./cpp/build/releases
- name: Update mirror list
+ if: matrix.os == 'centos:8'
run: |
sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
|| true
sed -i -e
"s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g"
/etc/yum.repos.d/CentOS-* || true
- name: Setup java and maven
run: |
- yum update -y && yum install -y java-1.8.0-openjdk-devel wget
+ if [ "${{ matrix.java }}" = "java-17" ]; then
+ yum update -y && yum install -y java-17-openjdk-devel wget
+ else
+ yum update -y && yum install -y java-1.8.0-openjdk-devel wget
+ fi
wget
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
tar -xvf apache-maven-3.8.8-bin.tar.gz
mv apache-maven-3.8.8 /usr/lib/maven
- - name: Build for Spark ${{ matrix.spark }}
- run: |
- cd $GITHUB_WORKSPACE/
- export MAVEN_HOME=/usr/lib/maven
- export PATH=${PATH}:${MAVEN_HOME}/bin
- mvn clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests
- - name: Build and run TPCH/DS ${{ matrix.spark }}
+ - name: Build and run TPCH/DS
run: |
- cd $GITHUB_WORKSPACE/tools/gluten-it
+ cd $GITHUB_WORKSPACE/
export MAVEN_HOME=/usr/lib/maven
export PATH=${PATH}:${MAVEN_HOME}/bin
- mvn clean install -P${{ matrix.spark }} \
+ if [ "${{ matrix.java }}" = "java-17" ]; then
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ else
+ export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
+ fi
+ echo "JAVA_HOME: $JAVA_HOME"
+ mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }}
-Pbackends-velox -DskipTests
+ cd $GITHUB_WORKSPACE/tools/gluten-it
+ mvn clean install -P${{ matrix.spark }} -P${{ matrix.java }} \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=h --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox --benchmark-type=ds --error-on-memleak
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1
-
+
# run-tpc-test-centos8-oom-randomkill:
# needs: build-native-lib
# strategy:
diff --git a/docs/developers/NewToGluten.md b/docs/developers/NewToGluten.md
index 04074d4e6..681f5f37e 100644
--- a/docs/developers/NewToGluten.md
+++ b/docs/developers/NewToGluten.md
@@ -43,6 +43,18 @@ export PATH="$PATH:$JAVA_HOME/bin"
> Must set PATH with double quote in ubuntu.
+## Openjdk17
+
+By default, Gluten compiles packages using JDK8. Add the maven profile `-Pjava-17`
to switch to JDK17, and please make sure your JAVA_HOME points to JDK17.
+
+Apache Spark and Arrow require setting the Java argument
`-Dio.netty.tryReflectionSetAccessible=true`, see
[SPARK-29924](https://issues.apache.org/jira/browse/SPARK-29924) and
[ARROW-6206](https://issues.apache.org/jira/browse/ARROW-6206).
+So please add the following configs to `spark-defaults.conf`:
+
+```
+spark.driver.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true
+spark.executor.extraJavaOptions=-Dio.netty.tryReflectionSetAccessible=true
+```
+
## Maven 3.6.3 or above
[Maven Dowload Page](https://maven.apache.org/docs/history.html)
diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md
index e42ac16a2..7c3d77abc 100644
--- a/docs/get-started/Velox.md
+++ b/docs/get-started/Velox.md
@@ -5,28 +5,22 @@ nav_order: 1
parent: Getting-Started
---
# Supported Version
-| Type | Version |
-|-------|------------------------------|
-| Spark | 3.2.2, 3.3.1 |
-| OS | Ubuntu20.04/22.04, Centos7/8 |
-| jdk | openjdk8 |
-| scala | 2.12
-Spark3.4.0 support is still WIP. TPCH/DS can pass, UT is not yet passed.
-
-There are pending PRs for jdk11 support.
-
-
-Currently, the mvn script can automatically fetch and build all dependency
libraries incluing Velox. Our nightly build still use Velox under oap-project.
+| Type | Version |
+|-------|---------------------------------|
+| Spark | 3.2.2, 3.3.1, 3.4.2, 3.5.1(wip) |
+| OS | Ubuntu20.04/22.04, Centos7/8 |
+| jdk | openjdk8/jdk17 |
+| scala | 2.12 |
# Prerequisite
-Currently, Gluten+Velox backend is only tested on
**Ubuntu20.04/Ubuntu22.04/Centos8**. Other kinds of OS support are still in
progress. The long term goal is to support several
+Currently, Gluten+Velox backend is only tested on
**Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. Other kinds of OS support are
still in progress. The long term goal is to support several
common OS and conda env deployment.
-Gluten builds with Spark3.2.x and Spark3.3.x now but only fully tested in CI
with 3.2.2 and 3.3.1. We will add/update supported/tested versions according to
the upstream changes.
+Gluten is only fully tested in CI with Spark 3.2.2, 3.3.1 and 3.4.2. We will add/update
supported/tested versions according to the upstream changes.
-we need to set up the `JAVA_HOME` env. Currently, **java 8** is required and
the support for java 11/17 is not ready.
+We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8**
and **java 17**.
**For x86_64**
@@ -63,7 +57,7 @@ It's recommended to use buildbundle-veloxbe.sh to build
gluten in one script.
```bash
cd /path/to/gluten
-## The script builds two jars for spark 3.2.2 and 3.3.1.
+## The script builds jars for all Spark versions
./dev/buildbundle-veloxbe.sh
## After a complete build, if you need to re-build the project and only some
gluten code is changed,
@@ -84,6 +78,8 @@ cd /path/to/gluten
**Build Velox separately**
+Gluten still uses Velox under oap-project and updates it daily from
upstream (Meta) Velox.
+
Scripts under `/path/to/gluten/ep/build-velox/src` provide `get_velox.sh` and
`build_velox.sh` to build Velox separately, you could use these scripts with
custom repo/branch/location.
Velox provides arrow/parquet lib. Gluten cpp module need a required VELOX_HOME
parsed by --velox_home, if you specify custom ep location, make sure these
variables be passed correctly.
diff --git a/pom.xml b/pom.xml
index 8975ce211..afe86e56a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,6 +37,9 @@
</modules>
<properties>
+ <java.version>1.8</java.version>
+ <maven.compiler.source>${java.version}</maven.compiler.source>
+ <maven.compiler.target>${java.version}</maven.compiler.target>
<caffeine.version.java8>2.9.3</caffeine.version.java8>
<delta.version>2.0.1</delta.version>
<delta.binary.version>20</delta.binary.version>
@@ -97,13 +100,31 @@
<!-- plugin version-->
<build-helper-maven-plugin.version>3.2.0</build-helper-maven-plugin.version>
<scala.compiler.version>4.8.0</scala.compiler.version>
- <maven.compiler.plugin>3.8.0</maven.compiler.plugin>
+ <maven.compiler.plugin>3.12.1</maven.compiler.plugin>
<maven.jar.plugin>3.2.2</maven.jar.plugin>
<scalastyle.version>1.0.0</scalastyle.version>
<scalatest-maven-plugin.version>2.2.0</scalatest-maven-plugin.version>
</properties>
<profiles>
+ <profile>
+ <id>java-8</id>
+ <activation>
+ <jdk>1.8</jdk>
+ </activation>
+ <properties>
+ <java.version>1.8</java.version>
+ </properties>
+ </profile>
+ <profile>
+ <id>java-17</id>
+ <activation>
+ <jdk>17</jdk>
+ </activation>
+ <properties>
+ <java.version>17</java.version>
+ </properties>
+ </profile>
<profile>
<id>spark-3.2</id>
<activation>
@@ -112,8 +133,8 @@
<properties>
<sparkbundle.version>3.2</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark32</sparkshim.artifactId>
- <spark.version>3.2.2</spark.version>
- <iceberg.version>1.3.1</iceberg.version>
+ <spark.version>3.2.2</spark.version>
+ <iceberg.version>1.3.1</iceberg.version>
<delta.version>2.0.1</delta.version>
<delta.binary.version>20</delta.binary.version>
</properties>
@@ -123,10 +144,10 @@
<properties>
<sparkbundle.version>3.3</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark33</sparkshim.artifactId>
- <spark.version>3.3.1</spark.version>
- <!-- keep using iceberg v1.3.1 for parquet compatibilty. -->
- <iceberg.version>1.3.1</iceberg.version>
- <delta.version>2.2.0</delta.version>
+ <spark.version>3.3.1</spark.version>
+ <!-- keep using iceberg v1.3.1 for parquet compatibilty. -->
+ <iceberg.version>1.3.1</iceberg.version>
+ <delta.version>2.2.0</delta.version>
<delta.binary.version>22</delta.binary.version>
</properties>
</profile>
@@ -135,9 +156,9 @@
<properties>
<sparkbundle.version>3.4</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId>
- <spark.version>3.4.2</spark.version>
- <iceberg.version>1.4.3</iceberg.version>
- <delta.version>2.4.0</delta.version>
+ <spark.version>3.4.2</spark.version>
+ <iceberg.version>1.4.3</iceberg.version>
+ <delta.version>2.4.0</delta.version>
<delta.binary.version>24</delta.binary.version>
</properties>
</profile>
@@ -146,8 +167,8 @@
<properties>
<sparkbundle.version>3.5</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark35</sparkshim.artifactId>
- <spark.version>3.5.1</spark.version>
- <iceberg.version>1.4.3</iceberg.version>
+ <spark.version>3.5.1</spark.version>
+ <iceberg.version>1.4.3</iceberg.version>
<delta.version>2.4.0</delta.version>
<delta.binary.version>24</delta.binary.version>
<hadoop.version>3.3.4</hadoop.version>
@@ -542,8 +563,6 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.compiler.plugin}</version>
<configuration>
- <source>1.8</source>
- <target>1.8</target>
<encoding>UTF-8</encoding>
<maxmem>1024m</maxmem>
<fork>true</fork>
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 7823cd32f..74e1da2f4 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -14,8 +14,9 @@
</modules>
<properties>
- <maven.compiler.source>8</maven.compiler.source>
- <maven.compiler.target>8</maven.compiler.target>
+ <java.version>1.8</java.version>
+ <maven.compiler.source>${java.version}</maven.compiler.source>
+ <maven.compiler.target>${java.version}</maven.compiler.target>
<scala.library.version>2.12.15</scala.library.version>
<spark.version>3.4.2</spark.version>
<scala.binary.version>2.12</scala.binary.version>
@@ -89,6 +90,24 @@
</dependencyManagement>
<profiles>
+ <profile>
+ <id>java-8</id>
+ <activation>
+ <jdk>1.8</jdk>
+ </activation>
+ <properties>
+ <java.version>1.8</java.version>
+ </properties>
+ </profile>
+ <profile>
+ <id>java-17</id>
+ <activation>
+ <jdk>17</jdk>
+ </activation>
+ <properties>
+ <java.version>17</java.version>
+ </properties>
+ </profile>
<profile>
<id>spark-3.2</id>
<activation>
diff --git a/tools/gluten-it/sbin/gluten-it.sh
b/tools/gluten-it/sbin/gluten-it.sh
index e5f27ed6b..98a240878 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,4 +28,23 @@ fi
JAR_PATH=$LIB_DIR/*
-java $GLUTEN_IT_JVM_ARGS -cp $JAR_PATH io.glutenproject.integration.tpc.Tpc $@
+$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
+ -XX:+IgnoreUnrecognizedVMOptions \
+ --add-opens=java.base/java.lang=ALL-UNNAMED \
+ --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
+ --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
+ --add-opens=java.base/java.io=ALL-UNNAMED \
+ --add-opens=java.base/java.net=ALL-UNNAMED \
+ --add-opens=java.base/java.nio=ALL-UNNAMED \
+ --add-opens=java.base/java.util=ALL-UNNAMED \
+ --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
+ --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
+ --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
+ --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
+ --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
+ --add-opens=java.base/sun.security.action=ALL-UNNAMED \
+ --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
+ -Djdk.reflect.useDirectMethodHandle=false \
+ -Dio.netty.tryReflectionSetAccessible=true \
+ -cp $JAR_PATH \
+ io.glutenproject.integration.tpc.Tpc $@
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]