This is an automated email from the ASF dual-hosted git repository.
ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 5b6a729fa [KYUUBI #5800] [KYUUBI#5467] Integrate Intel Gluten with
Spark engine
5b6a729fa is described below
commit 5b6a729fa832bc783cc39d201e953ba1545f608d
Author: wangjunbo <[email protected]>
AuthorDate: Thu Dec 7 10:47:00 2023 +0800
[KYUUBI #5800] [KYUUBI#5467] Integrate Intel Gluten with Spark engine
# :mag: Description
## Issue References :link:
This pull request fixes #5467
## Describe Your Solution :wrench:
1. Add Gluten UTs.
2. Setup CI for Gluten testing
3. Write docs to guide users in setting up Kyuubi with Spark plus Gluten.
## Types of changes :bookmark:
- [ ] Bugfix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
## Test Plan :test_tube:
#### Behavior Without This Pull Request :coffin:
#### Behavior With This Pull Request :tada:
#### Related Unit Tests
github action ci tests: [Gluten Test
CI](https://github.com/Kwafoor/incubator-kyuubi/actions/runs/7111586978)
---
# Checklists
## :memo: Author Self Checklist
- [ ] My code follows the [style
guidelines](https://kyuubi.readthedocs.io/en/master/contributing/code/style.html)
of this project
- [x] I have performed a self-review
- [x] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature
works
- [x] New and existing unit tests pass locally with my changes
- [ ] This patch was not authored or co-authored using [Generative
Tooling](https://www.apache.org/legal/generative-tooling.html)
## :memo: Committer Pre-Merge Checklist
- [ ] Pull request title is okay.
- [ ] No license issues.
- [ ] Milestone correctly set?
- [ ] Test coverage is ok
- [ ] Assignees are selected.
- [ ] Minimum number of approvals
- [ ] No changes are requested
**Be nice. Be informative.**
Closes #5800 from Kwafoor/kyuubi_5467.
Closes #5800
c6dd26f93 [wangjunbo] fix
7818ae0c5 [wangjunbo] fix Scala Test
296f08c8c [wangjunbo] remove spark-3.2 gluten test
5a704675d [wangjunbo] [KYUUBI#5467] Integrate Intel Gluten with Spark engine
Authored-by: wangjunbo <[email protected]>
Signed-off-by: ulyssesyou <[email protected]>
---
.github/workflows/gluten.yml | 128 ++++++++++++++++++
.github/workflows/master.yml | 2 +-
docs/deployment/spark/gluten.md | 55 ++++++++
docs/deployment/spark/index.rst | 1 +
integration-tests/kyuubi-gluten-it/pom.xml | 127 ++++++++++++++++++
.../src/test/resources/load-tpcds-tiny.sql | 146 +++++++++++++++++++++
.../src/test/resources/load-tpch-tiny.sql | 59 +++++++++
.../src/test/resources/log4j2-test.xml | 43 ++++++
.../org/apache/kyuubi/it/gluten/GlutenSuite.scala | 51 +++++++
.../org/apache/kyuubi/it/gluten/TPCUtils.scala | 32 +++++
.../it/gluten/tpcds/GlutenTPCDSQuerySuite.scala | 86 ++++++++++++
.../it/gluten/tpch/GlutenTPCHQuerySuite.scala | 75 +++++++++++
integration-tests/pom.xml | 1 +
.../scala/org/apache/kyuubi/GlutenSuiteMixin.scala | 33 +++++
.../java/org/apache/kyuubi/tags/GlutenTest.java | 29 ++++
15 files changed, 867 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/gluten.yml b/.github/workflows/gluten.yml
new file mode 100644
index 000000000..80a22ceaf
--- /dev/null
+++ b/.github/workflows/gluten.yml
@@ -0,0 +1,128 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Gluten CI
+
+on:
+ schedule:
+ - cron: 0 4 * * *
+
+env:
+ MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true
-Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn
-Pjdbc-shaded,gen-policy -Dmaven.plugin.download.cache.path=/tmp/engine-archives
+
+jobs:
+ gluten-build:
+ name: Build Gluten
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Tune Runner VM
+ uses: ./.github/actions/tune-runner-vm
+ - name: Update and Upgrade
+ run: sudo apt-get update && sudo apt-get upgrade -y
+ - name: Install dependencies
+ run: |
+ sudo apt-get install -y software-properties-common
+ sudo apt-get install -y libunwind-dev build-essential cmake
libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git
ninja-build uuid-dev
+ - name: Setup JDK 8
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 8
+ cache: 'maven'
+ check-latest: false
+ - name: Setup Maven
+ uses: ./.github/actions/setup-maven
+ - name: Get gluten cache date
+ id: date
+ run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+ - name: Check gluten cache
+ id: gluten-cache
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+ - name: Build gluten project
+ run: |
+ if [[ "${{ steps.gluten-cache.outputs.cache-hit }}" != 'true' ]];
then
+ git clone https://github.com/oap-project/gluten.git
+ cd gluten
+ ./dev/buildbundle-veloxbe.sh
+ fi
+ - uses: actions/cache@v3
+ if: steps.gluten-cache.outputs.cache-hit != 'true'
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+
+ gluten-it:
+ name: Gluten Integration TPC-H/DS Test
+ needs: gluten-build
+ runs-on: ubuntu-22.04
+ strategy:
+ fail-fast: false
+ matrix:
+ spark: [ '3.4', '3.3' ]
+ steps:
+ - uses: actions/checkout@v4
+ - name: Tune Runner VM
+ uses: ./.github/actions/tune-runner-vm
+ - name: Update and Upgrade
+ run: sudo apt-get update && sudo apt-get upgrade -y
+ - name: Install dependencies
+ run: |
+ sudo apt-get install -y software-properties-common
+ sudo apt-get install -y libunwind-dev build-essential cmake
libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git
ninja-build uuid-dev
+ sudo apt-get install -y libsnappy-dev libthrift-dev libboost-all-dev
libgflags-dev libgoogle-glog-dev
+ - name: Cache Engine Archives
+ uses: ./.github/actions/cache-engine-archives
+ - name: Get gluten cache date
+ id: date
+ run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+ - name: Check gluten cache
+ id: gluten-cache
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package_${{ steps.date.outputs.date }}
+ - name: Cache Gluten Package
+ uses: actions/cache@v3
+ with:
+ path: gluten/package/target/
+ key: gluten_package
+ - name: Setup JDK 8
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 8
+ cache: 'maven'
+ check-latest: false
+ - name: Setup Maven
+ uses: ./.github/actions/setup-maven
+ - name: Run Gluten Integration TPC-H/DS Test
+ run: |
+ TEST_MODULES="integration-tests/kyuubi-gluten-it"
+ ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install
-DskipTests -Pgluten-spark-${{ matrix.spark }}
+ ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test -Pgluten-spark-${{
matrix.spark }} \
+ -Dmaven.plugin.scalatest.exclude.tags='' -Dtest=none
-Dmaven.plugin.scalatest.include.tags='org.apache.kyuubi.tags.GlutenTest'
+ - name: Upload test logs
+ if: failure()
+ uses: actions/upload-artifact@v3
+ with:
+ name: unit-tests-log-spark-${{ matrix.spark }}-gluten
+ path: |
+ **/target/unit-tests.log
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 6c12700d8..109b21c23 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -156,7 +156,7 @@ jobs:
uses: ./.github/actions/cache-engine-archives
- name: Build on Scala ${{ matrix.scala }}
run: |
-
TEST_MODULES="!externals/kyuubi-flink-sql-engine,!integration-tests/kyuubi-flink-it"
+
TEST_MODULES="!externals/kyuubi-flink-sql-engine,!integration-tests/kyuubi-flink-it,!integration-tests/kyuubi-gluten-it"
./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
-Pscala-${{ matrix.scala }} -Pjava-${{ matrix.java }} -Pspark-${{
matrix.spark }}
- name: Upload test logs
diff --git a/docs/deployment/spark/gluten.md b/docs/deployment/spark/gluten.md
new file mode 100644
index 000000000..8f6bcdef7
--- /dev/null
+++ b/docs/deployment/spark/gluten.md
@@ -0,0 +1,55 @@
+<!--
+- Licensed to the Apache Software Foundation (ASF) under one or more
+- contributor license agreements. See the NOTICE file distributed with
+- this work for additional information regarding copyright ownership.
+- The ASF licenses this file to You under the Apache License, Version 2.0
+- (the "License"); you may not use this file except in compliance with
+- the License. You may obtain a copy of the License at
+-
+- http://www.apache.org/licenses/LICENSE-2.0
+-
+- Unless required by applicable law or agreed to in writing, software
+- distributed under the License is distributed on an "AS IS" BASIS,
+- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- See the License for the specific language governing permissions and
+- limitations under the License.
+-->
+<!-- DO NOT MODIFY THIS FILE DIRECTLY, IT IS AUTO-GENERATED BY
[org.apache.kyuubi.engine.spark.udf.KyuubiDefinedFunctionSuite] -->
+
+# Gluten
+
+Gluten is a Spark plugin developed by Intel, designed to accelerate Apache
Spark with native libraries. Currently, only CentOS 7/8 and Ubuntu 20.04/22.04,
along with Spark 3.2/3.3/3.4, are supported. Follow the steps below to use
Gluten with the Velox native backend.
+
+## Building (with Velox Backend)
+
+### Build gluten velox backend package
+
+Clone the Gluten project and run its build script `buildbundle-veloxbe.sh`;
the resulting package is written to `/path/to/gluten/package/target/`.
+
+```bash
+git clone https://github.com/oap-project/gluten.git
+cd /path/to/gluten
+
+## The script builds two jars for spark 3.2.x, 3.3.x, and 3.4.x.
+./dev/buildbundle-veloxbe.sh
+```
+
+## Usage
+
+You can use Gluten to accelerate Spark by following the steps below.
+
+### Installing
+
+Add the Gluten jar: `cp
/path/to/gluten/package/target/gluten-velox-bundle-spark3.x_2.12-*.jar
$SPARK_HOME/jars/`, or specify it via the `spark.jars` configuration.
+
+### Configure
+
+Add the following configuration to `spark-defaults.conf`:
+
+```properties
+spark.plugins=io.glutenproject.GlutenPlugin
+spark.memory.offHeap.size=20g
+spark.memory.offHeap.enabled=true
+spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager
+```
+
diff --git a/docs/deployment/spark/index.rst b/docs/deployment/spark/index.rst
index 0d75c5063..acaaa6ec5 100644
--- a/docs/deployment/spark/index.rst
+++ b/docs/deployment/spark/index.rst
@@ -30,3 +30,4 @@ Even if you don't use Kyuubi, as a simple Spark user, I'm
sure you'll find the n
dynamic_allocation
aqe
incremental_collection
+ gluten
diff --git a/integration-tests/kyuubi-gluten-it/pom.xml
b/integration-tests/kyuubi-gluten-it/pom.xml
new file mode 100644
index 000000000..5a2c2984c
--- /dev/null
+++ b/integration-tests/kyuubi-gluten-it/pom.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.kyuubi</groupId>
+ <artifactId>integration-tests</artifactId>
+ <version>1.9.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+
+ <artifactId>kyuubi-gluten-it_${scala.binary.version}</artifactId>
+ <name>Kyuubi Test Gluten IT</name>
+ <url>https://kyuubi.apache.org/</url>
+
+ <properties>
+ <gluten.version>1.1.0-SNAPSHOT</gluten.version>
+ <spark.version>3.4.1</spark.version>
+ <spark.binary.version>3.4</spark.binary.version>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.kyuubi</groupId>
+ <artifactId>kyuubi-common_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.kyuubi</groupId>
+ <artifactId>kyuubi-common_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.kyuubi</groupId>
+
<artifactId>kyuubi-spark-connector-common_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.kyuubi</groupId>
+
<artifactId>kyuubi-spark-connector-tpcds_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.kyuubi</groupId>
+
<artifactId>kyuubi-spark-connector-tpch_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>gluten-spark-3.4</id>
+ <properties>
+
<maven.plugin.scalatest.include.tags>org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.include.tags>
+ <spark.version>3.4.1</spark.version>
+ <spark.binary.version>3.4</spark.binary.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>io.glutenproject</groupId>
+
<artifactId>gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04</artifactId>
+ <version>${gluten.version}</version>
+ <scope>system</scope>
+
<systemPath>${project.basedir}/../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-${gluten.version}.jar</systemPath>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>gluten-spark-3.3</id>
+ <properties>
+
<maven.plugin.scalatest.include.tags>org.apache.kyuubi.tags.GlutenTest</maven.plugin.scalatest.include.tags>
+ <spark.version>3.3.1</spark.version>
+ <spark.binary.version>3.3</spark.binary.version>
+ </properties>
+ <dependencies>
+ <dependency>
+ <groupId>io.glutenproject</groupId>
+
<artifactId>gluten-velox-bundle-spark3.3_2.12-ubuntu_22.04</artifactId>
+ <version>${gluten.version}</version>
+ <scope>system</scope>
+
<systemPath>${project.basedir}/../../gluten/package/target/gluten-velox-bundle-spark3.3_2.12-ubuntu_22.04-${gluten.version}.jar</systemPath>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+</project>
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql
b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql
new file mode 100644
index 000000000..952a9cf3a
--- /dev/null
+++ b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql
@@ -0,0 +1,146 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE DATABASE IF NOT EXISTS spark_catalog.tpcds_tiny;
+
+USE spark_catalog.tpcds_tiny;
+
+--
+-- Name: catalog_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_sales USING parquet PARTITIONED BY
(cs_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.catalog_sales;
+
+--
+-- Name: catalog_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_returns USING parquet PARTITIONED BY
(cr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.catalog_returns;
+
+--
+-- Name: inventory; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS inventory USING parquet PARTITIONED BY (inv_date_sk)
+AS SELECT * FROM tpcds.tiny.inventory;
+
+--
+-- Name: store_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_sales USING parquet PARTITIONED BY
(ss_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.store_sales;
+
+--
+-- Name: store_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store_returns USING parquet PARTITIONED BY
(sr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.store_returns;
+
+--
+-- Name: web_sales; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_sales USING parquet PARTITIONED BY
(ws_sold_date_sk)
+AS SELECT * FROM tpcds.tiny.web_sales;
+
+--
+-- Name: web_returns; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_returns USING parquet PARTITIONED BY
(wr_returned_date_sk)
+AS SELECT * FROM tpcds.tiny.web_returns;
+
+--
+-- Name: call_center; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS call_center USING parquet AS SELECT * FROM
tpcds.tiny.call_center;
+
+--
+-- Name: catalog_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS catalog_page USING parquet AS SELECT * FROM
tpcds.tiny.catalog_page;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM
tpcds.tiny.customer;
+
+--
+-- Name: customer_address; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_address USING parquet AS SELECT * FROM
tpcds.tiny.customer_address;
+
+--
+-- Name: customer_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer_demographics USING parquet AS SELECT *
FROM tpcds.tiny.customer_demographics;
+
+--
+-- Name: date_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS date_dim USING parquet AS SELECT * FROM
tpcds.tiny.date_dim;
+
+--
+-- Name: household_demographics; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS household_demographics USING parquet AS SELECT *
FROM tpcds.tiny.household_demographics;
+
+--
+-- Name: income_band; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS income_band USING parquet AS SELECT * FROM
tpcds.tiny.income_band;
+
+--
+-- Name: item; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS item USING parquet AS SELECT * FROM tpcds.tiny.item;
+
+--
+-- Name: promotion; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS promotion USING parquet AS SELECT * FROM
tpcds.tiny.promotion;
+
+--
+-- Name: reason; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS reason USING parquet AS SELECT * FROM
tpcds.tiny.reason;
+
+--
+-- Name: ship_mode; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS ship_mode USING parquet AS SELECT * FROM
tpcds.tiny.ship_mode;
+
+--
+-- Name: store; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS store USING parquet AS SELECT * FROM
tpcds.tiny.store;
+
+--
+-- Name: time_dim; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS time_dim USING parquet AS SELECT * FROM
tpcds.tiny.time_dim;
+
+--
+-- Name: warehouse; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS warehouse USING parquet AS SELECT * FROM
tpcds.tiny.warehouse;
+
+--
+-- Name: web_page; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_page USING parquet AS SELECT * FROM
tpcds.tiny.web_page;
+
+--
+-- Name: web_site; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS web_site USING parquet AS SELECT * FROM
tpcds.tiny.web_site;
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql
b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql
new file mode 100644
index 000000000..8f2228f54
--- /dev/null
+++ b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql
@@ -0,0 +1,59 @@
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements. See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License. You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+CREATE DATABASE IF NOT EXISTS spark_catalog.tpch_tiny;
+
+USE spark_catalog.tpch_tiny;
+
+--
+-- Name: customer; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM
tpch.tiny.customer;
+
+--
+-- Name: orders; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS orders USING parquet AS SELECT * FROM
tpch.tiny.orders;
+
+--
+-- Name: lineitem; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS lineitem USING parquet AS SELECT * FROM
tpch.tiny.lineitem;
+
+--
+-- Name: part; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS part USING parquet AS SELECT * FROM tpch.tiny.part;
+
+--
+-- Name: partsupp; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS partsupp USING parquet AS SELECT * FROM
tpch.tiny.partsupp;
+
+--
+-- Name: supplier; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS supplier USING parquet AS SELECT * FROM
tpch.tiny.supplier;
+
+--
+-- Name: nation; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS nation USING parquet AS SELECT * FROM
tpch.tiny.nation;
+
+--
+-- Name: region; Type: TABLE; Tablespace:
+--
+CREATE TABLE IF NOT EXISTS region USING parquet AS SELECT * FROM
tpch.tiny.region;
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml
b/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml
new file mode 100644
index 000000000..3110216c1
--- /dev/null
+++ b/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<!-- Extra logging related to initialization of Log4j.
+ Set to debug or trace if log4j initialization is failing. -->
+<Configuration status="WARN">
+ <Appenders>
+ <Console name="stdout" target="SYSTEM_OUT">
+ <PatternLayout pattern="%d{HH:mm:ss.SSS} %p %c: %m%n%ex"/>
+ <Filters>
+ <ThresholdFilter level="FATAL"/>
+ <RegexFilter regex=".*Thrift error occurred during processing
of message.*" onMatch="DENY" onMismatch="NEUTRAL"/>
+ </Filters>
+ </Console>
+ <File name="file" fileName="target/unit-tests.log">
+ <PatternLayout pattern="%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex"/>
+ <Filters>
+ <RegexFilter regex=".*Thrift error occurred during processing
of message.*" onMatch="DENY" onMismatch="NEUTRAL"/>
+ </Filters>
+ </File>
+ </Appenders>
+ <Loggers>
+ <Root level="INFO">
+ <AppenderRef ref="stdout"/>
+ <AppenderRef ref="file"/>
+ </Root>
+ </Loggers>
+</Configuration>
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala
new file mode 100644
index 000000000..67e9a92b6
--- /dev/null
+++
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.it.gluten
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+
+import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite}
+import
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.tags.GlutenTest
+
+/**
+ * Smoke tests that run trivial SQL in a local SparkSession built from `sparkConf`.
+ *
+ * NOTE(review): `extraConfigs` is not defined in this class; presumably it is
+ * provided by `GlutenSuiteMixin` -- confirm against that trait.
+ */
+@GlutenTest
+class GlutenSuite extends KyuubiFunSuite with GlutenSuiteMixin {
+
+  // Local-mode, UI-less Spark configuration with every `extraConfigs` entry applied on top.
+  lazy val sparkConf: SparkConf = {
+    val glutenConf = new SparkConf().setMaster("local[*]")
+      .set("spark.ui.enabled", "false")
+    extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) }
+    glutenConf
+  }
+
+  test("KYUUBI #5467:test gluten select") {
+    withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+      val result = spark.sql("SELECT 1").head()
+      assert(result.get(0) == 1)
+    }
+  }
+
+  test("KYUUBI #5467: test gluten plan") {
+    withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+      val plan = spark.sql("explain SELECT 1").head().getString(0)
+      // The explained plan must mention both operator names. They are asserted
+      // separately (the original repeated the first check twice) so that a
+      // failure reports exactly which operator is missing.
+      assert(plan.contains("VeloxColumnarToRowExec"))
+      assert(plan.contains("RowToVeloxColumnar"))
+    }
+  }
+}
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala
new file mode 100644
index 000000000..667a23780
--- /dev/null
+++
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.it.gluten
+
+import scala.io.{Codec, Source}
+
+import org.apache.kyuubi.Utils
+
+/** Helper for reading TPC resource files (queries, load scripts) from the classpath. */
+object TPCUtils {
+
+  /**
+   * Reads a classpath resource completely and returns it as a UTF-8 decoded string.
+   *
+   * @param resourceFile resource name handed to `getResourceAsStream` of the class
+   *                     loader returned by `Utils.getContextOrKyuubiClassLoader`
+   * @return the full content of the resource
+   */
+  def loadTPCFile(resourceFile: String): String = {
+    val in = Utils.getContextOrKyuubiClassLoader.getResourceAsStream(resourceFile)
+    // Close the stream on every path; previously it leaked when reading or
+    // decoding threw before `close()` was reached.
+    try Source.fromInputStream(in)(Codec.UTF8).mkString
+    finally in.close()
+  }
+}
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala
new file mode 100644
index 000000000..9110974a3
--- /dev/null
+++
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.it.gluten.tpcds
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.scalatest.tags.Slow
+
+import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite}
+import org.apache.kyuubi.it.gluten.TPCUtils.loadTPCFile
+import org.apache.kyuubi.spark.connector.common.GoldenFileUtils.LICENSE_HEADER
+import
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.spark.connector.tpcds.TPCDSCatalog
+import org.apache.kyuubi.tags.GlutenTest
+
+/**
+ * Runs TPC-DS queries in a local Spark session configured with the settings from
+ * [[GlutenSuiteMixin]] and compares every query's schema and result hash with the golden
+ * files stored under `kyuubi/tpcds_3.2/`.
+ */
+@Slow
+@GlutenTest
+class GlutenTPCDSQuerySuite extends KyuubiFunSuite with GlutenSuiteMixin {
+
+  /**
+   * Names of the queries to run: q1..q99, with q14/q23/q24/q39 replaced by their a/b variants,
+   * minus the queries listed under the TODO below.
+   */
+  val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
+    ("q14", "q23", "q24", "q39") +
+    ("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b") -
+    // TODO:Fix gluten tpc-ds query test
+    ("q1", "q4", "q7", "q11", "q12", "q17", "q20", "q21", "q25", "q26", "q29", "q30", "q34",
+      "q37", "q39a", "q39b", "q40", "q43", "q46", "q49", "q56", "q58", "q59", "q60", "q68",
+      "q73", "q74", "q78", "q79", "q81", "q82", "q83", "q84", "q91", "q98")
+
+  /**
+   * Local-mode Spark configuration that registers the TPC-DS catalog as `tpcds` and then
+   * applies every entry of `extraConfigs`.
+   */
+  lazy val sparkConf: SparkConf = {
+    val glutenConf = new SparkConf().setMaster("local[*]")
+      .set("spark.ui.enabled", "false")
+      .set("spark.sql.catalogImplementation", "in-memory")
+      .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
+      .set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
+    extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) }
+    glutenConf
+  }
+
+  test("KYUUBI #5467:gluten tpc-ds tiny query suite") {
+    val viewSuffix = "view"
+    withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+      loadTPDSTINY(spark)
+      queries.map { queryName =>
+        queryName -> loadTPCFile(s"kyuubi/tpcds_3.2/$queryName.sql")
+      }.foreach { case (name, sql) =>
+        try {
+          // Build the DataFrame once and reuse it for both the rows and the schema, so the
+          // query text is not submitted to spark.sql twice.
+          val df = spark.sql(sql)
+          val result = df.collect()
+          val schema = df.schema
+          val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n"
+          // Expose the collected rows as a temp view so they can be reduced to a single hash.
+          spark.createDataFrame(result.toList.asJava, schema)
+            .createTempView(s"$name$viewSuffix")
+          val sumHashResult = LICENSE_HEADER + spark.sql(
+            s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n"
+          val expectHash = loadTPCFile(s"kyuubi/tpcds_3.2/$name.output.hash")
+          val expectSchema = loadTPCFile(s"kyuubi/tpcds_3.2/$name.output.schema")
+          assert(schemaDDL == expectSchema)
+          assert(sumHashResult == expectHash)
+        } catch {
+          // Re-raise any failure as a test failure whose message is the query name.
+          case cause: Throwable =>
+            fail(name, cause)
+        }
+      }
+    }
+  }
+
+  /**
+   * Executes the statements of `load-tpcds-tiny.sql` in the given session.
+   *
+   * The script is split on `";\n"` and blank fragments are skipped.
+   *
+   * @param sc the Spark session in which the statements are run
+   */
+  def loadTPDSTINY(sc: SparkSession): Unit = {
+    val queryContent: String = loadTPCFile("load-tpcds-tiny.sql")
+    queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql =>
+      sc.sql(sql)
+    }
+  }
+}
diff --git
a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala
new file mode 100644
index 000000000..98b4e9448
--- /dev/null
+++
b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.it.gluten.tpch
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+import org.scalatest.tags.Slow
+
+import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite}
+import org.apache.kyuubi.it.gluten.TPCUtils.loadTPCFile
+import org.apache.kyuubi.spark.connector.common.GoldenFileUtils.LICENSE_HEADER
+import
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.spark.connector.tpch.TPCHCatalog
+import org.apache.kyuubi.tags.GlutenTest
+
+/**
+ * Runs TPC-H queries in a local Spark session configured with the settings from
+ * [[GlutenSuiteMixin]] and compares every query's schema and result hash with the golden
+ * files stored under `kyuubi/tpch/`.
+ */
+@Slow
+@GlutenTest
+class GlutenTPCHQuerySuite extends KyuubiFunSuite with GlutenSuiteMixin {
+  // TODO: Fix the inconsistency in q9 results.
+  /** Names of the queries to run: q1..q22 without q9. */
+  val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet - "q9"
+
+  /**
+   * Local-mode Spark configuration that registers the TPC-H catalog as `tpch` and then
+   * applies every entry of `extraConfigs`.
+   */
+  lazy val sparkConf: SparkConf = {
+    val glutenConf = new SparkConf().setMaster("local[*]")
+      .set("spark.ui.enabled", "false")
+      .set("spark.sql.catalogImplementation", "in-memory")
+      .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName)
+    extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) }
+    glutenConf
+  }
+
+  test("KYUUBI #5467:gluten tpc-h tiny query suite") {
+    val viewSuffix = "view"
+    withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark =>
+      loadTPCHTINY(spark)
+      queries.map { queryName =>
+        queryName -> loadTPCFile(s"kyuubi/tpch/$queryName.sql")
+      }.foreach { case (name, sql) =>
+        // Build the DataFrame once and reuse it for both the rows and the schema, so the
+        // query text is not submitted to spark.sql twice.
+        val df = spark.sql(sql)
+        val result = df.collect()
+        val schema = df.schema
+        val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n"
+        // Expose the collected rows as a temp view so they can be reduced to a single hash.
+        spark.createDataFrame(result.toList.asJava, schema)
+          .createTempView(s"$name$viewSuffix")
+        val sumHashResult = LICENSE_HEADER + spark.sql(
+          s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n"
+        val expectHash = loadTPCFile(s"kyuubi/tpch/$name.output.hash")
+        val expectSchema = loadTPCFile(s"kyuubi/tpch/$name.output.schema")
+        assert(schemaDDL == expectSchema, s"query $name schema not match")
+        assert(sumHashResult == expectHash, s"query $name result not match")
+      }
+    }
+  }
+
+  /**
+   * Executes the statements of `load-tpch-tiny.sql` in the given session.
+   *
+   * The script is split on `";\n"` and blank fragments are skipped.
+   *
+   * @param sc the Spark session in which the statements are run
+   */
+  def loadTPCHTINY(sc: SparkSession): Unit = {
+    val queryContent: String = loadTPCFile("load-tpch-tiny.sql")
+    queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql =>
+      sc.sql(sql)
+    }
+  }
+}
diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml
index 35d0b4f9e..d28f391b4 100644
--- a/integration-tests/pom.xml
+++ b/integration-tests/pom.xml
@@ -31,6 +31,7 @@
<modules>
<module>kyuubi-flink-it</module>
+ <module>kyuubi-gluten-it</module>
<module>kyuubi-hive-it</module>
<module>kyuubi-trino-it</module>
<module>kyuubi-jdbc-it</module>
diff --git
a/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala
b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala
new file mode 100644
index 000000000..6095e1630
--- /dev/null
+++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi
+
+/** Test mixin that supplies the Spark settings used by the Gluten test suites. */
+trait GlutenSuiteMixin {
+
+  /**
+   * Locates the Gluten Velox bundle jar on the JVM classpath.
+   *
+   * @return the first `java.class.path` entry that contains `gluten-velox-bundle-spark`
+   * @throws NoSuchElementException if no classpath entry matches
+   */
+  protected def extraJars: String = {
+    val bundleMarker = "gluten-velox-bundle-spark"
+    System.getProperty("java.class.path")
+      // Split on the platform separator (":" on Unix-like systems, ";" on Windows) rather
+      // than a hard-coded ":", which would also cut Windows drive letters apart.
+      .split(java.io.File.pathSeparator)
+      .find(_.contains(bundleMarker))
+      .getOrElse(throw new NoSuchElementException(
+        s"No classpath entry containing '$bundleMarker' was found in java.class.path"))
+  }
+
+  /**
+   * Spark settings applied on top of a suite's own configuration: the Gluten plugin class,
+   * off-heap memory settings, the columnar shuffle manager, and the jar found by `extraJars`.
+   */
+  protected def extraConfigs: Map[String, String] = Map(
+    "spark.plugins" -> "io.glutenproject.GlutenPlugin",
+    "spark.memory.offHeap.size" -> "4g",
+    "spark.memory.offHeap.enabled" -> "true",
+    "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager",
+    "spark.jars" -> extraJars)
+}
diff --git
a/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java
b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java
new file mode 100644
index 000000000..8620df4b9
--- /dev/null
+++ b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.tags;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import org.scalatest.TagAnnotation;
+
+/**
+ * Tag annotation for Gluten tests.
+ *
+ * <p>It is itself annotated with ScalaTest's {@code TagAnnotation}, is retained at runtime,
+ * and may be placed on types and on methods.
+ */
+@TagAnnotation
+@Target({ElementType.TYPE, ElementType.METHOD})
+@Retention(RetentionPolicy.RUNTIME)
+public @interface GlutenTest {}