This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 098e660 [KYUUBI #703] support hudi-0.9.0
098e660 is described below
commit 098e660a3d7603bab25f50e0d8219b26fda43fa9
Author: simon <[email protected]>
AuthorDate: Sat Sep 4 10:48:18 2021 +0800
[KYUUBI #703] support hudi-0.9.0
### _Why are the changes needed?_
Hudi has added Spark SQL support in version 0.9.0.
This PR adds the Hudi 0.9.0 dependency and unit tests for #703.
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run
test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests)
locally before making a pull request
Closes #994 from simon824/master.
Closes #703
a04b2e42 [simon] exclude jdk.tools dep
50e1ab01 [simon] fix jdk11 dep
f33ebdf1 [simon] fix hudi dep conflicts
ff2585c2 [simon] fix yarn dep conflicts
32dd1ea8 [simon] fix scala version conflicts
b8a37401 [simon] add spark.sql.catalogImplementation
82f5422f [simon] fix spark3 dependency
2a6c497d [simon] fix spark3 dependency
4dbec8be [Simon] Merge branch 'apache:master' into master
3f180157 [simon] fix maven
73e48d0a [simon] add spark3 support maven dependency
3def658b [simon] fix missing spark-sql-engine hudi maven dependency
524132d3 [simon] bugfix
e98998a6 [simon] fix pom error
59fc6669 [Simon] Merge branch 'apache:master' into master
d9e17ebf [simon] fix conflict
2466ece3 [simon] add sparkSQL test on hudi-0.9.0
Lead-authored-by: simon <[email protected]>
Co-authored-by: Simon <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
externals/kyuubi-spark-sql-engine/pom.xml | 36 ++++++++
.../spark/operation/SparkHudiOperationSuite.scala | 36 ++++++++
.../test/java/org/apache/kyuubi/tags/HudiTest.java | 30 ++++++
.../scala/org/apache/kyuubi/HudiSuiteMixin.scala | 46 ++++++++++
.../kyuubi/operation/BasicHudiJDBCTests.scala | 102 +++++++++++++++++++++
kyuubi-server/pom.xml | 36 ++++++++
.../operation/datalake/HudiOperationSuite.scala | 34 +++++++
pom.xml | 76 ++++++++++++++-
8 files changed, 394 insertions(+), 2 deletions(-)
diff --git a/externals/kyuubi-spark-sql-engine/pom.xml
b/externals/kyuubi-spark-sql-engine/pom.xml
index 1c40e34..cc1cdc9 100644
--- a/externals/kyuubi-spark-sql-engine/pom.xml
+++ b/externals/kyuubi-spark-sql-engine/pom.xml
@@ -140,6 +140,42 @@
</dependency>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-avro_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark3_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-avro</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
<scope>test</scope>
diff --git
a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkHudiOperationSuite.scala
b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkHudiOperationSuite.scala
new file mode 100644
index 0000000..f2f6017
--- /dev/null
+++
b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkHudiOperationSuite.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.engine.spark.operation
+
+import org.apache.kyuubi.engine.spark.WithSparkSQLEngine
+import org.apache.kyuubi.operation.BasicHudiJDBCTests
+import org.apache.kyuubi.tags.HudiTest
+
+@HudiTest
+class SparkHudiOperationSuite extends WithSparkSQLEngine with
BasicHudiJDBCTests {
+ override protected def jdbcUrl: String = getJdbcUrl
+
+ override def withKyuubiConf: Map[String, String] = extraConfigs
+
+ override def afterAll(): Unit = {
+ super.afterAll()
+ for ((k, _) <- extraConfigs) {
+ System.clearProperty(k)
+ }
+ }
+}
diff --git a/kyuubi-common/src/test/java/org/apache/kyuubi/tags/HudiTest.java
b/kyuubi-common/src/test/java/org/apache/kyuubi/tags/HudiTest.java
new file mode 100644
index 0000000..d52fd94
--- /dev/null
+++ b/kyuubi-common/src/test/java/org/apache/kyuubi/tags/HudiTest.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface HudiTest {}
diff --git
a/kyuubi-common/src/test/scala/org/apache/kyuubi/HudiSuiteMixin.scala
b/kyuubi-common/src/test/scala/org/apache/kyuubi/HudiSuiteMixin.scala
new file mode 100644
index 0000000..f4c38bf
--- /dev/null
+++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/HudiSuiteMixin.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi
+
+import java.nio.file.Path
+
+trait HudiSuiteMixin extends DataLakeSuiteMixin {
+
+ override protected def format: String = "hudi"
+
+ override protected def catalog: String = "spark_catalog"
+
+ override protected def warehouse: Path = Utils.createTempDir()
+
+ override protected def extraJars: String = {
+ var extraJars = ""
+ System.getProperty("java.class.path")
+ .split(":")
+ .filter(_.contains("jar"))
+ .foreach(i => extraJars += i + ",")
+
+ extraJars.substring(0, extraJars.length - 1)
+ }
+
+ override protected def extraConfigs = Map(
+ "spark.sql.catalogImplementation" -> "in-memory",
+ "spark.sql.defaultCatalog" -> catalog,
+ "spark.sql.extensions" ->
"org.apache.spark.sql.hudi.HoodieSparkSessionExtension",
+ "spark.serializer" -> "org.apache.spark.serializer.KryoSerializer",
+ "spark.jars" -> extraJars)
+}
diff --git
a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/BasicHudiJDBCTests.scala
b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/BasicHudiJDBCTests.scala
new file mode 100644
index 0000000..961d76a
--- /dev/null
+++
b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/BasicHudiJDBCTests.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.operation
+
+import org.apache.kyuubi.HudiSuiteMixin
+import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._
+
+
+trait BasicHudiJDBCTests extends JDBCTestUtils with HudiSuiteMixin {
+
+ test("get catalogs") {
+ withJdbcStatement() { statement =>
+ val metaData = statement.getConnection.getMetaData
+ val catalogs = metaData.getCatalogs
+ catalogs.next()
+ assert(catalogs.getString(TABLE_CAT) === "spark_catalog")
+ assert(!catalogs.next())
+ }
+ }
+
+ test("get schemas") {
+ val dbs = Seq("db1", "db2", "db33", "db44")
+ val dbDflts = Seq("default", "global_temp")
+
+ val catalog = "spark_catalog"
+ withDatabases(dbs: _*) { statement =>
+ dbs.foreach(db => statement.execute(s"CREATE DATABASE IF NOT EXISTS
$db"))
+ val metaData = statement.getConnection.getMetaData
+
+ Seq("", "*", "%", null, ".*", "_*", "_%", ".%") foreach { pattern =>
+ checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs ++ dbDflts,
catalog)
+ }
+
+ Seq("db%", "db.*") foreach { pattern =>
+ checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs, catalog)
+ }
+
+ Seq("db_", "db.") foreach { pattern =>
+ checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs.take(2),
catalog)
+ }
+
+ checkGetSchemas(metaData.getSchemas(catalog, "db1"), Seq("db1"), catalog)
+ checkGetSchemas(metaData.getSchemas(catalog, "db_not_exist"), Seq.empty,
catalog)
+ }
+ }
+
+ test("get tables") {
+ val table = "table_1_test"
+ val schema = "default"
+ val tableType = "TABLE"
+
+ withJdbcStatement(table) { statement =>
+ statement.execute(
+ s"""
+ | create table $table (
+ | id int,
+ | name string,
+ | price double,
+ | ts long
+ | ) using $format
+ | options (
+ | primaryKey = 'id',
+ | preCombineField = 'ts'
+ | )
+ """.stripMargin)
+
+ val metaData = statement.getConnection.getMetaData
+ val rs1 = metaData.getTables(null, null, null, null)
+
+ assert(rs1.next())
+ val catalogName = rs1.getString(TABLE_CAT)
+ assert(catalogName === "spark_catalog" || catalogName === null)
+ assert(rs1.getString(TABLE_SCHEM) === schema)
+ assert(rs1.getString(TABLE_NAME) == table)
+ assert(rs1.getString(TABLE_TYPE) == tableType)
+ assert(!rs1.next())
+
+ val rs2 = metaData.getTables(null, null, "table%", Array("TABLE"))
+ assert(rs2.next())
+ assert(rs2.getString(TABLE_NAME) == table)
+ assert(!rs2.next())
+
+ val rs3 = metaData.getTables(null, "default", "*", Array("VIEW"))
+ assert(!rs3.next())
+ }
+ }
+}
diff --git a/kyuubi-server/pom.xml b/kyuubi-server/pom.xml
index 2bb821a..db3e46f 100644
--- a/kyuubi-server/pom.xml
+++ b/kyuubi-server/pom.xml
@@ -149,6 +149,42 @@
</dependency>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-avro_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark3_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-avro</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
<scope>test</scope>
diff --git
a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/datalake/HudiOperationSuite.scala
b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/datalake/HudiOperationSuite.scala
new file mode 100644
index 0000000..ed90f26
--- /dev/null
+++
b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/datalake/HudiOperationSuite.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.operation.datalake
+
+import org.apache.kyuubi.WithKyuubiServer
+import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.operation.BasicHudiJDBCTests
+import org.apache.kyuubi.tags.HudiTest
+
+@HudiTest
+class HudiOperationSuite extends WithKyuubiServer with BasicHudiJDBCTests {
+ override protected val conf: KyuubiConf = {
+ val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L)
+ extraConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) }
+ kyuubiConf
+ }
+
+ override def jdbcUrl: String = getJdbcUrl
+}
diff --git a/pom.xml b/pom.xml
index 1ca49d9..59ccf50 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,9 @@
<guava.version>30.1-jre</guava.version>
<hadoop.version>3.2.2</hadoop.version>
<hadoop.binary.version>3.2</hadoop.binary.version>
+ <hbase.version>1.2.3</hbase.version>
<hive.version>2.3.7</hive.version>
+ <hudi.version>0.9.0</hudi.version>
<iceberg.name>iceberg-spark3-runtime</iceberg.name>
<iceberg.version>0.12.0</iceberg.version>
<jackson.version>2.11.4</jackson.version>
@@ -110,6 +112,7 @@
<jetty.version>9.4.41.v20210516</jetty.version>
<kubernetes-client.version>5.5.0</kubernetes-client.version>
<ldapsdk.version>5.1.4</ldapsdk.version>
+ <parquet.version>1.10.1</parquet.version>
<prometheus.version>0.10.0</prometheus.version>
<scalatest.version>3.2.9</scalatest.version>
<scopt.version>4.0.1</scopt.version>
@@ -1000,6 +1003,75 @@
<version>${iceberg.version}</version>
</dependency>
+ <!-- Hudi dependency -->
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-avro</artifactId>
+ <version>${parquet.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-all</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>jdk.tools</groupId>
+ <artifactId>jdk.tools</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-avro_${scala.binary.version}</artifactId>
+ <version>${spark.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark3_${scala.binary.version}</artifactId>
+ <version>${hudi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark-common_2.11</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+ <version>${hudi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-common</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark_${scala.binary.version}</artifactId>
+ <version>${hudi.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark2_2.11</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hudi</groupId>
+ <artifactId>hudi-spark-common_2.11</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
@@ -1589,7 +1661,7 @@
<properties>
<spark.version>3.1.2</spark.version>
<delta.version>1.0.0</delta.version>
-
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest</maven.plugin.scalatest.exclude.tags>
+
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
</properties>
</profile>
@@ -1597,7 +1669,7 @@
<id>spark-master</id>
<properties>
<spark.version>3.2.0-SNAPSHOT</spark.version>
-
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest</maven.plugin.scalatest.exclude.tags>
+
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
</properties>
</profile>