This is an automated email from the ASF dual-hosted git repository.
fanng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new c8c66a44e [#3919] feat(catalog-lakehouse-paimon): Support hive backend
for Paimon Catalog (#5092)
c8c66a44e is described below
commit c8c66a44e31411bfbb343c26d7c7da36b43e4939
Author: cai can <[email protected]>
AuthorDate: Mon Oct 21 14:53:40 2024 +0800
[#3919] feat(catalog-lakehouse-paimon): Support hive backend for Paimon
Catalog (#5092)
### What changes were proposed in this pull request?
Support hive backend for Paimon Catalog
### Why are the changes needed?
Fix: https://github.com/apache/gravitino/issues/3919
### Does this PR introduce _any_ user-facing change?
Yes, `hive` becomes a supported catalog backend. Further documentation will be added in a later PR.
### How was this patch tested?
Added new unit tests (UT) and integration tests (IT).
---------
Co-authored-by: caican <[email protected]>
---
catalogs/catalog-lakehouse-paimon/build.gradle.kts | 34 ++++++++-
.../lakehouse/paimon/PaimonCatalogBackend.java | 3 +-
.../integration/test/CatalogPaimonBaseIT.java | 13 +++-
.../integration/test/CatalogPaimonHiveIT.java | 83 ++++++++++++++++++++++
.../lakehouse/paimon/utils/TestCatalogUtils.java | 23 +++++-
docs/lakehouse-paimon-catalog.md | 8 ++-
6 files changed, 154 insertions(+), 10 deletions(-)
diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts
b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
index 16a3382cf..6839cc163 100644
--- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts
+++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
@@ -46,10 +46,9 @@ dependencies {
exclude("com.sun.jersey")
exclude("javax.servlet")
exclude("org.apache.curator")
- exclude("org.apache.hive")
exclude("org.apache.hbase")
exclude("org.apache.zookeeper")
- exclude("org.eclipse.jetty.aggregate:jetty-all")
+ exclude("org.eclipse.jetty.aggregate")
exclude("org.mortbay.jetty")
exclude("org.mortbay.jetty:jetty")
exclude("org.mortbay.jetty:jetty-util")
@@ -67,9 +66,40 @@ dependencies {
exclude("org.apache.parquet:parquet-encoding")
exclude("org.apache.parquet:parquet-common")
exclude("org.apache.parquet:parquet-hadoop")
+ exclude("org.apache.parquet:parquet-hadoop-bundle")
exclude("org.apache.paimon:paimon-codegen-loader")
exclude("org.apache.paimon:paimon-shade-caffeine-2")
exclude("org.apache.paimon:paimon-shade-guava-30")
+ exclude("org.apache.hive:hive-service-rpc")
+ exclude("org.apache.logging.log4j")
+ exclude("com.google.guava")
+ exclude("commons-lang")
+ exclude("org.slf4j")
+ exclude("org.apache.orc")
+ exclude("org.apache.httpcomponents")
+ exclude("jline")
+ exclude("org.eclipse.jetty.orbit")
+ exclude("org.apache.ant")
+ exclude("com.tdunning")
+ exclude("io.dropwizard.metrics")
+ exclude("com.github.joshelser")
+ exclude("commons-codec")
+ exclude("commons-cli")
+ exclude("tomcat")
+ exclude("org.apache.avro")
+ exclude("net.sf.opencsv")
+ exclude("javolution")
+ exclude("com.jolbox")
+ exclude("com.zaxxer")
+ exclude("org.apache.derby")
+ exclude("org.datanucleus")
+ exclude("commons-pool")
+ exclude("commons-dbcp")
+ exclude("javax.jdo")
+ exclude("org.antlr")
+ exclude("co.cask.tephra")
+ exclude("com.google.code.findbugs")
+ exclude("com.github.spotbugs")
}
implementation(libs.bundles.log4j)
implementation(libs.commons.lang3)
diff --git
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
index 355a79f58..7371c5be3 100644
---
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
+++
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
@@ -21,5 +21,6 @@ package org.apache.gravitino.catalog.lakehouse.paimon;
/** The type of Apache Paimon catalog backend. */
public enum PaimonCatalogBackend {
FILESYSTEM,
- JDBC
+ JDBC,
+ HIVE
}
diff --git
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
index ed90745a7..19bbde331 100644
---
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
+++
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
@@ -99,6 +99,7 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
protected String jdbcPassword;
protected Catalog catalog;
protected org.apache.paimon.catalog.Catalog paimonCatalog;
+ protected SparkSession spark;
protected String metalakeName =
GravitinoITUtils.genRandomName("paimon_it_metalake");
protected String catalogName =
GravitinoITUtils.genRandomName("paimon_it_catalog");
protected String schemaName =
GravitinoITUtils.genRandomName("paimon_it_schema");
@@ -115,8 +116,8 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
private static final String alertTableName = "alert_table_name";
private static String INSERT_BATCH_WITHOUT_PARTITION_TEMPLATE = "INSERT INTO
paimon.%s VALUES %s";
private static final String SELECT_ALL_TEMPLATE = "SELECT * FROM paimon.%s";
+ private static final String DEFAULT_DB = "default";
private GravitinoMetalake metalake;
- protected SparkSession spark;
private Map<String, String> catalogProperties;
@BeforeAll
@@ -727,7 +728,7 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
// update column position
Column col1 = Column.of("name", Types.StringType.get(), "comment");
Column col2 = Column.of("address", Types.StringType.get(), "comment");
- Column col3 = Column.of("date_of_birth", Types.DateType.get(), "comment");
+ Column col3 = Column.of("date_of_birth", Types.StringType.get(),
"comment");
Column[] newColumns = new Column[] {col1, col2, col3};
NameIdentifier tableIdentifier =
@@ -874,7 +875,13 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
private void clearTableAndSchema() {
SupportsSchemas supportsSchema = catalog.asSchemas();
Arrays.stream(supportsSchema.listSchemas())
- .forEach(schema -> supportsSchema.dropSchema(schema, true));
+ .forEach(
+ schema -> {
+ // The default database cannot be dropped when the Hive backend is used.
+ if (!DEFAULT_DB.equalsIgnoreCase(schema)) {
+ supportsSchema.dropSchema(schema, true);
+ }
+ });
}
private void createMetalake() {
diff --git
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
new file mode 100644
index 000000000..fcb220a88
--- /dev/null
+++
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.lakehouse.paimon.integration.test;
+
+import com.google.common.collect.Maps;
+import java.util.Map;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.Schema;
+import org.apache.gravitino.SupportsSchemas;
+import
org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata;
+import org.apache.gravitino.integration.test.container.HiveContainer;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.paimon.catalog.Catalog;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+@Tag("gravitino-docker-test")
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class CatalogPaimonHiveIT extends CatalogPaimonBaseIT {
+
+ @Override
+ protected Map<String, String> initPaimonCatalogProperties() {
+ Map<String, String> catalogProperties = Maps.newHashMap();
+ catalogProperties.put("key1", "val1");
+ catalogProperties.put("key2", "val2");
+
+ TYPE = "hive";
+ WAREHOUSE =
+ String.format(
+ "hdfs://%s:%d/user/hive/warehouse-catalog-paimon/",
+ containerSuite.getHiveContainer().getContainerIpAddress(),
+ HiveContainer.HDFS_DEFAULTFS_PORT);
+ URI =
+ String.format(
+ "thrift://%s:%d",
+ containerSuite.getHiveContainer().getContainerIpAddress(),
+ HiveContainer.HIVE_METASTORE_PORT);
+
+
catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND,
TYPE);
+ catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE,
WAREHOUSE);
+ catalogProperties.put(PaimonCatalogPropertiesMetadata.URI, URI);
+
+ return catalogProperties;
+ }
+
+ @Test
+ void testPaimonSchemaProperties() throws Catalog.DatabaseNotExistException {
+ SupportsSchemas schemas = catalog.asSchemas();
+
+ // Create a schema with a custom property and check the returned properties.
+ String testSchemaName = GravitinoITUtils.genRandomName("test_schema_1");
+ NameIdentifier schemaIdent = NameIdentifier.of(metalakeName, catalogName,
testSchemaName);
+ Map<String, String> schemaProperties = Maps.newHashMap();
+ schemaProperties.put("key", "hive");
+ Schema createdSchema =
+ schemas.createSchema(schemaIdent.name(), schema_comment,
schemaProperties);
+ Assertions.assertEquals(createdSchema.properties().get("key"), "hive");
+
+ // Load the schema through Gravitino and through Paimon; check the property in both.
+ Schema schema = schemas.loadSchema(schemaIdent.name());
+ Assertions.assertEquals(schema.properties().get("key"), "hive");
+ Map<String, String> loadedProps =
paimonCatalog.loadDatabaseProperties(schemaIdent.name());
+ Assertions.assertEquals(loadedProps.get("key"), "hive");
+ }
+}
diff --git
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
index e8fe66551..d1b50d520 100644
---
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
+++
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
@@ -29,9 +29,12 @@ import java.util.Locale;
import java.util.function.Consumer;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogBackend;
import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig;
+import org.apache.gravitino.integration.test.container.ContainerSuite;
+import org.apache.gravitino.integration.test.container.HiveContainer;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.FileSystemCatalog;
import org.apache.paimon.factories.FactoryException;
+import org.apache.paimon.hive.HiveCatalog;
import org.apache.paimon.jdbc.JdbcCatalog;
import org.junit.jupiter.api.Test;
@@ -44,6 +47,8 @@ public class TestCatalogUtils {
assertCatalog(PaimonCatalogBackend.FILESYSTEM.name(),
FileSystemCatalog.class);
// Test load JdbcCatalog for jdbc metastore.
assertCatalog(PaimonCatalogBackend.JDBC.name(), JdbcCatalog.class);
+ // Test load HiveCatalog for hive metastore.
+ assertCatalog(PaimonCatalogBackend.HIVE.name(), HiveCatalog.class);
// Test load catalog exception for other metastore.
assertThrowsExactly(FactoryException.class, () -> assertCatalog("other",
catalog -> {}));
}
@@ -66,7 +71,7 @@ public class TestCatalogUtils {
System.getProperty("java.io.tmpdir"),
"paimon_catalog_warehouse"),
PaimonConfig.CATALOG_URI.getKey(),
- "jdbc:h2:mem:testdb",
+ generateUri(metastore),
PaimonConfig.CATALOG_JDBC_USER.getKey(),
"user",
PaimonConfig.CATALOG_JDBC_PASSWORD.getKey(),
@@ -75,4 +80,20 @@ public class TestCatalogUtils {
consumer.accept(catalog);
}
}
+
+ private static String generateUri(String metastore) {
+ String uri = "uri";
+ if (PaimonCatalogBackend.JDBC.name().equalsIgnoreCase(metastore)) {
+ uri = "jdbc:h2:mem:testdb";
+ } else if (PaimonCatalogBackend.HIVE.name().equalsIgnoreCase(metastore)) {
+ ContainerSuite containerSuite = ContainerSuite.getInstance();
+ containerSuite.startHiveContainer();
+ uri =
+ String.format(
+ "thrift://%s:%d",
+ containerSuite.getHiveContainer().getContainerIpAddress(),
+ HiveContainer.HIVE_METASTORE_PORT);
+ }
+ return uri;
+ }
}
diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md
index 4c336f3d3..14595f06c 100644
--- a/docs/lakehouse-paimon-catalog.md
+++ b/docs/lakehouse-paimon-catalog.md
@@ -22,17 +22,16 @@ Builds with Apache Paimon `0.8.0`.
### Catalog capabilities
-- Works as a catalog proxy, supporting `FilesystemCatalog` and `JdbcCatalog`.
+- Works as a catalog proxy, supporting `FilesystemCatalog`, `JdbcCatalog` and
`HiveCatalog`.
- Supports DDL operations for Paimon schemas and tables.
-- Doesn't support `HiveCatalog` catalog backend now.
- Doesn't support alterSchema.
### Catalog properties
| Property name | Description
| Default value | Required
| Since Version |
|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|-------------------|
-| `catalog-backend` | Catalog backend of
Gravitino Paimon catalog. Supports `filesystem` and `jdbc` now.
| (none) | Yes
| 0.6.0-incubating |
+| `catalog-backend` | Catalog backend of
Gravitino Paimon catalog. Supports `filesystem`, `jdbc` and `hive`.
| (none) | Yes
| 0.6.0-incubating |
| `uri` | The URI configuration
of the Paimon catalog. `thrift://127.0.0.1:9083` or
`jdbc:postgresql://127.0.0.1:5432/db_name` or
`jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for
`FilesystemCatalog`. | (none) | required if the value of
`catalog-backend` is not `filesystem`. | 0.6.0-incubating |
| `warehouse` | Warehouse directory of
catalog. `file:///user/hive/warehouse-paimon/` for local fs,
`hdfs://namespace/hdfs/path` for HDFS , `s3://{bucket-name}/path/` for S3 or
`oss://{bucket-name}/path` for Aliyun OSS | (none) | Yes
| 0.6.0-incubating |
| `authentication.type` | The type of
authentication for Paimon catalog backend, currently Gravitino only supports
`Kerberos` and `simple`.
| `simple` | No
| 0.6.0-incubating |
@@ -51,6 +50,9 @@ Builds with Apache Paimon `0.8.0`.
If you want to use the `oss` or `s3` warehouse, you need to place related jars
in the `catalogs/lakehouse-paimon/lib` directory, more information can be found
in the [Paimon S3](https://paimon.apache.org/docs/master/filesystems/s3/).
:::
+:::note
+The Hive backend does not support Kerberos authentication yet.
+:::
Any properties not defined by Gravitino with `gravitino.bypass.` prefix will
pass to Paimon catalog properties and HDFS configuration. For example, if
specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog
properties.