This is an automated email from the ASF dual-hosted git repository.

fanng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new c8c66a44e [#3919] feat(catalog-lakehouse-paimon): Support hive backend 
for Paimon Catalog (#5092)
c8c66a44e is described below

commit c8c66a44e31411bfbb343c26d7c7da36b43e4939
Author: cai can <[email protected]>
AuthorDate: Mon Oct 21 14:53:40 2024 +0800

    [#3919] feat(catalog-lakehouse-paimon): Support hive backend for Paimon 
Catalog (#5092)
    
    ### What changes were proposed in this pull request?
    Support the Hive backend for the Paimon catalog.
    
    ### Why are the changes needed?
    
    Fix: https://github.com/apache/gravitino/issues/3919
    
    ### Does this PR introduce _any_ user-facing change?
    Documentation will be added in a later PR.
    
    ### How was this patch tested?
    New unit tests (UT) and integration tests (IT).
    
    ---------
    
    Co-authored-by: caican <[email protected]>
---
 catalogs/catalog-lakehouse-paimon/build.gradle.kts | 34 ++++++++-
 .../lakehouse/paimon/PaimonCatalogBackend.java     |  3 +-
 .../integration/test/CatalogPaimonBaseIT.java      | 13 +++-
 .../integration/test/CatalogPaimonHiveIT.java      | 83 ++++++++++++++++++++++
 .../lakehouse/paimon/utils/TestCatalogUtils.java   | 23 +++++-
 docs/lakehouse-paimon-catalog.md                   |  8 ++-
 6 files changed, 154 insertions(+), 10 deletions(-)

diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts 
b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
index 16a3382cf..6839cc163 100644
--- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts
+++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts
@@ -46,10 +46,9 @@ dependencies {
     exclude("com.sun.jersey")
     exclude("javax.servlet")
     exclude("org.apache.curator")
-    exclude("org.apache.hive")
     exclude("org.apache.hbase")
     exclude("org.apache.zookeeper")
-    exclude("org.eclipse.jetty.aggregate:jetty-all")
+    exclude("org.eclipse.jetty.aggregate")
     exclude("org.mortbay.jetty")
     exclude("org.mortbay.jetty:jetty")
     exclude("org.mortbay.jetty:jetty-util")
@@ -67,9 +66,40 @@ dependencies {
     exclude("org.apache.parquet:parquet-encoding")
     exclude("org.apache.parquet:parquet-common")
     exclude("org.apache.parquet:parquet-hadoop")
+    exclude("org.apache.parquet:parquet-hadoop-bundle")
     exclude("org.apache.paimon:paimon-codegen-loader")
     exclude("org.apache.paimon:paimon-shade-caffeine-2")
     exclude("org.apache.paimon:paimon-shade-guava-30")
+    exclude("org.apache.hive:hive-service-rpc")
+    exclude("org.apache.logging.log4j")
+    exclude("com.google.guava")
+    exclude("commons-lang")
+    exclude("org.slf4j")
+    exclude("org.apache.orc")
+    exclude("org.apache.httpcomponents")
+    exclude("jline")
+    exclude("org.eclipse.jetty.orbit")
+    exclude("org.apache.ant")
+    exclude("com.tdunning")
+    exclude("io.dropwizard.metrics")
+    exclude("com.github.joshelser")
+    exclude("commons-codec")
+    exclude("commons-cli")
+    exclude("tomcat")
+    exclude("org.apache.avro")
+    exclude("net.sf.opencsv")
+    exclude("javolution")
+    exclude("com.jolbox")
+    exclude("com.zaxxer")
+    exclude("org.apache.derby")
+    exclude("org.datanucleus")
+    exclude("commons-pool")
+    exclude("commons-dbcp")
+    exclude("javax.jdo")
+    exclude("org.antlr")
+    exclude("co.cask.tephra")
+    exclude("com.google.code.findbugs")
+    exclude("com.github.spotbugs")
   }
   implementation(libs.bundles.log4j)
   implementation(libs.commons.lang3)
diff --git 
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
 
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
index 355a79f58..7371c5be3 100644
--- 
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
+++ 
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogBackend.java
@@ -21,5 +21,6 @@ package org.apache.gravitino.catalog.lakehouse.paimon;
 /** The type of Apache Paimon catalog backend. */
 public enum PaimonCatalogBackend {
   FILESYSTEM,
-  JDBC
+  JDBC,
+  HIVE
 }
diff --git 
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
index ed90745a7..19bbde331 100644
--- 
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
+++ 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java
@@ -99,6 +99,7 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
   protected String jdbcPassword;
   protected Catalog catalog;
   protected org.apache.paimon.catalog.Catalog paimonCatalog;
+  protected SparkSession spark;
   protected String metalakeName = 
GravitinoITUtils.genRandomName("paimon_it_metalake");
   protected String catalogName = 
GravitinoITUtils.genRandomName("paimon_it_catalog");
   protected String schemaName = 
GravitinoITUtils.genRandomName("paimon_it_schema");
@@ -115,8 +116,8 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
   private static final String alertTableName = "alert_table_name";
   private static String INSERT_BATCH_WITHOUT_PARTITION_TEMPLATE = "INSERT INTO 
paimon.%s VALUES %s";
   private static final String SELECT_ALL_TEMPLATE = "SELECT * FROM paimon.%s";
+  private static final String DEFAULT_DB = "default";
   private GravitinoMetalake metalake;
-  protected SparkSession spark;
   private Map<String, String> catalogProperties;
 
   @BeforeAll
@@ -727,7 +728,7 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
     // update column position
     Column col1 = Column.of("name", Types.StringType.get(), "comment");
     Column col2 = Column.of("address", Types.StringType.get(), "comment");
-    Column col3 = Column.of("date_of_birth", Types.DateType.get(), "comment");
+    Column col3 = Column.of("date_of_birth", Types.StringType.get(), 
"comment");
 
     Column[] newColumns = new Column[] {col1, col2, col3};
     NameIdentifier tableIdentifier =
@@ -874,7 +875,13 @@ public abstract class CatalogPaimonBaseIT extends BaseIT {
   private void clearTableAndSchema() {
     SupportsSchemas supportsSchema = catalog.asSchemas();
     Arrays.stream(supportsSchema.listSchemas())
-        .forEach(schema -> supportsSchema.dropSchema(schema, true));
+        .forEach(
+            schema -> {
+              // can not drop default database for hive backend.
+              if (!DEFAULT_DB.equalsIgnoreCase(schema)) {
+                supportsSchema.dropSchema(schema, true);
+              }
+            });
   }
 
   private void createMetalake() {
diff --git 
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
new file mode 100644
index 000000000..fcb220a88
--- /dev/null
+++ 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonHiveIT.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.lakehouse.paimon.integration.test;
+
+import com.google.common.collect.Maps;
+import java.util.Map;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.Schema;
+import org.apache.gravitino.SupportsSchemas;
+import 
org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata;
+import org.apache.gravitino.integration.test.container.HiveContainer;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.paimon.catalog.Catalog;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+@Tag("gravitino-docker-test")
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class CatalogPaimonHiveIT extends CatalogPaimonBaseIT {
+
+  @Override
+  protected Map<String, String> initPaimonCatalogProperties() {
+    Map<String, String> catalogProperties = Maps.newHashMap();
+    catalogProperties.put("key1", "val1");
+    catalogProperties.put("key2", "val2");
+
+    TYPE = "hive";
+    WAREHOUSE =
+        String.format(
+            "hdfs://%s:%d/user/hive/warehouse-catalog-paimon/",
+            containerSuite.getHiveContainer().getContainerIpAddress(),
+            HiveContainer.HDFS_DEFAULTFS_PORT);
+    URI =
+        String.format(
+            "thrift://%s:%d",
+            containerSuite.getHiveContainer().getContainerIpAddress(),
+            HiveContainer.HIVE_METASTORE_PORT);
+
+    
catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND,
 TYPE);
+    catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, 
WAREHOUSE);
+    catalogProperties.put(PaimonCatalogPropertiesMetadata.URI, URI);
+
+    return catalogProperties;
+  }
+
+  @Test
+  void testPaimonSchemaProperties() throws Catalog.DatabaseNotExistException {
+    SupportsSchemas schemas = catalog.asSchemas();
+
+    // create schema check.
+    String testSchemaName = GravitinoITUtils.genRandomName("test_schema_1");
+    NameIdentifier schemaIdent = NameIdentifier.of(metalakeName, catalogName, 
testSchemaName);
+    Map<String, String> schemaProperties = Maps.newHashMap();
+    schemaProperties.put("key", "hive");
+    Schema createdSchema =
+        schemas.createSchema(schemaIdent.name(), schema_comment, 
schemaProperties);
+    Assertions.assertEquals(createdSchema.properties().get("key"), "hive");
+
+    // load schema check.
+    Schema schema = schemas.loadSchema(schemaIdent.name());
+    Assertions.assertEquals(schema.properties().get("key"), "hive");
+    Map<String, String> loadedProps = 
paimonCatalog.loadDatabaseProperties(schemaIdent.name());
+    Assertions.assertEquals(loadedProps.get("key"), "hive");
+  }
+}
diff --git 
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
index e8fe66551..d1b50d520 100644
--- 
a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
+++ 
b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/TestCatalogUtils.java
@@ -29,9 +29,12 @@ import java.util.Locale;
 import java.util.function.Consumer;
 import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogBackend;
 import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig;
+import org.apache.gravitino.integration.test.container.ContainerSuite;
+import org.apache.gravitino.integration.test.container.HiveContainer;
 import org.apache.paimon.catalog.Catalog;
 import org.apache.paimon.catalog.FileSystemCatalog;
 import org.apache.paimon.factories.FactoryException;
+import org.apache.paimon.hive.HiveCatalog;
 import org.apache.paimon.jdbc.JdbcCatalog;
 import org.junit.jupiter.api.Test;
 
@@ -44,6 +47,8 @@ public class TestCatalogUtils {
     assertCatalog(PaimonCatalogBackend.FILESYSTEM.name(), 
FileSystemCatalog.class);
     // Test load JdbcCatalog for jdbc metastore.
     assertCatalog(PaimonCatalogBackend.JDBC.name(), JdbcCatalog.class);
+    // Test load HiveCatalog for hive metastore.
+    assertCatalog(PaimonCatalogBackend.HIVE.name(), HiveCatalog.class);
     // Test load catalog exception for other metastore.
     assertThrowsExactly(FactoryException.class, () -> assertCatalog("other", 
catalog -> {}));
   }
@@ -66,7 +71,7 @@ public class TestCatalogUtils {
                             System.getProperty("java.io.tmpdir"),
                             "paimon_catalog_warehouse"),
                         PaimonConfig.CATALOG_URI.getKey(),
-                        "jdbc:h2:mem:testdb",
+                        generateUri(metastore),
                         PaimonConfig.CATALOG_JDBC_USER.getKey(),
                         "user",
                         PaimonConfig.CATALOG_JDBC_PASSWORD.getKey(),
@@ -75,4 +80,20 @@ public class TestCatalogUtils {
       consumer.accept(catalog);
     }
   }
+
+  private static String generateUri(String metastore) {
+    String uri = "uri";
+    if (PaimonCatalogBackend.JDBC.name().equalsIgnoreCase(metastore)) {
+      uri = "jdbc:h2:mem:testdb";
+    } else if (PaimonCatalogBackend.HIVE.name().equalsIgnoreCase(metastore)) {
+      ContainerSuite containerSuite = ContainerSuite.getInstance();
+      containerSuite.startHiveContainer();
+      uri =
+          String.format(
+              "thrift://%s:%d",
+              containerSuite.getHiveContainer().getContainerIpAddress(),
+              HiveContainer.HIVE_METASTORE_PORT);
+    }
+    return uri;
+  }
 }
diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md
index 4c336f3d3..14595f06c 100644
--- a/docs/lakehouse-paimon-catalog.md
+++ b/docs/lakehouse-paimon-catalog.md
@@ -22,17 +22,16 @@ Builds with Apache Paimon `0.8.0`.
 
 ### Catalog capabilities
 
-- Works as a catalog proxy, supporting `FilesystemCatalog` and `JdbcCatalog`.
+- Works as a catalog proxy, supporting `FilesystemCatalog`, `JdbcCatalog` and 
`HiveCatalog`.
 - Supports DDL operations for Paimon schemas and tables.
 
-- Doesn't support `HiveCatalog` catalog backend now.
 - Doesn't support alterSchema.
 
 ### Catalog properties
 
 | Property name                                      | Description             
                                                                                
                                                                                
                    | Default value          | Required                         
                               | Since Version     |
 
|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|-------------------|
-| `catalog-backend`                                  | Catalog backend of 
Gravitino Paimon catalog. Supports `filesystem` and `jdbc` now.                 
                                                                                
                         | (none)                 | Yes                         
                                    | 0.6.0-incubating  |
+| `catalog-backend`                                  | Catalog backend of 
Gravitino Paimon catalog. Supports `filesystem`, `jdbc` and `hive`.             
                                                                                
                         | (none)                 | Yes                         
                                    | 0.6.0-incubating  |
 | `uri`                                              | The URI configuration 
of the Paimon catalog. `thrift://127.0.0.1:9083` or 
`jdbc:postgresql://127.0.0.1:5432/db_name` or 
`jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for 
`FilesystemCatalog`. | (none)                 | required if the value of 
`catalog-backend` is not `filesystem`. | 0.6.0-incubating  |
 | `warehouse`                                        | Warehouse directory of 
catalog. `file:///user/hive/warehouse-paimon/` for local fs, 
`hdfs://namespace/hdfs/path` for HDFS , `s3://{bucket-name}/path/` for S3 or 
`oss://{bucket-name}/path` for Aliyun OSS  | (none)                 | Yes       
                                                      | 0.6.0-incubating  |
 | `authentication.type`                              | The type of 
authentication for Paimon catalog backend, currently Gravitino only supports 
`Kerberos` and `simple`.                                                        
                                   | `simple`               | No                
                                              | 0.6.0-incubating  |
@@ -51,6 +50,9 @@ Builds with Apache Paimon `0.8.0`.
 If you want to use the `oss` or `s3` warehouse, you need to place related jars 
in the `catalogs/lakehouse-paimon/lib` directory, more information can be found 
in the [Paimon S3](https://paimon.apache.org/docs/master/filesystems/s3/). 
 :::
 
+:::note
+The Hive backend does not support Kerberos authentication yet.
+:::
 
 Any properties not defined by Gravitino with `gravitino.bypass.` prefix will 
pass to Paimon catalog properties and HDFS configuration. For example, if 
specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog 
properties.
 

Reply via email to