This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new c01edaba2 [#4528] improvement(hive-catalog): reduce hive catalog libs size from 146MB to 43MB (#4531)
c01edaba2 is described below

commit c01edaba281aa66de32ad3cc7cbafcfedaba09be
Author: mchades <[email protected]>
AuthorDate: Fri Aug 30 13:23:24 2024 +0800

    [#4528] improvement(hive-catalog): reduce hive catalog libs size from 146MB to 43MB (#4531)
    
    ### What changes were proposed in this pull request?
    
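    Remove unnecessary dependencies from the Hive catalog and exclude unneeded transitive dependencies, replacing the few usages of commons-collections `CollectionUtils` and Parquet `Strings` with JDK equivalents.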
    
    ### Why are the changes needed?
    
    Fix: #4528
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    CI passed
---
 LICENSE.bin                                        |  3 ++
 .../authorization-ranger/build.gradle.kts          | 16 +++++++---
 catalogs/catalog-hive/build.gradle.kts             | 36 +++++++++++++++++++---
 .../apache/gravitino/catalog/hive/HiveTable.java   |  5 ++-
 .../catalog/hive/HiveTableOperations.java          | 13 ++++----
 gradle/libs.versions.toml                          | 23 +++++++++-----
 6 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/LICENSE.bin b/LICENSE.bin
index ca9218f77..b56a0fdf4 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -358,6 +358,8 @@
    WildFly
    Confluent Kafka Streams Examples
    Apache Arrow
+   Rome
+   Jettison
 
    This product bundles various third-party components also under the
    Apache Software Foundation License 1.1
@@ -404,6 +406,7 @@
    Common Development and Distribution License 1.0
 
    Javax Activation
+   Javax Mail
    Stax API
    Java Servlet API
    JSR311 API
diff --git a/authorizations/authorization-ranger/build.gradle.kts b/authorizations/authorization-ranger/build.gradle.kts
index 51dc592a7..b197dc20c 100644
--- a/authorizations/authorization-ranger/build.gradle.kts
+++ b/authorizations/authorization-ranger/build.gradle.kts
@@ -31,11 +31,17 @@ dependencies {
   implementation(project(":core")) {
     exclude(group = "*")
   }
+
   implementation(libs.bundles.log4j)
   implementation(libs.commons.lang3)
   implementation(libs.guava)
-
+  implementation(libs.javax.jaxb.api) {
+    exclude("*")
+  }
+  implementation(libs.javax.ws.rs.api)
+  implementation(libs.jettison)
   compileOnly(libs.lombok)
+  implementation(libs.mail)
   implementation(libs.ranger.intg) {
     exclude("org.apache.hadoop", "hadoop-common")
     exclude("org.apache.hive", "hive-storage-api")
@@ -50,11 +56,9 @@ dependencies {
     exclude("org.apache.ranger", "ranger-plugin-classloader")
     exclude("net.java.dev.jna")
     exclude("javax.ws.rs")
+    exclude("org.eclipse.jetty")
   }
-  implementation(libs.javax.ws.rs.api)
-  implementation(libs.javax.jaxb.api) {
-    exclude("*")
-  }
+  implementation(libs.rome)
 
   testImplementation(project(":common"))
   testImplementation(project(":clients:client-java"))
@@ -70,6 +74,7 @@ dependencies {
     exclude("org.apache.lucene")
     exclude("org.apache.solr")
     exclude("org.apache.kafka")
+    exclude("org.eclipse.jetty")
     exclude("org.elasticsearch")
     exclude("org.elasticsearch.client")
     exclude("org.elasticsearch.plugin")
@@ -78,6 +83,7 @@ dependencies {
   }
   testImplementation(libs.hive2.jdbc) {
     exclude("org.slf4j")
+    exclude("org.eclipse.jetty.aggregate")
   }
   testImplementation(libs.mysql.driver)
 }
diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts
index 720428e0a..776e9bf39 100644
--- a/catalogs/catalog-hive/build.gradle.kts
+++ b/catalogs/catalog-hive/build.gradle.kts
@@ -30,12 +30,24 @@ val icebergVersion: String = libs.versions.iceberg.get()
 val scalaCollectionCompatVersion: String = 
libs.versions.scala.collection.compat.get()
 
 dependencies {
-  implementation(project(":api"))
-  implementation(project(":catalogs:catalog-common"))
-  implementation(project(":core"))
+  implementation(project(":api")) {
+    exclude("*")
+  }
+  implementation(project(":catalogs:catalog-common")) {
+    exclude("*")
+  }
+  implementation(project(":core")) {
+    exclude("*")
+  }
 
   implementation(libs.caffeine)
+  implementation(libs.commons.collections3)
+  implementation(libs.commons.configuration1)
+  implementation(libs.htrace.core4)
   implementation(libs.guava)
+  implementation(libs.hadoop2.auth) {
+    exclude("*")
+  }
   implementation(libs.hive2.exec) {
     artifact {
       classifier = "core"
@@ -43,19 +55,28 @@ dependencies {
     exclude("com.google.code.findbugs", "jsr305")
     exclude("com.google.protobuf")
     exclude("org.apache.avro")
+    exclude("org.apache.ant")
     exclude("org.apache.calcite")
     exclude("org.apache.calcite.avatica")
     exclude("org.apache.curator")
+    exclude("org.apache.derby")
     exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
+    exclude("org.apache.hive", "hive-llap-tez")
+    exclude("org.apache.hive", "hive-vector-code-gen")
+    exclude("org.apache.ivy")
     exclude("org.apache.logging.log4j")
     exclude("org.apache.zookeeper")
+    exclude("org.codehaus.groovy", "groovy-all")
+    exclude("org.datanucleus", "datanucleus-core")
     exclude("org.eclipse.jetty.aggregate", "jetty-all")
     exclude("org.eclipse.jetty.orbit", "javax.servlet")
     exclude("org.openjdk.jol")
     exclude("org.pentaho")
     exclude("org.slf4j")
   }
+  implementation(libs.woodstox.core)
   implementation(libs.hive2.metastore) {
+    exclude("ant")
     exclude("co.cask.tephra")
     exclude("com.github.joshelser")
     exclude("com.google.code.findbugs", "jsr305")
@@ -64,13 +85,16 @@ dependencies {
     exclude("com.zaxxer", "HikariCP")
     exclude("io.dropwizard.metricss")
     exclude("javax.transaction", "transaction-api")
+    exclude("org.apache.ant")
     exclude("org.apache.avro")
     exclude("org.apache.curator")
+    exclude("org.apache.derby")
     exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
     exclude("org.apache.hbase")
     exclude("org.apache.logging.log4j")
     exclude("org.apache.parquet", "parquet-hadoop-bundle")
     exclude("org.apache.zookeeper")
+    exclude("org.datanucleus")
     exclude("org.eclipse.jetty.aggregate", "jetty-all")
     exclude("org.eclipse.jetty.orbit", "javax.servlet")
     exclude("org.openjdk.jol")
@@ -135,7 +159,11 @@ tasks {
 
   val copyCatalogLibs by registering(Copy::class) {
     dependsOn("jar", "runtimeJars")
-    from("build/libs")
+    from("build/libs") {
+      exclude("guava-*.jar")
+      exclude("log4j-*.jar")
+      exclude("slf4j-*.jar")
+    }
     into("$rootDir/distribution/package/catalogs/hive/libs")
   }
 
diff --git a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
index f1c5f45fb..2108390c8 100644
--- a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
+++ b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
@@ -31,7 +31,6 @@ import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import lombok.ToString;
-import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.gravitino.catalog.hive.converter.HiveDataTypeConverter;
 import org.apache.gravitino.connector.BaseTable;
@@ -87,7 +86,7 @@ public class HiveTable extends BaseTable {
 
     StorageDescriptor sd = table.getSd();
     Distribution distribution = Distributions.NONE;
-    if (CollectionUtils.isNotEmpty(sd.getBucketCols())) {
+    if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
       // Hive table use hash strategy as bucketing strategy
       distribution =
           Distributions.hash(
@@ -96,7 +95,7 @@ public class HiveTable extends BaseTable {
     }
 
     SortOrder[] sortOrders = new SortOrder[0];
-    if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
+    if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
       sortOrders =
           sd.getSortCols().stream()
               .map(
diff --git a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
index 4ade7cdc7..a5ca0778d 100644
--- a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
+++ b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
@@ -45,7 +45,6 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.parquet.Strings;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -163,19 +162,19 @@ public class HiveTableOperations implements 
TableOperations, SupportsPartitions
     Preconditions.checkArgument(
         transformFields.size() == identityPartition.fieldNames().length,
         "Hive partition field names must be the same as table partitioning 
field names: %s, but got %s",
-        Strings.join(transformFields, ","),
-        Strings.join(
+        String.join(",", transformFields),
+        String.join(
+            ",",
             Arrays.stream(identityPartition.fieldNames())
-                .map(f -> Strings.join(f, "."))
-                .collect(Collectors.toList()),
-            ","));
+                .map(f -> String.join(".", f))
+                .collect(Collectors.toList())));
     Arrays.stream(identityPartition.fieldNames())
         .forEach(
             f ->
                 Preconditions.checkArgument(
                     transformFields.contains(f[0]),
                     "Hive partition field name must be in table partitioning 
field names: %s, but got %s",
-                    Strings.join(transformFields, ","),
+                    String.join(",", transformFields),
                     f[0]));
 
     try {
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 8cc2b0f48..f278e95a4 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -28,18 +28,19 @@ jetty = "9.4.51.v20230217"
 jersey = "2.41"
 mockito = "4.11.0"
 airlift-json = "237"
-airlift-log = "231"
 airlift-resolver = "1.6"
-airlift-units = "1.8"
 hive2 = "2.3.9"
 hadoop2 = "2.10.2"
 hadoop3 = "3.1.0"
 hadoop-minikdc = "3.3.6"
+htrace-core4 = "4.1.0-incubating"
 httpclient5 = "5.2.1"
 mockserver = "5.15.0"
 commons-lang3 = "3.14.0"
 commons-io = "2.15.0"
 commons-collections4 = "4.4"
+commons-collections3 = "3.2.2"
+commons-configuration1 = "1.6"
 commons-dbcp2 = "2.11.0"
 caffeine = "2.9.3"
 rocksdbjni = "7.10.2"
@@ -62,7 +63,6 @@ jline = "3.21.0"
 okhttp3 = "4.11.0"
 metrics = "4.2.25"
 prometheus = "0.16.0"
-jsqlparser = "4.2"
 mysql = "8.0.23"
 postgresql = "42.6.0"
 immutables-value = "2.10.0"
@@ -91,6 +91,10 @@ node-plugin = "7.0.1"
 commons-cli = "1.2"
 sun-activation-version = "1.2.0"
 error-prone = "3.1.0"
+woodstox-core = "5.3.0"
+mail = "1.4.1"
+rome = "1.0"
+jettison = "1.1"
 
 [libraries]
 protobuf-java = { group = "com.google.protobuf", name = "protobuf-java", 
version.ref = "protoc" }
@@ -130,18 +134,17 @@ hive2-metastore = { group = "org.apache.hive", name = 
"hive-metastore", version.
 hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref = 
"hive2"}
 hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref 
= "hive2"}
 hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref = 
"hive2"}
+hadoop2-auth = { group = "org.apache.hadoop", name = "hadoop-auth", 
version.ref = "hadoop2" }
 hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", 
version.ref = "hadoop2" }
 hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", 
version.ref = "hadoop2"}
 hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = 
"hadoop-mapreduce-client-core", version.ref = "hadoop2"}
 hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", 
version.ref = "hadoop3" }
 hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", 
version.ref = "hadoop3"}
 hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", 
version.ref = "hadoop3"}
-hadoop3-mapreduce-client-core = { group = "org.apache.hadoop", name = 
"hadoop-mapreduce-client-core", version.ref = "hadoop3"}
 hadoop3-minicluster = { group = "org.apache.hadoop", name = 
"hadoop-minicluster", version.ref = "hadoop-minikdc"}
+htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", 
version.ref = "htrace-core4" }
 airlift-json = { group = "io.airlift", name = "json", version.ref = 
"airlift-json"}
 airlift-resolver = { group = "io.airlift.resolver", name = "resolver", 
version.ref = "airlift-resolver"}
-airlift-units = { group = "io.airlift", name = "units", version.ref = 
"airlift-units"}
-airlift-log = { group = "io.airlift", name = "log", version.ref = 
"airlift-log"}
 httpclient5 = { group = "org.apache.httpcomponents.client5", name = 
"httpclient5", version.ref = "httpclient5" }
 mockserver-netty = { group = "org.mock-server", name = "mockserver-netty", 
version.ref = "mockserver" }
 mockserver-client-java = { group = "org.mock-server", name = 
"mockserver-client-java", version.ref = "mockserver" }
@@ -150,6 +153,8 @@ commons-io = { group = "commons-io", name = "commons-io", 
version.ref = "commons
 caffeine = { group = "com.github.ben-manes.caffeine", name = "caffeine", 
version.ref = "caffeine" }
 rocksdbjni = { group = "org.rocksdb", name = "rocksdbjni", version.ref = 
"rocksdbjni" }
 commons-collections4 = { group = "org.apache.commons", name = 
"commons-collections4", version.ref = "commons-collections4" }
+commons-collections3 = { group = "commons-collections", name = 
"commons-collections", version.ref = "commons-collections3" }
+commons-configuration1 = { group = "commons-configuration", name = 
"commons-configuration", version.ref = "commons-configuration1" }
 iceberg-aws = { group = "org.apache.iceberg", name = "iceberg-aws", 
version.ref = "iceberg" }
 iceberg-core = { group = "org.apache.iceberg", name = "iceberg-core", 
version.ref = "iceberg" }
 iceberg-api = { group = "org.apache.iceberg", name = "iceberg-api", 
version.ref = "iceberg" }
@@ -158,7 +163,6 @@ paimon-core = { group = "org.apache.paimon", name = 
"paimon-core", version.ref =
 paimon-format = { group = "org.apache.paimon", name = "paimon-format", 
version.ref = "paimon" }
 paimon-hive-catalog = { group = "org.apache.paimon", name = 
"paimon-hive-catalog", version.ref = "paimon" }
 trino-spi= { group = "io.trino", name = "trino-spi", version.ref = "trino" }
-trino-toolkit= { group = "io.trino", name = "trino-plugin-toolkit", 
version.ref = "trino" }
 trino-testing= { group = "io.trino", name = "trino-testing", version.ref = 
"trino" }
 trino-memory= { group = "io.trino", name = "trino-memory", version.ref = 
"trino" }
 trino-cli= { group = "io.trino", name = "trino-cli", version.ref = "trino" }
@@ -183,7 +187,6 @@ metrics-servlets = { group = "io.dropwizard.metrics", name 
= "metrics-servlets",
 prometheus-client = { group = "io.prometheus", name = "simpleclient", 
version.ref = "prometheus" }
 prometheus-dropwizard = { group = "io.prometheus", name = 
"simpleclient_dropwizard", version.ref = "prometheus" }
 prometheus-servlet = { group = "io.prometheus", name = "simpleclient_servlet", 
version.ref = "prometheus" }
-jsqlparser = { group = "com.github.jsqlparser", name = "jsqlparser", 
version.ref = "jsqlparser" }
 mysql-driver = { group = "mysql", name = "mysql-connector-java", version.ref = 
"mysql" }
 postgresql-driver = { group = "org.postgresql", name = "postgresql", 
version.ref = "postgresql" }
 minikdc = { group = "org.apache.hadoop", name = "hadoop-minikdc", version.ref 
= "hadoop-minikdc"}
@@ -194,6 +197,7 @@ kafka-clients = { group = "org.apache.kafka", name = 
"kafka-clients", version.re
 kafka = { group = "org.apache.kafka", name = "kafka_2.12", version.ref = 
"kafka" }
 curator-test = { group = "org.apache.curator", name = "curator-test", 
version.ref = "curator"}
 cglib = { group = "cglib", name = "cglib", version.ref = "cglib"}
+woodstox-core = { group = "com.fasterxml.woodstox", name = "woodstox-core", 
version.ref = "woodstox-core"}
 
 ranger-intg = { group = "org.apache.ranger", name = "ranger-intg", version.ref 
= "ranger" }
 javax-jaxb-api = { group = "javax.xml.bind", name = "jaxb-api", version.ref = 
"javax-jaxb-api" }
@@ -204,6 +208,9 @@ mybatis = { group = "org.mybatis", name = "mybatis", 
version.ref = "mybatis"}
 h2db = { group = "com.h2database", name = "h2", version.ref = "h2db"}
 awaitility = { group = "org.awaitility", name = "awaitility", version.ref = 
"awaitility" }
 servlet = { group = "javax.servlet", name = "javax.servlet-api", version.ref = 
"servlet" }
+mail = { group = "javax.mail", name = "mail", version.ref = "mail" }
+rome = { group = "rome", name = "rome", version.ref = "rome" }
+jettison = { group = "org.codehaus.jettison", name = "jettison", version.ref = 
"jettison" }
 
 [bundles]
 log4j = ["slf4j-api", "log4j-slf4j2-impl", "log4j-api", "log4j-core", 
"log4j-12-api"]

Reply via email to