This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new c01edaba2 [#4528] improvement(hive-catalog): reduce hive catalog libs
size from 146MB to 43MB (#4531)
c01edaba2 is described below
commit c01edaba281aa66de32ad3cc7cbafcfedaba09be
Author: mchades <[email protected]>
AuthorDate: Fri Aug 30 13:23:24 2024 +0800
[#4528] improvement(hive-catalog): reduce hive catalog libs size from 146MB
to 43MB (#4531)
### What changes were proposed in this pull request?
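Remove unnecessary dependencies from the Hive catalog build (mostly via Gradle `exclude` rules and by filtering the copied libs), and update the Ranger authorization build and the version catalog accordingly.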
### Why are the changes needed?
Fix: #4528
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed
---
LICENSE.bin | 3 ++
.../authorization-ranger/build.gradle.kts | 16 +++++++---
catalogs/catalog-hive/build.gradle.kts | 36 +++++++++++++++++++---
.../apache/gravitino/catalog/hive/HiveTable.java | 5 ++-
.../catalog/hive/HiveTableOperations.java | 13 ++++----
gradle/libs.versions.toml | 23 +++++++++-----
6 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/LICENSE.bin b/LICENSE.bin
index ca9218f77..b56a0fdf4 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -358,6 +358,8 @@
WildFly
Confluent Kafka Streams Examples
Apache Arrow
+ Rome
+ Jettison
This product bundles various third-party components also under the
Apache Software Foundation License 1.1
@@ -404,6 +406,7 @@
Common Development and Distribution License 1.0
Javax Activation
+ Javax Mail
Stax API
Java Servlet API
JSR311 API
diff --git a/authorizations/authorization-ranger/build.gradle.kts
b/authorizations/authorization-ranger/build.gradle.kts
index 51dc592a7..b197dc20c 100644
--- a/authorizations/authorization-ranger/build.gradle.kts
+++ b/authorizations/authorization-ranger/build.gradle.kts
@@ -31,11 +31,17 @@ dependencies {
implementation(project(":core")) {
exclude(group = "*")
}
+
implementation(libs.bundles.log4j)
implementation(libs.commons.lang3)
implementation(libs.guava)
-
+ implementation(libs.javax.jaxb.api) {
+ exclude("*")
+ }
+ implementation(libs.javax.ws.rs.api)
+ implementation(libs.jettison)
compileOnly(libs.lombok)
+ implementation(libs.mail)
implementation(libs.ranger.intg) {
exclude("org.apache.hadoop", "hadoop-common")
exclude("org.apache.hive", "hive-storage-api")
@@ -50,11 +56,9 @@ dependencies {
exclude("org.apache.ranger", "ranger-plugin-classloader")
exclude("net.java.dev.jna")
exclude("javax.ws.rs")
+ exclude("org.eclipse.jetty")
}
- implementation(libs.javax.ws.rs.api)
- implementation(libs.javax.jaxb.api) {
- exclude("*")
- }
+ implementation(libs.rome)
testImplementation(project(":common"))
testImplementation(project(":clients:client-java"))
@@ -70,6 +74,7 @@ dependencies {
exclude("org.apache.lucene")
exclude("org.apache.solr")
exclude("org.apache.kafka")
+ exclude("org.eclipse.jetty")
exclude("org.elasticsearch")
exclude("org.elasticsearch.client")
exclude("org.elasticsearch.plugin")
@@ -78,6 +83,7 @@ dependencies {
}
testImplementation(libs.hive2.jdbc) {
exclude("org.slf4j")
+ exclude("org.eclipse.jetty.aggregate")
}
testImplementation(libs.mysql.driver)
}
diff --git a/catalogs/catalog-hive/build.gradle.kts
b/catalogs/catalog-hive/build.gradle.kts
index 720428e0a..776e9bf39 100644
--- a/catalogs/catalog-hive/build.gradle.kts
+++ b/catalogs/catalog-hive/build.gradle.kts
@@ -30,12 +30,24 @@ val icebergVersion: String = libs.versions.iceberg.get()
val scalaCollectionCompatVersion: String =
libs.versions.scala.collection.compat.get()
dependencies {
- implementation(project(":api"))
- implementation(project(":catalogs:catalog-common"))
- implementation(project(":core"))
+ implementation(project(":api")) {
+ exclude("*")
+ }
+ implementation(project(":catalogs:catalog-common")) {
+ exclude("*")
+ }
+ implementation(project(":core")) {
+ exclude("*")
+ }
implementation(libs.caffeine)
+ implementation(libs.commons.collections3)
+ implementation(libs.commons.configuration1)
+ implementation(libs.htrace.core4)
implementation(libs.guava)
+ implementation(libs.hadoop2.auth) {
+ exclude("*")
+ }
implementation(libs.hive2.exec) {
artifact {
classifier = "core"
@@ -43,19 +55,28 @@ dependencies {
exclude("com.google.code.findbugs", "jsr305")
exclude("com.google.protobuf")
exclude("org.apache.avro")
+ exclude("org.apache.ant")
exclude("org.apache.calcite")
exclude("org.apache.calcite.avatica")
exclude("org.apache.curator")
+ exclude("org.apache.derby")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
+ exclude("org.apache.hive", "hive-llap-tez")
+ exclude("org.apache.hive", "hive-vector-code-gen")
+ exclude("org.apache.ivy")
exclude("org.apache.logging.log4j")
exclude("org.apache.zookeeper")
+ exclude("org.codehaus.groovy", "groovy-all")
+ exclude("org.datanucleus", "datanucleus-core")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.openjdk.jol")
exclude("org.pentaho")
exclude("org.slf4j")
}
+ implementation(libs.woodstox.core)
implementation(libs.hive2.metastore) {
+ exclude("ant")
exclude("co.cask.tephra")
exclude("com.github.joshelser")
exclude("com.google.code.findbugs", "jsr305")
@@ -64,13 +85,16 @@ dependencies {
exclude("com.zaxxer", "HikariCP")
exclude("io.dropwizard.metricss")
exclude("javax.transaction", "transaction-api")
+ exclude("org.apache.ant")
exclude("org.apache.avro")
exclude("org.apache.curator")
+ exclude("org.apache.derby")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
exclude("org.apache.hbase")
exclude("org.apache.logging.log4j")
exclude("org.apache.parquet", "parquet-hadoop-bundle")
exclude("org.apache.zookeeper")
+ exclude("org.datanucleus")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.openjdk.jol")
@@ -135,7 +159,11 @@ tasks {
val copyCatalogLibs by registering(Copy::class) {
dependsOn("jar", "runtimeJars")
- from("build/libs")
+ from("build/libs") {
+ exclude("guava-*.jar")
+ exclude("log4j-*.jar")
+ exclude("slf4j-*.jar")
+ }
into("$rootDir/distribution/package/catalogs/hive/libs")
}
diff --git
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
index f1c5f45fb..2108390c8 100644
---
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
+++
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTable.java
@@ -31,7 +31,6 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.ToString;
-import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.gravitino.catalog.hive.converter.HiveDataTypeConverter;
import org.apache.gravitino.connector.BaseTable;
@@ -87,7 +86,7 @@ public class HiveTable extends BaseTable {
StorageDescriptor sd = table.getSd();
Distribution distribution = Distributions.NONE;
- if (CollectionUtils.isNotEmpty(sd.getBucketCols())) {
+ if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) {
// Hive table use hash strategy as bucketing strategy
distribution =
Distributions.hash(
@@ -96,7 +95,7 @@ public class HiveTable extends BaseTable {
}
SortOrder[] sortOrders = new SortOrder[0];
- if (CollectionUtils.isNotEmpty(sd.getSortCols())) {
+ if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
sortOrders =
sd.getSortCols().stream()
.map(
diff --git
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
index 4ade7cdc7..a5ca0778d 100644
---
a/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
+++
b/catalogs/catalog-hive/src/main/java/org/apache/gravitino/catalog/hive/HiveTableOperations.java
@@ -45,7 +45,6 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.parquet.Strings;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -163,19 +162,19 @@ public class HiveTableOperations implements
TableOperations, SupportsPartitions
Preconditions.checkArgument(
transformFields.size() == identityPartition.fieldNames().length,
"Hive partition field names must be the same as table partitioning
field names: %s, but got %s",
- Strings.join(transformFields, ","),
- Strings.join(
+ String.join(",", transformFields),
+ String.join(
+ ",",
Arrays.stream(identityPartition.fieldNames())
- .map(f -> Strings.join(f, "."))
- .collect(Collectors.toList()),
- ","));
+ .map(f -> String.join(".", f))
+ .collect(Collectors.toList())));
Arrays.stream(identityPartition.fieldNames())
.forEach(
f ->
Preconditions.checkArgument(
transformFields.contains(f[0]),
"Hive partition field name must be in table partitioning
field names: %s, but got %s",
- Strings.join(transformFields, ","),
+ String.join(",", transformFields),
f[0]));
try {
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 8cc2b0f48..f278e95a4 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -28,18 +28,19 @@ jetty = "9.4.51.v20230217"
jersey = "2.41"
mockito = "4.11.0"
airlift-json = "237"
-airlift-log = "231"
airlift-resolver = "1.6"
-airlift-units = "1.8"
hive2 = "2.3.9"
hadoop2 = "2.10.2"
hadoop3 = "3.1.0"
hadoop-minikdc = "3.3.6"
+htrace-core4 = "4.1.0-incubating"
httpclient5 = "5.2.1"
mockserver = "5.15.0"
commons-lang3 = "3.14.0"
commons-io = "2.15.0"
commons-collections4 = "4.4"
+commons-collections3 = "3.2.2"
+commons-configuration1 = "1.6"
commons-dbcp2 = "2.11.0"
caffeine = "2.9.3"
rocksdbjni = "7.10.2"
@@ -62,7 +63,6 @@ jline = "3.21.0"
okhttp3 = "4.11.0"
metrics = "4.2.25"
prometheus = "0.16.0"
-jsqlparser = "4.2"
mysql = "8.0.23"
postgresql = "42.6.0"
immutables-value = "2.10.0"
@@ -91,6 +91,10 @@ node-plugin = "7.0.1"
commons-cli = "1.2"
sun-activation-version = "1.2.0"
error-prone = "3.1.0"
+woodstox-core = "5.3.0"
+mail = "1.4.1"
+rome = "1.0"
+jettison = "1.1"
[libraries]
protobuf-java = { group = "com.google.protobuf", name = "protobuf-java",
version.ref = "protoc" }
@@ -130,18 +134,17 @@ hive2-metastore = { group = "org.apache.hive", name =
"hive-metastore", version.
hive2-exec = { group = "org.apache.hive", name = "hive-exec", version.ref =
"hive2"}
hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref
= "hive2"}
hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref =
"hive2"}
+hadoop2-auth = { group = "org.apache.hadoop", name = "hadoop-auth",
version.ref = "hadoop2" }
hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs",
version.ref = "hadoop2" }
hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common",
version.ref = "hadoop2"}
hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name =
"hadoop-mapreduce-client-core", version.ref = "hadoop2"}
hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs",
version.ref = "hadoop3" }
hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common",
version.ref = "hadoop3"}
hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client",
version.ref = "hadoop3"}
-hadoop3-mapreduce-client-core = { group = "org.apache.hadoop", name =
"hadoop-mapreduce-client-core", version.ref = "hadoop3"}
hadoop3-minicluster = { group = "org.apache.hadoop", name =
"hadoop-minicluster", version.ref = "hadoop-minikdc"}
+htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4",
version.ref = "htrace-core4" }
airlift-json = { group = "io.airlift", name = "json", version.ref =
"airlift-json"}
airlift-resolver = { group = "io.airlift.resolver", name = "resolver",
version.ref = "airlift-resolver"}
-airlift-units = { group = "io.airlift", name = "units", version.ref =
"airlift-units"}
-airlift-log = { group = "io.airlift", name = "log", version.ref =
"airlift-log"}
httpclient5 = { group = "org.apache.httpcomponents.client5", name =
"httpclient5", version.ref = "httpclient5" }
mockserver-netty = { group = "org.mock-server", name = "mockserver-netty",
version.ref = "mockserver" }
mockserver-client-java = { group = "org.mock-server", name =
"mockserver-client-java", version.ref = "mockserver" }
@@ -150,6 +153,8 @@ commons-io = { group = "commons-io", name = "commons-io",
version.ref = "commons
caffeine = { group = "com.github.ben-manes.caffeine", name = "caffeine",
version.ref = "caffeine" }
rocksdbjni = { group = "org.rocksdb", name = "rocksdbjni", version.ref =
"rocksdbjni" }
commons-collections4 = { group = "org.apache.commons", name =
"commons-collections4", version.ref = "commons-collections4" }
+commons-collections3 = { group = "commons-collections", name =
"commons-collections", version.ref = "commons-collections3" }
+commons-configuration1 = { group = "commons-configuration", name =
"commons-configuration", version.ref = "commons-configuration1" }
iceberg-aws = { group = "org.apache.iceberg", name = "iceberg-aws",
version.ref = "iceberg" }
iceberg-core = { group = "org.apache.iceberg", name = "iceberg-core",
version.ref = "iceberg" }
iceberg-api = { group = "org.apache.iceberg", name = "iceberg-api",
version.ref = "iceberg" }
@@ -158,7 +163,6 @@ paimon-core = { group = "org.apache.paimon", name =
"paimon-core", version.ref =
paimon-format = { group = "org.apache.paimon", name = "paimon-format",
version.ref = "paimon" }
paimon-hive-catalog = { group = "org.apache.paimon", name =
"paimon-hive-catalog", version.ref = "paimon" }
trino-spi= { group = "io.trino", name = "trino-spi", version.ref = "trino" }
-trino-toolkit= { group = "io.trino", name = "trino-plugin-toolkit",
version.ref = "trino" }
trino-testing= { group = "io.trino", name = "trino-testing", version.ref =
"trino" }
trino-memory= { group = "io.trino", name = "trino-memory", version.ref =
"trino" }
trino-cli= { group = "io.trino", name = "trino-cli", version.ref = "trino" }
@@ -183,7 +187,6 @@ metrics-servlets = { group = "io.dropwizard.metrics", name
= "metrics-servlets",
prometheus-client = { group = "io.prometheus", name = "simpleclient",
version.ref = "prometheus" }
prometheus-dropwizard = { group = "io.prometheus", name =
"simpleclient_dropwizard", version.ref = "prometheus" }
prometheus-servlet = { group = "io.prometheus", name = "simpleclient_servlet",
version.ref = "prometheus" }
-jsqlparser = { group = "com.github.jsqlparser", name = "jsqlparser",
version.ref = "jsqlparser" }
mysql-driver = { group = "mysql", name = "mysql-connector-java", version.ref =
"mysql" }
postgresql-driver = { group = "org.postgresql", name = "postgresql",
version.ref = "postgresql" }
minikdc = { group = "org.apache.hadoop", name = "hadoop-minikdc", version.ref
= "hadoop-minikdc"}
@@ -194,6 +197,7 @@ kafka-clients = { group = "org.apache.kafka", name =
"kafka-clients", version.re
kafka = { group = "org.apache.kafka", name = "kafka_2.12", version.ref =
"kafka" }
curator-test = { group = "org.apache.curator", name = "curator-test",
version.ref = "curator"}
cglib = { group = "cglib", name = "cglib", version.ref = "cglib"}
+woodstox-core = { group = "com.fasterxml.woodstox", name = "woodstox-core",
version.ref = "woodstox-core"}
ranger-intg = { group = "org.apache.ranger", name = "ranger-intg", version.ref
= "ranger" }
javax-jaxb-api = { group = "javax.xml.bind", name = "jaxb-api", version.ref =
"javax-jaxb-api" }
@@ -204,6 +208,9 @@ mybatis = { group = "org.mybatis", name = "mybatis",
version.ref = "mybatis"}
h2db = { group = "com.h2database", name = "h2", version.ref = "h2db"}
awaitility = { group = "org.awaitility", name = "awaitility", version.ref =
"awaitility" }
servlet = { group = "javax.servlet", name = "javax.servlet-api", version.ref =
"servlet" }
+mail = { group = "javax.mail", name = "mail", version.ref = "mail" }
+rome = { group = "rome", name = "rome", version.ref = "rome" }
+jettison = { group = "org.codehaus.jettison", name = "jettison", version.ref =
"jettison" }
[bundles]
log4j = ["slf4j-api", "log4j-slf4j2-impl", "log4j-api", "log4j-core",
"log4j-12-api"]