This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 07cdcba44e [#5585] improvement(bundles): Refactor bundle jars and
provide core jars that does not contains hadoop-{aws,gcp,aliyun,azure} (#5806)
07cdcba44e is described below
commit 07cdcba44ed99cd90f109ba62df6d50ba67bbf59
Author: Qi Yu <[email protected]>
AuthorDate: Fri Dec 27 16:18:19 2024 +0800
[#5585] improvement(bundles): Refactor bundle jars and provide core jars
that does not contains hadoop-{aws,gcp,aliyun,azure} (#5806)
### What changes were proposed in this pull request?
Provide another kind of bundle jar that does not contain
hadoop-{aws,gcp,aliyun,azure}, such as aws-mini and gcp-mini.
### Why are the changes needed?
To make it work with a wide range of Hadoop versions.
Fix: #5585
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
Existing UTs and ITs
---
.../authorization-chain/build.gradle.kts | 10 ++--
.../authorization-ranger/build.gradle.kts | 11 ++--
build.gradle.kts | 8 +--
bundles/aliyun-bundle/build.gradle.kts | 38 ++++---------
bundles/{aliyun-bundle => aliyun}/build.gradle.kts | 39 ++++++++-----
.../oss/credential/OSSSecretKeyProvider.java | 0
.../gravitino/oss/credential/OSSTokenProvider.java | 0
.../gravitino/oss/credential/policy/Condition.java | 0
.../gravitino/oss/credential/policy/Effect.java | 0
.../gravitino/oss/credential/policy/Policy.java | 0
.../gravitino/oss/credential/policy/Statement.java | 0
.../oss/credential/policy/StringLike.java | 0
.../gravitino/oss/fs/OSSFileSystemProvider.java | 0
....gravitino.catalog.hadoop.fs.FileSystemProvider | 0
....apache.gravitino.credential.CredentialProvider | 0
bundles/aws-bundle/build.gradle.kts | 24 +++-----
bundles/{aws-bundle => aws}/build.gradle.kts | 21 ++++---
.../s3/credential/S3SecretKeyProvider.java | 0
.../gravitino/s3/credential/S3TokenProvider.java | 0
.../gravitino/s3/fs/S3FileSystemProvider.java | 62 ++++++++++++++++++++-
....gravitino.catalog.hadoop.fs.FileSystemProvider | 0
....apache.gravitino.credential.CredentialProvider | 0
bundles/azure-bundle/build.gradle.kts | 25 ++-------
bundles/{azure-bundle => azure}/build.gradle.kts | 23 ++++----
.../abs/credential/ADLSLocationUtils.java | 0
.../abs/credential/ADLSTokenProvider.java | 0
.../abs/credential/AzureAccountKeyProvider.java | 0
.../gravitino/abs/fs/AzureFileSystemProvider.java | 0
....gravitino.catalog.hadoop.fs.FileSystemProvider | 0
....apache.gravitino.credential.CredentialProvider | 0
bundles/gcp-bundle/build.gradle.kts | 24 ++------
.../services => }/org.apache.hadoop.fs.FileSystem | 0
bundles/{gcp-bundle => gcp}/build.gradle.kts | 23 ++++----
.../gravitino/gcs/credential/GCSTokenProvider.java | 0
.../gravitino/gcs/fs/GCSFileSystemProvider.java | 7 +--
....gravitino.catalog.hadoop.fs.FileSystemProvider | 0
....apache.gravitino.credential.CredentialProvider | 0
.../apache/gravitino/catalog/hadoop/Constants.java | 8 ++-
catalogs/catalog-hadoop/build.gradle.kts | 47 ++++------------
catalogs/catalog-hive/build.gradle.kts | 3 +
catalogs/hadoop-common/build.gradle.kts | 5 +-
.../catalog/hadoop/fs/FileSystemUtils.java | 6 +-
.../filesystem-hadoop3-runtime/build.gradle.kts | 3 +
clients/filesystem-hadoop3/build.gradle.kts | 26 ++++-----
.../hadoop/GravitinoVirtualFileSystem.java | 24 ++++++++
docs/hadoop-catalog.md | 14 ++---
docs/how-to-use-gvfs.md | 65 +++++++++++++++++++---
gradle/libs.versions.toml | 12 ++--
iceberg/iceberg-rest-server/build.gradle.kts | 8 +--
integration-test-common/build.gradle.kts | 8 +--
settings.gradle.kts | 10 ++--
51 files changed, 318 insertions(+), 236 deletions(-)
diff --git a/authorizations/authorization-chain/build.gradle.kts
b/authorizations/authorization-chain/build.gradle.kts
index d5cd160742..e14cfa05ba 100644
--- a/authorizations/authorization-chain/build.gradle.kts
+++ b/authorizations/authorization-chain/build.gradle.kts
@@ -81,6 +81,7 @@ dependencies {
exclude("net.java.dev.jna")
exclude("javax.ws.rs")
exclude("org.eclipse.jetty")
+ exclude("org.apache.hadoop", "hadoop-common")
}
testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion")
testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion")
{
@@ -93,11 +94,10 @@ dependencies {
testImplementation("org.apache.kyuubi:kyuubi-spark-authz-shaded_$scalaVersion:$kyuubiVersion")
{
exclude("com.sun.jersey")
}
- testImplementation(libs.hadoop3.client)
- testImplementation(libs.hadoop3.common) {
- exclude("com.sun.jersey")
- exclude("javax.servlet", "servlet-api")
- }
+
+ testImplementation(libs.hadoop3.client.api)
+ testImplementation(libs.hadoop3.client.runtime)
+
testImplementation(libs.hadoop3.hdfs) {
exclude("com.sun.jersey")
exclude("javax.servlet", "servlet-api")
diff --git a/authorizations/authorization-ranger/build.gradle.kts
b/authorizations/authorization-ranger/build.gradle.kts
index d410b1ee8d..8cc82250c2 100644
--- a/authorizations/authorization-ranger/build.gradle.kts
+++ b/authorizations/authorization-ranger/build.gradle.kts
@@ -67,7 +67,12 @@ dependencies {
exclude("net.java.dev.jna")
exclude("javax.ws.rs")
exclude("org.eclipse.jetty")
+ // Conflicts with hadoop-client-api used in hadoop-catalog.
+ exclude("org.apache.hadoop", "hadoop-common")
}
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
+
implementation(libs.rome)
compileOnly(libs.lombok)
testRuntimeOnly(libs.junit.jupiter.engine)
@@ -92,11 +97,7 @@ dependencies {
testImplementation("org.apache.kyuubi:kyuubi-spark-authz-shaded_$scalaVersion:$kyuubiVersion")
{
exclude("com.sun.jersey")
}
- testImplementation(libs.hadoop3.client)
- testImplementation(libs.hadoop3.common) {
- exclude("com.sun.jersey")
- exclude("javax.servlet", "servlet-api")
- }
+
testImplementation(libs.hadoop3.hdfs) {
exclude("com.sun.jersey")
exclude("javax.servlet", "servlet-api")
diff --git a/build.gradle.kts b/build.gradle.kts
index c64997f3a9..154b4e7f77 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -779,7 +779,7 @@ tasks {
!it.name.startsWith("client") && !it.name.startsWith("filesystem") &&
!it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name !=
"trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" &&
!it.name.startsWith("flink") &&
it.name != "integration-test" && it.name != "hive-metastore-common" &&
!it.name.startsWith("flink") &&
- it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name !=
"aws-bundle" && it.name != "azure-bundle" && it.name != "hadoop-common"
+ it.parent?.name != "bundles" && it.name != "hadoop-common"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
@@ -799,10 +799,8 @@ tasks {
!it.name.startsWith("integration-test") &&
!it.name.startsWith("flink") &&
!it.name.startsWith("trino-connector") &&
- it.name != "bundled-catalog" &&
- it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
- it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name !=
"azure-bundle" &&
- it.name != "hadoop-common" && it.name != "docs"
+ it.name != "hive-metastore-common" &&
+ it.name != "docs" && it.name != "hadoop-common" && it.parent?.name !=
"bundles"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
diff --git a/bundles/aliyun-bundle/build.gradle.kts
b/bundles/aliyun-bundle/build.gradle.kts
index bc2d21a685..c837728559 100644
--- a/bundles/aliyun-bundle/build.gradle.kts
+++ b/bundles/aliyun-bundle/build.gradle.kts
@@ -25,32 +25,12 @@ plugins {
}
dependencies {
- compileOnly(project(":api"))
- compileOnly(project(":core"))
- compileOnly(project(":catalogs:catalog-common"))
- compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
- exclude("*")
- }
- compileOnly(libs.hadoop3.common)
-
- implementation(libs.aliyun.credentials.sdk)
+ implementation(project(":bundles:aliyun"))
+ implementation(libs.commons.collections3)
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
implementation(libs.hadoop3.oss)
-
- // Aliyun oss SDK depends on this package, and JDK >= 9 requires manual add
- //
https://www.alibabacloud.com/help/en/oss/developer-reference/java-installation?spm=a2c63.p38356.0.i1
- implementation(libs.sun.activation)
-
- // oss needs StringUtils from commons-lang3 or the following error will
occur in 3.3.0
- // java.lang.NoClassDefFoundError: org/apache/commons/lang3/StringUtils
- //
org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore.initialize(AliyunOSSFileSystemStore.java:111)
- //
org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323)
- // org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611)
- implementation(libs.commons.lang3)
-
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.httpclient)
}
tasks.withType(ShadowJar::class.java) {
@@ -60,8 +40,12 @@ tasks.withType(ShadowJar::class.java) {
mergeServiceFiles()
// Relocate dependencies to avoid conflicts
- relocate("org.jdom", "org.apache.gravitino.shaded.org.jdom")
- relocate("org.apache.commons.lang3",
"org.apache.gravitino.shaded.org.apache.commons.lang3")
+ relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom")
+ relocate("org.apache.commons.lang3",
"org.apache.gravitino.aliyun.shaded.org.apache.commons.lang3")
+ relocate("com.fasterxml.jackson",
"org.apache.gravitino.aliyun.shaded.com.fasterxml.jackson")
+ relocate("com.google.common",
"org.apache.gravitino.aliyun.shaded.com.google.common")
+ relocate("org.apache.http",
"org.apache.gravitino.aliyun.shaded.org.apache.http")
+ relocate("org.apache.commons.collections",
"org.apache.gravitino.aliyun.shaded.org.apache.commons.collections")
}
tasks.jar {
diff --git a/bundles/aliyun-bundle/build.gradle.kts
b/bundles/aliyun/build.gradle.kts
similarity index 70%
copy from bundles/aliyun-bundle/build.gradle.kts
copy to bundles/aliyun/build.gradle.kts
index bc2d21a685..f4d38d40b9 100644
--- a/bundles/aliyun-bundle/build.gradle.kts
+++ b/bundles/aliyun/build.gradle.kts
@@ -26,20 +26,22 @@ plugins {
dependencies {
compileOnly(project(":api"))
- compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
+ compileOnly(project(":core"))
+ compileOnly(libs.hadoop3.client.api)
+ compileOnly(libs.hadoop3.client.runtime)
+ compileOnly(libs.hadoop3.oss)
+
+ implementation(project(":catalogs:catalog-common")) {
+ exclude("*")
+ }
+ implementation(project(":catalogs:hadoop-common")) {
exclude("*")
}
- compileOnly(libs.hadoop3.common)
implementation(libs.aliyun.credentials.sdk)
- implementation(libs.hadoop3.oss)
-
- // Aliyun oss SDK depends on this package, and JDK >= 9 requires manual add
- //
https://www.alibabacloud.com/help/en/oss/developer-reference/java-installation?spm=a2c63.p38356.0.i1
- implementation(libs.sun.activation)
+ implementation(libs.commons.collections3)
// oss needs StringUtils from commons-lang3 or the following error will
occur in 3.3.0
// java.lang.NoClassDefFoundError: org/apache/commons/lang3/StringUtils
@@ -47,10 +49,17 @@ dependencies {
//
org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem.initialize(AliyunOSSFileSystem.java:323)
// org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3611)
implementation(libs.commons.lang3)
+ implementation(libs.guava)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.httpclient)
+ implementation(libs.jackson.databind)
+ implementation(libs.jackson.annotations)
+ implementation(libs.jackson.datatype.jdk8)
+ implementation(libs.jackson.datatype.jsr310)
+
+ // Aliyun oss SDK depends on this package, and JDK >= 9 requires manual add
+ //
https://www.alibabacloud.com/help/en/oss/developer-reference/java-installation?spm=a2c63.p38356.0.i1
+ implementation(libs.sun.activation)
}
tasks.withType(ShadowJar::class.java) {
@@ -60,8 +69,12 @@ tasks.withType(ShadowJar::class.java) {
mergeServiceFiles()
// Relocate dependencies to avoid conflicts
- relocate("org.jdom", "org.apache.gravitino.shaded.org.jdom")
- relocate("org.apache.commons.lang3",
"org.apache.gravitino.shaded.org.apache.commons.lang3")
+ relocate("org.jdom", "org.apache.gravitino.aliyun.shaded.org.jdom")
+ relocate("org.apache.commons.lang3",
"org.apache.gravitino.aliyun.shaded.org.apache.commons.lang3")
+ relocate("com.fasterxml.jackson",
"org.apache.gravitino.aliyun.shaded.com.fasterxml.jackson")
+ relocate("com.google.common",
"org.apache.gravitino.aliyun.shaded.com.google.common")
+ relocate("org.apache.http",
"org.apache.gravitino.aliyun.shaded.org.apache.http")
+ relocate("org.apache.commons.collections",
"org.apache.gravitino.aliyun.shaded.org.apache.commons.collections")
}
tasks.jar {
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSSecretKeyProvider.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/OSSTokenProvider.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Condition.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
similarity index 100%
copy from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
copy to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Policy.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/Statement.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/credential/policy/StringLike.java
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java
b/bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java
rename to
bundles/aliyun/src/main/java/org/apache/gravitino/oss/fs/OSSFileSystemProvider.java
diff --git
a/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
b/bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
rename to
bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
diff --git
a/bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
b/bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
similarity index 100%
rename from
bundles/aliyun-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
rename to
bundles/aliyun/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
diff --git a/bundles/aws-bundle/build.gradle.kts
b/bundles/aws-bundle/build.gradle.kts
index 3af5c8b4f3..35b1e22a4f 100644
--- a/bundles/aws-bundle/build.gradle.kts
+++ b/bundles/aws-bundle/build.gradle.kts
@@ -25,30 +25,20 @@ plugins {
}
dependencies {
- compileOnly(project(":api"))
- compileOnly(project(":core"))
- compileOnly(project(":catalogs:catalog-common"))
- compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
- exclude("*")
- }
- compileOnly(libs.hadoop3.common)
-
- implementation(libs.aws.iam)
- implementation(libs.aws.policy)
- implementation(libs.aws.sts)
- implementation(libs.commons.lang3)
+ implementation(project(":bundles:aws"))
implementation(libs.hadoop3.aws)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
}
tasks.withType(ShadowJar::class.java) {
isZip64 = true
configurations = listOf(project.configurations.runtimeClasspath.get())
- relocate("org.apache.commons",
"org.apache.gravitino.aws.shaded.org.apache.commons")
archiveClassifier.set("")
+
+ relocate("org.apache.commons.lang3",
"org.apache.gravitino.aws.shaded.org.apache.commons.lang3")
+ relocate("com.google.common",
"org.apache.gravitino.aws.shaded.com.google.common")
+ relocate("com.fasterxml.jackson",
"org.apache.gravitino.aws.shaded.com.fasterxml.jackson")
}
tasks.jar {
diff --git a/bundles/aws-bundle/build.gradle.kts b/bundles/aws/build.gradle.kts
similarity index 78%
copy from bundles/aws-bundle/build.gradle.kts
copy to bundles/aws/build.gradle.kts
index 3af5c8b4f3..45fda5485d 100644
--- a/bundles/aws-bundle/build.gradle.kts
+++ b/bundles/aws/build.gradle.kts
@@ -26,29 +26,36 @@ plugins {
dependencies {
compileOnly(project(":api"))
- compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
+ compileOnly(project(":core"))
+ compileOnly(libs.hadoop3.aws)
+ compileOnly(libs.hadoop3.client.api)
+ compileOnly(libs.hadoop3.client.runtime)
+
+ implementation(project(":catalogs:catalog-common")) {
+ exclude("*")
+ }
+ implementation(project(":catalogs:hadoop-common")) {
exclude("*")
}
- compileOnly(libs.hadoop3.common)
implementation(libs.aws.iam)
implementation(libs.aws.policy)
implementation(libs.aws.sts)
implementation(libs.commons.lang3)
implementation(libs.hadoop3.aws)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.guava)
}
tasks.withType(ShadowJar::class.java) {
isZip64 = true
configurations = listOf(project.configurations.runtimeClasspath.get())
- relocate("org.apache.commons",
"org.apache.gravitino.aws.shaded.org.apache.commons")
archiveClassifier.set("")
+
+ relocate("org.apache.commons.lang3",
"org.apache.gravitino.aws.shaded.org.apache.commons.lang3")
+ relocate("com.google.common",
"org.apache.gravitino.aws.shaded.com.google.common")
+ relocate("com.fasterxml.jackson",
"org.apache.gravitino.aws.shaded.com.fasterxml.jackson")
}
tasks.jar {
diff --git
a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java
b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java
similarity index 100%
rename from
bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java
rename to
bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3SecretKeyProvider.java
diff --git
a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java
b/bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java
similarity index 100%
rename from
bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java
rename to
bundles/aws/src/main/java/org/apache/gravitino/s3/credential/S3TokenProvider.java
diff --git
a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
similarity index 53%
rename from
bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
rename to
bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
index 0d755c1f56..b7cd569bbf 100644
---
a/bundles/aws-bundle/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
+++
b/bundles/aws/src/main/java/org/apache/gravitino/s3/fs/S3FileSystemProvider.java
@@ -19,9 +19,14 @@
package org.apache.gravitino.s3.fs;
+import com.amazonaws.auth.AWSCredentialsProvider;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
import java.io.IOException;
+import java.util.List;
import java.util.Map;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
@@ -31,9 +36,13 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class S3FileSystemProvider implements FileSystemProvider {
+ private static final Logger LOGGER =
LoggerFactory.getLogger(S3FileSystemProvider.class);
+
@VisibleForTesting
public static final Map<String, String> GRAVITINO_KEY_TO_S3_HADOOP_KEY =
ImmutableMap.of(
@@ -41,20 +50,67 @@ public class S3FileSystemProvider implements
FileSystemProvider {
S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, Constants.ACCESS_KEY,
S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, Constants.SECRET_KEY);
+ // We can't use Constants.AWS_CREDENTIALS_PROVIDER directly, as in 2.7, this
key does not exist.
+ private static final String S3_CREDENTIAL_KEY =
"fs.s3a.aws.credentials.provider";
+ private static final String S3_SIMPLE_CREDENTIAL =
+ "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider";
+
@Override
public FileSystem getFileSystem(Path path, Map<String, String> config)
throws IOException {
Configuration configuration = new Configuration();
Map<String, String> hadoopConfMap =
FileSystemUtils.toHadoopConfigMap(config,
GRAVITINO_KEY_TO_S3_HADOOP_KEY);
- if (!hadoopConfMap.containsKey(Constants.AWS_CREDENTIALS_PROVIDER)) {
- configuration.set(
- Constants.AWS_CREDENTIALS_PROVIDER,
Constants.ASSUMED_ROLE_CREDENTIALS_DEFAULT);
+ if (!hadoopConfMap.containsKey(S3_CREDENTIAL_KEY)) {
+ hadoopConfMap.put(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL);
}
+
hadoopConfMap.forEach(configuration::set);
+
+ // Hadoop-aws 2 does not support IAMInstanceCredentialsProvider
+ checkAndSetCredentialProvider(configuration);
+
return S3AFileSystem.newInstance(path.toUri(), configuration);
}
+ private void checkAndSetCredentialProvider(Configuration configuration) {
+ String provides = configuration.get(S3_CREDENTIAL_KEY);
+ if (provides == null) {
+ return;
+ }
+
+ Splitter splitter = Splitter.on(',').trimResults().omitEmptyStrings();
+ Joiner joiner = Joiner.on(",").skipNulls();
+ // Split the list of providers
+ List<String> providers = splitter.splitToList(provides);
+ List<String> validProviders = Lists.newArrayList();
+
+ for (String provider : providers) {
+ try {
+ Class<?> c = Class.forName(provider);
+ if (AWSCredentialsProvider.class.isAssignableFrom(c)) {
+ validProviders.add(provider);
+ } else {
+ LOGGER.warn(
+ "Credential provider {} is not a subclass of
AWSCredentialsProvider, skipping",
+ provider);
+ }
+ } catch (Exception e) {
+ LOGGER.warn(
+ "Credential provider {} not found in the Hadoop runtime, falling
back to default",
+ provider);
+ configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL);
+ return;
+ }
+ }
+
+ if (validProviders.isEmpty()) {
+ configuration.set(S3_CREDENTIAL_KEY, S3_SIMPLE_CREDENTIAL);
+ } else {
+ configuration.set(S3_CREDENTIAL_KEY, joiner.join(validProviders));
+ }
+ }
+
/**
* Get the scheme of the FileSystem. Attention, for S3 the schema is "s3a",
not "s3". Users should
* use "s3a://..." to access S3.
diff --git
a/bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
b/bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
similarity index 100%
rename from
bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
rename to
bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
diff --git
a/bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
b/bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
similarity index 100%
rename from
bundles/aws-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
rename to
bundles/aws/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
diff --git a/bundles/azure-bundle/build.gradle.kts
b/bundles/azure-bundle/build.gradle.kts
index 9e4a4add54..7d9e253ac8 100644
--- a/bundles/azure-bundle/build.gradle.kts
+++ b/bundles/azure-bundle/build.gradle.kts
@@ -25,26 +25,10 @@ plugins {
}
dependencies {
- compileOnly(project(":api"))
- compileOnly(project(":core"))
- compileOnly(project(":catalogs:catalog-common"))
- compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
- exclude("*")
- }
-
- compileOnly(libs.hadoop3.common)
-
- implementation(libs.azure.identity)
- implementation(libs.azure.storage.file.datalake)
-
- implementation(libs.commons.lang3)
- // runtime used
- implementation(libs.commons.logging)
+ implementation(project(":bundles:azure"))
implementation(libs.hadoop3.abs)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
}
tasks.withType(ShadowJar::class.java) {
@@ -56,7 +40,8 @@ tasks.withType(ShadowJar::class.java) {
relocate("org.apache.httpcomponents",
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
relocate("org.apache.commons",
"org.apache.gravitino.azure.shaded.org.apache.commons")
relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
- relocate("com.google.guava",
"org.apache.gravitino.azure.shaded.com.google.guava")
+ relocate("com.google.common",
"org.apache.gravitino.azure.shaded.com.google.common")
+ relocate("org.eclipse.jetty",
"org.apache.gravitino.azure.shaded.org.eclipse.jetty")
}
tasks.jar {
diff --git a/bundles/azure-bundle/build.gradle.kts
b/bundles/azure/build.gradle.kts
similarity index 84%
copy from bundles/azure-bundle/build.gradle.kts
copy to bundles/azure/build.gradle.kts
index 9e4a4add54..59d8cf5f80 100644
--- a/bundles/azure-bundle/build.gradle.kts
+++ b/bundles/azure/build.gradle.kts
@@ -26,14 +26,19 @@ plugins {
dependencies {
compileOnly(project(":api"))
- compileOnly(project(":core"))
- compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
+ compileOnly(project(":core"))
+
+ compileOnly(libs.hadoop3.abs)
+ compileOnly(libs.hadoop3.client.api)
+ compileOnly(libs.hadoop3.client.runtime)
+
+ implementation(project(":catalogs:catalog-common")) {
+ exclude("*")
+ }
+ implementation(project(":catalogs:hadoop-common")) {
exclude("*")
}
-
- compileOnly(libs.hadoop3.common)
implementation(libs.azure.identity)
implementation(libs.azure.storage.file.datalake)
@@ -41,10 +46,7 @@ dependencies {
implementation(libs.commons.lang3)
// runtime used
implementation(libs.commons.logging)
- implementation(libs.hadoop3.abs)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
+ implementation(libs.guava)
}
tasks.withType(ShadowJar::class.java) {
@@ -56,7 +58,8 @@ tasks.withType(ShadowJar::class.java) {
relocate("org.apache.httpcomponents",
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
relocate("org.apache.commons",
"org.apache.gravitino.azure.shaded.org.apache.commons")
relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
- relocate("com.google.guava",
"org.apache.gravitino.azure.shaded.com.google.guava")
+ relocate("com.google.common",
"org.apache.gravitino.azure.shaded.com.google.common")
+ relocate("org.eclipse.jetty",
"org.apache.gravitino.azure.shaded.org.eclipse.jetty")
}
tasks.jar {
diff --git
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java
b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java
similarity index 100%
rename from
bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java
rename to
bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSLocationUtils.java
diff --git
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java
b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java
similarity index 100%
rename from
bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java
rename to
bundles/azure/src/main/java/org/apache/gravitino/abs/credential/ADLSTokenProvider.java
diff --git
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java
b/bundles/azure/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java
similarity index 100%
rename from
bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java
rename to
bundles/azure/src/main/java/org/apache/gravitino/abs/credential/AzureAccountKeyProvider.java
diff --git
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
b/bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
similarity index 100%
rename from
bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
rename to
bundles/azure/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
diff --git
a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
b/bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
similarity index 100%
rename from
bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
rename to
bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
diff --git
a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
b/bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
similarity index 100%
rename from
bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
rename to
bundles/azure/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
diff --git a/bundles/gcp-bundle/build.gradle.kts
b/bundles/gcp-bundle/build.gradle.kts
index bae7411c75..73efaf9f22 100644
--- a/bundles/gcp-bundle/build.gradle.kts
+++ b/bundles/gcp-bundle/build.gradle.kts
@@ -25,25 +25,10 @@ plugins {
}
dependencies {
- compileOnly(project(":api"))
- compileOnly(project(":core"))
- compileOnly(project(":catalogs:catalog-common"))
- compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
- exclude("*")
- }
-
- compileOnly(libs.hadoop3.common)
-
- implementation(libs.commons.lang3)
- // runtime used
- implementation(libs.commons.logging)
+ implementation(project(":bundles:gcp"))
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
implementation(libs.hadoop3.gcs)
- implementation(project(":catalogs:catalog-common")) {
- exclude("*")
- }
- implementation(libs.google.auth.http)
- implementation(libs.google.auth.credentials)
}
tasks.withType(ShadowJar::class.java) {
@@ -54,8 +39,9 @@ tasks.withType(ShadowJar::class.java) {
// Relocate dependencies to avoid conflicts
relocate("org.apache.httpcomponents",
"org.apache.gravitino.gcp.shaded.org.apache.httpcomponents")
relocate("org.apache.commons",
"org.apache.gravitino.gcp.shaded.org.apache.commons")
- relocate("com.google", "org.apache.gravitino.gcp.shaded.com.google")
+ relocate("com.google.common",
"org.apache.gravitino.gcp.shaded.com.google.common")
relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml")
+ relocate("org.eclipse.jetty",
"org.apache.gravitino.gcp.shaded.org.eclipse.jetty")
}
tasks.jar {
diff --git
a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
b/bundles/gcp-bundle/src/main/resources/org.apache.hadoop.fs.FileSystem
similarity index 100%
rename from
bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
rename to bundles/gcp-bundle/src/main/resources/org.apache.hadoop.fs.FileSystem
diff --git a/bundles/gcp-bundle/build.gradle.kts b/bundles/gcp/build.gradle.kts
similarity index 86%
copy from bundles/gcp-bundle/build.gradle.kts
copy to bundles/gcp/build.gradle.kts
index bae7411c75..6f21dc3d5a 100644
--- a/bundles/gcp-bundle/build.gradle.kts
+++ b/bundles/gcp/build.gradle.kts
@@ -26,24 +26,24 @@ plugins {
dependencies {
compileOnly(project(":api"))
- compileOnly(project(":core"))
compileOnly(project(":catalogs:catalog-common"))
compileOnly(project(":catalogs:catalog-hadoop"))
- compileOnly(project(":catalogs:hadoop-common")) {
- exclude("*")
- }
+ compileOnly(project(":core"))
- compileOnly(libs.hadoop3.common)
+ compileOnly(libs.hadoop3.client.api)
+ compileOnly(libs.hadoop3.client.runtime)
- implementation(libs.commons.lang3)
- // runtime used
- implementation(libs.commons.logging)
- implementation(libs.hadoop3.gcs)
implementation(project(":catalogs:catalog-common")) {
exclude("*")
}
- implementation(libs.google.auth.http)
+ implementation(project(":catalogs:hadoop-common")) {
+ exclude("*")
+ }
+ implementation(libs.commons.lang3)
+ // runtime used
+ implementation(libs.commons.logging)
implementation(libs.google.auth.credentials)
+ implementation(libs.google.auth.http)
}
tasks.withType(ShadowJar::class.java) {
@@ -54,8 +54,9 @@ tasks.withType(ShadowJar::class.java) {
// Relocate dependencies to avoid conflicts
relocate("org.apache.httpcomponents",
"org.apache.gravitino.gcp.shaded.org.apache.httpcomponents")
relocate("org.apache.commons",
"org.apache.gravitino.gcp.shaded.org.apache.commons")
- relocate("com.google", "org.apache.gravitino.gcp.shaded.com.google")
+ relocate("com.google.common",
"org.apache.gravitino.gcp.shaded.com.google.common")
relocate("com.fasterxml", "org.apache.gravitino.gcp.shaded.com.fasterxml")
+ relocate("com.fasterxml.jackson",
"org.apache.gravitino.gcp.shaded.com.fasterxml.jackson")
}
tasks.jar {
diff --git
a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
similarity index 100%
rename from
bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
rename to
bundles/gcp/src/main/java/org/apache/gravitino/gcs/credential/GCSTokenProvider.java
diff --git
a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
similarity index 85%
rename from
bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
rename to
bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
index a07ff3d6ec..0055e167c4 100644
---
a/bundles/gcp-bundle/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
+++
b/bundles/gcp/src/main/java/org/apache/gravitino/gcs/fs/GCSFileSystemProvider.java
@@ -18,7 +18,6 @@
*/
package org.apache.gravitino.gcs.fs;
-import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
@@ -29,11 +28,8 @@ import org.apache.gravitino.storage.GCSProperties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public class GCSFileSystemProvider implements FileSystemProvider {
- private static final Logger LOGGER =
LoggerFactory.getLogger(GCSFileSystemProvider.class);
private static final String GCS_SERVICE_ACCOUNT_JSON_FILE =
"fs.gs.auth.service.account.json.keyfile";
@@ -46,8 +42,7 @@ public class GCSFileSystemProvider implements
FileSystemProvider {
Configuration configuration = new Configuration();
FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_GCS_HADOOP_KEY)
.forEach(configuration::set);
- LOGGER.info("Creating GCS file system with config: {}", config);
- return GoogleHadoopFileSystem.newInstance(path.toUri(), configuration);
+ return FileSystem.newInstance(path.toUri(), configuration);
}
@Override
diff --git
a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
b/bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
similarity index 100%
rename from
bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
rename to
bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
diff --git
a/bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
b/bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
similarity index 100%
rename from
bundles/gcp-bundle/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
rename to
bundles/gcp/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider
diff --git
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java
similarity index 78%
rename from
bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
rename to
catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java
index 5906cb13c8..468728362b 100644
---
a/bundles/aliyun-bundle/src/main/java/org/apache/gravitino/oss/credential/policy/Effect.java
+++
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/hadoop/Constants.java
@@ -17,8 +17,10 @@
* under the License.
*/
-package org.apache.gravitino.oss.credential.policy;
+package org.apache.gravitino.catalog.hadoop;
-public class Effect {
- public static final String ALLOW = "Allow";
+public class Constants {
+
+ public static final String BUILTIN_LOCAL_FS_PROVIDER = "builtin-local";
+ public static final String BUILTIN_HDFS_FS_PROVIDER = "builtin-hdfs";
}
diff --git a/catalogs/catalog-hadoop/build.gradle.kts
b/catalogs/catalog-hadoop/build.gradle.kts
index 8873b79504..d599a5e72f 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -28,43 +28,22 @@ dependencies {
implementation(project(":api")) {
exclude(group = "*")
}
-
- implementation(project(":core")) {
- exclude(group = "*")
- }
-
- implementation(project(":common")) {
- exclude(group = "*")
- }
-
implementation(project(":catalogs:catalog-common")) {
exclude(group = "*")
}
-
implementation(project(":catalogs:hadoop-common")) {
exclude(group = "*")
}
-
- implementation(libs.hadoop3.common) {
- exclude("com.sun.jersey")
- exclude("javax.servlet", "servlet-api")
- exclude("org.eclipse.jetty", "*")
- exclude("org.apache.hadoop", "hadoop-auth")
- exclude("org.apache.curator", "curator-client")
- exclude("org.apache.curator", "curator-framework")
- exclude("org.apache.curator", "curator-recipes")
- exclude("org.apache.avro", "avro")
- exclude("com.sun.jersey", "jersey-servlet")
+ implementation(project(":common")) {
+ exclude(group = "*")
}
-
- implementation(libs.hadoop3.client) {
- exclude("org.apache.hadoop", "hadoop-mapreduce-client-core")
- exclude("org.apache.hadoop", "hadoop-mapreduce-client-jobclient")
- exclude("org.apache.hadoop", "hadoop-yarn-api")
- exclude("org.apache.hadoop", "hadoop-yarn-client")
- exclude("com.squareup.okhttp", "okhttp")
+ implementation(project(":core")) {
+ exclude(group = "*")
}
-
+ implementation(libs.commons.lang3)
+ implementation(libs.commons.io)
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
implementation(libs.hadoop3.hdfs) {
exclude("com.sun.jersey")
exclude("javax.servlet", "servlet-api")
@@ -74,20 +53,18 @@ dependencies {
exclude("io.netty")
exclude("org.fusesource.leveldbjni")
}
-
implementation(libs.slf4j.api)
compileOnly(libs.guava)
- testImplementation(project(":bundles:aws-bundle"))
- testImplementation(project(":bundles:gcp-bundle"))
- testImplementation(project(":bundles:aliyun-bundle"))
- testImplementation(project(":bundles:azure-bundle"))
testImplementation(project(":clients:client-java"))
+ testImplementation(project(":bundles:aws-bundle", configuration = "shadow"))
+ testImplementation(project(":bundles:gcp-bundle", configuration = "shadow"))
+ testImplementation(project(":bundles:aliyun-bundle", configuration =
"shadow"))
+ testImplementation(project(":bundles:azure-bundle", configuration =
"shadow"))
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation(project(":server"))
testImplementation(project(":server-common"))
-
testImplementation(libs.bundles.log4j)
testImplementation(libs.hadoop3.gcs)
testImplementation(libs.hadoop3.minicluster)
diff --git a/catalogs/catalog-hive/build.gradle.kts
b/catalogs/catalog-hive/build.gradle.kts
index b471fccead..6a8b815ab9 100644
--- a/catalogs/catalog-hive/build.gradle.kts
+++ b/catalogs/catalog-hive/build.gradle.kts
@@ -96,6 +96,9 @@ dependencies {
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation(project(":server"))
testImplementation(project(":server-common"))
+ testImplementation(project(":catalogs:hadoop-common")) {
+ exclude("*")
+ }
testImplementation(libs.bundles.jetty)
testImplementation(libs.bundles.jersey)
diff --git a/catalogs/hadoop-common/build.gradle.kts
b/catalogs/hadoop-common/build.gradle.kts
index ab768cb1f1..566ce5986e 100644
--- a/catalogs/hadoop-common/build.gradle.kts
+++ b/catalogs/hadoop-common/build.gradle.kts
@@ -23,6 +23,9 @@ plugins {
// try to avoid adding extra dependencies because it is used by catalogs and
connectors.
dependencies {
+ implementation(project(":catalogs:catalog-common"))
implementation(libs.commons.lang3)
- implementation(libs.hadoop3.common)
+ implementation(libs.hadoop3.client.api)
+ implementation(libs.hadoop3.client.runtime)
+ implementation(libs.guava)
}
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
similarity index 95%
rename from
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
rename to
catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
index 129a8e8827..a1434e85c3 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
+++
b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/FileSystemUtils.java
@@ -18,8 +18,8 @@
*/
package org.apache.gravitino.catalog.hadoop.fs;
-import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.BUILTIN_HDFS_FS_PROVIDER;
-import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.BUILTIN_LOCAL_FS_PROVIDER;
+import static
org.apache.gravitino.catalog.hadoop.Constants.BUILTIN_HDFS_FS_PROVIDER;
+import static
org.apache.gravitino.catalog.hadoop.Constants.BUILTIN_LOCAL_FS_PROVIDER;
import static
org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider.GRAVITINO_BYPASS;
import com.google.common.collect.Maps;
@@ -45,7 +45,7 @@ public class FileSystemUtils {
fileSystemProviders != null
? Arrays.stream(fileSystemProviders.split(","))
.map(f -> f.trim().toLowerCase(Locale.ROOT))
- .collect(java.util.stream.Collectors.toSet())
+ .collect(Collectors.toSet())
: Sets.newHashSet();
// Add built-in file system providers to the use list automatically.
diff --git a/clients/filesystem-hadoop3-runtime/build.gradle.kts
b/clients/filesystem-hadoop3-runtime/build.gradle.kts
index 8081a55604..db439a4981 100644
--- a/clients/filesystem-hadoop3-runtime/build.gradle.kts
+++ b/clients/filesystem-hadoop3-runtime/build.gradle.kts
@@ -28,6 +28,7 @@ plugins {
dependencies {
implementation(project(":clients:filesystem-hadoop3"))
implementation(project(":clients:client-java-runtime", configuration =
"shadow"))
+ implementation(libs.commons.lang3)
}
tasks.withType<ShadowJar>(ShadowJar::class.java) {
@@ -38,6 +39,8 @@ tasks.withType<ShadowJar>(ShadowJar::class.java) {
// Relocate dependencies to avoid conflicts
relocate("com.google", "org.apache.gravitino.shaded.com.google")
relocate("com.github.benmanes.caffeine",
"org.apache.gravitino.shaded.com.github.benmanes.caffeine")
+ // relocate common lang3 package
+ relocate("org.apache.commons.lang3",
"org.apache.gravitino.shaded.org.apache.commons.lang3")
}
tasks.jar {
diff --git a/clients/filesystem-hadoop3/build.gradle.kts
b/clients/filesystem-hadoop3/build.gradle.kts
index d24eb4efdf..424f6a1140 100644
--- a/clients/filesystem-hadoop3/build.gradle.kts
+++ b/clients/filesystem-hadoop3/build.gradle.kts
@@ -25,7 +25,8 @@ plugins {
dependencies {
compileOnly(project(":clients:client-java-runtime", configuration =
"shadow"))
- compileOnly(libs.hadoop3.common)
+ compileOnly(libs.hadoop3.client.api)
+ compileOnly(libs.hadoop3.client.runtime)
implementation(project(":catalogs:catalog-common")) {
exclude(group = "*")
@@ -35,32 +36,31 @@ dependencies {
}
implementation(libs.caffeine)
+ implementation(libs.guava)
+ implementation(libs.commons.lang3)
testImplementation(project(":api"))
testImplementation(project(":core"))
+ testImplementation(project(":catalogs:catalog-hadoop"))
testImplementation(project(":common"))
testImplementation(project(":server"))
testImplementation(project(":server-common"))
testImplementation(project(":clients:client-java"))
testImplementation(project(":integration-test-common", "testArtifacts"))
- testImplementation(project(":catalogs:catalog-hadoop"))
- testImplementation(project(":bundles:gcp-bundle"))
- testImplementation(project(":bundles:aliyun-bundle"))
- testImplementation(project(":bundles:aws-bundle"))
- testImplementation(project(":bundles:azure-bundle"))
- testImplementation(project(":bundles:gcp-bundle"))
+
+ testImplementation(project(":bundles:aws-bundle", configuration = "shadow"))
+ testImplementation(project(":bundles:gcp-bundle", configuration = "shadow"))
+ testImplementation(project(":bundles:aliyun-bundle", configuration =
"shadow"))
+ testImplementation(project(":bundles:azure-bundle", configuration =
"shadow"))
testImplementation(libs.awaitility)
testImplementation(libs.bundles.jetty)
testImplementation(libs.bundles.jersey)
testImplementation(libs.bundles.jwt)
- testImplementation(libs.guava)
- testImplementation(libs.hadoop3.client)
- testImplementation(libs.hadoop3.common) {
- exclude("com.sun.jersey")
- exclude("javax.servlet", "servlet-api")
- }
+ testImplementation(libs.hadoop3.client.api)
+ testImplementation(libs.hadoop3.client.runtime)
+
testImplementation(libs.hadoop3.hdfs) {
exclude("com.sun.jersey")
exclude("javax.servlet", "servlet-api")
diff --git
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
index e18e376b46..a9c40e5584 100644
---
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
+++
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
@@ -40,6 +40,7 @@ import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.reflect.FieldUtils;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.audit.CallerContext;
import org.apache.gravitino.audit.FilesetAuditConstants;
@@ -392,6 +393,11 @@ public class GravitinoVirtualFileSystem extends FileSystem
{
scheme,
GravitinoVirtualFileSystemConfiguration.GVFS_SCHEME);
}
+ // Reset the FileSystem service loader to make sure the
FileSystem will reload the
+ // service file systems, this is a temporary solution to fix
the issue
+ // https://github.com/apache/gravitino/issues/5609
+ resetFileSystemServiceLoader(scheme);
+
Map<String, String> maps = getConfigMap(getConf());
return provider.getFileSystem(filePath, maps);
} catch (IOException ioe) {
@@ -404,6 +410,24 @@ public class GravitinoVirtualFileSystem extends FileSystem
{
return new FilesetContextPair(new Path(actualFileLocation), fs);
}
+ private void resetFileSystemServiceLoader(String fsScheme) {
+ try {
+ Map<String, Class<? extends FileSystem>> serviceFileSystems =
+ (Map<String, Class<? extends FileSystem>>)
+ FieldUtils.getField(FileSystem.class, "SERVICE_FILE_SYSTEMS",
true).get(null);
+
+ if (serviceFileSystems.containsKey(fsScheme)) {
+ return;
+ }
+
+ // Set this value to false so that FileSystem will reload the service
file systems when
+ // needed.
+ FieldUtils.getField(FileSystem.class, "FILE_SYSTEMS_LOADED",
true).set(null, false);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
private Map<String, String> getConfigMap(Configuration configuration) {
Map<String, String> maps = Maps.newHashMap();
configuration.forEach(entry -> maps.put(entry.getKey(), entry.getValue()));
diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md
index ce58826cb9..9048556ffa 100644
--- a/docs/hadoop-catalog.md
+++ b/docs/hadoop-catalog.md
@@ -10,10 +10,8 @@ license: "This software is licensed under the Apache License
version 2."
Hadoop catalog is a fileset catalog that using Hadoop Compatible File System
(HCFS) to manage
the storage location of the fileset. Currently, it supports local filesystem
and HDFS. For
-object storage like S3, GCS, and Azure Blob Storage, you can put the hadoop
object store jar like
-hadoop-aws into the `$GRAVITINO_HOME/catalogs/hadoop/libs` directory to enable
the support.
-Gravitino itself hasn't yet tested the object storage support, so if you have
any issue,
-please create an [issue](https://github.com/apache/gravitino/issues).
+object storage like S3, GCS, Azure Blob Storage and OSS, you can put the
hadoop object store jar like
+`gravitino-aws-bundle-{gravitino-version}.jar` into the
`$GRAVITINO_HOME/catalogs/hadoop/libs` directory to enable the support.
Note that Gravitino uses Hadoop 3 dependencies to build Hadoop catalog.
Theoretically, it should be
compatible with both Hadoop 2.x and 3.x, since Gravitino doesn't leverage any
new features in
@@ -52,7 +50,7 @@ Apart from the above properties, to access fileset like HDFS,
S3, GCS, OSS or cu
| `s3-access-key-id` | The access key of the AWS S3.
| (none) | Yes if it's a S3 fileset. |
0.7.0-incubating |
| `s3-secret-access-key` | The secret key of the AWS S3.
| (none) | Yes if it's a S3 fileset. |
0.7.0-incubating |
-At the same time, you need to place the corresponding bundle jar
[`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+At the same time, you need to place the corresponding bundle jar
[`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
#### GCS fileset
@@ -62,7 +60,7 @@ At the same time, you need to place the corresponding bundle
jar [`gravitino-aws
| `default-filesystem-provider` | The name default filesystem providers of
this Hadoop catalog if users do not specify the scheme in the URI. Default
value is `builtin-local`, for GCS, if we set this value, we can omit the prefix
'gs://' in the location. | `builtin-local` | No |
0.7.0-incubating |
| `gcs-service-account-file` | The path of GCS service account JSON file.
| (none) | Yes if it's a GCS fileset. |
0.7.0-incubating |
-In the meantime, you need to place the corresponding bundle jar
[`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gcp-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+In the meantime, you need to place the corresponding bundle jar
[`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
#### OSS fileset
@@ -74,7 +72,7 @@ In the meantime, you need to place the corresponding bundle
jar [`gravitino-gcp-
| `oss-access-key-id` | The access key of the Aliyun OSS.
| (none) | Yes if it's a OSS fileset. |
0.7.0-incubating |
| `oss-secret-access-key` | The secret key of the Aliyun OSS.
| (none) | Yes if it's a OSS fileset. |
0.7.0-incubating |
-In the meantime, you need to place the corresponding bundle jar
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+In the meantime, you need to place the corresponding bundle jar
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
#### Azure Blob Storage fileset
@@ -86,7 +84,7 @@ In the meantime, you need to place the corresponding bundle
jar [`gravitino-aliy
| `azure-storage-account-name ` | The account name of Azure Blob Storage.
| (none) | Yes if it's a Azure
Blob Storage fileset. | 0.8.0-incubating |
| `azure-storage-account-key` | The account key of Azure Blob Storage.
| (none) | Yes if it's a Azure
Blob Storage fileset. | 0.8.0-incubating |
-Similar to the above, you need to place the corresponding bundle jar
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+Similar to the above, you need to place the corresponding bundle jar
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
:::note
- Gravitino contains builtin file system providers for local file
system(`builtin-local`) and HDFS(`builtin-hdfs`), that is to say if
`filesystem-providers` is not set, Gravitino will still support local file
system and HDFS. Apart from that, you can set the `filesystem-providers` to
support other file systems like S3, GCS, OSS or custom file system.
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 162d535be1..0dbfd867a3 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -77,7 +77,9 @@ Apart from the above properties, to access fileset like S3,
GCS, OSS and custom
| `s3-access-key-id` | The access key of the AWS S3.
| (none) |
Yes if it's a S3 fileset.| 0.7.0-incubating |
| `s3-secret-access-key` | The secret key of the AWS S3.
| (none) |
Yes if it's a S3 fileset.| 0.7.0-incubating |
-At the same time, you need to place the corresponding bundle jar
[`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+At the same time, you need to add the corresponding bundle jar
+1.
[`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/)
in the classpath if no hadoop environment is available, or
+2.
[`gravitino-aws-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws/)
and hadoop-aws jar and other necessary dependencies in the classpath.
#### GCS fileset
@@ -86,7 +88,9 @@ At the same time, you need to place the corresponding bundle
jar [`gravitino-aws
|--------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|---------------------------|------------------|
| `gcs-service-account-file` | The path of GCS service account JSON file.
| (none) |
Yes if it's a GCS fileset.| 0.7.0-incubating |
-In the meantime, you need to place the corresponding bundle jar
[`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gcp-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+In the meantime, you need to add the corresponding bundle jar
+1.
[`gravitino-gcp-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp-bundle/)
in the classpath if no hadoop environment is available, or
+2.
[`gravitino-gcp-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-gcp/)
and [gcs-connector
jar](https://github.com/GoogleCloudDataproc/hadoop-connectors/releases) and
other necessary dependencies in the classpath.
#### OSS fileset
@@ -97,7 +101,10 @@ In the meantime, you need to place the corresponding bundle
jar [`gravitino-gcp-
| `oss-access-key-id` | The access key of the Aliyun OSS.
| (none)
| Yes if it's a OSS fileset.| 0.7.0-incubating |
| `oss-secret-access-key` | The secret key of the Aliyun OSS.
| (none)
| Yes if it's a OSS fileset.| 0.7.0-incubating |
-In the meantime, you need to place the corresponding bundle jar
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+
+In the meantime, you need to place the corresponding bundle jar
+1.
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun-bundle/)
in the classpath if no hadoop environment is available, or
+2.
[`gravitino-aliyun-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aliyun/)
and hadoop-aliyun jar and other necessary dependencies in the classpath.
#### Azure Blob Storage fileset
@@ -106,7 +113,9 @@ In the meantime, you need to place the corresponding bundle
jar [`gravitino-aliy
| `azure-storage-account-name` | The account name of Azure Blob Storage.
| (none) | Yes if it's a Azure Blob Storage fileset. |
0.8.0-incubating |
| `azure-storage-account-key` | The account key of Azure Blob Storage.
| (none) | Yes if it's a Azure Blob Storage fileset. |
0.8.0-incubating |
-Similar to the above, you need to place the corresponding bundle jar
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+Similar to the above, you need to place the corresponding bundle jar
+1.
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure-bundle/)
in the classpath if no hadoop environment is available, or
+2.
[`gravitino-azure-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-azure/)
and hadoop-azure jar and other necessary dependencies in the classpath.
#### Custom fileset
Since 0.7.0-incubating, users can define their own fileset type and configure
the corresponding properties, for more, please refer to [Custom
Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset).
@@ -137,8 +146,13 @@ You can configure these properties in two ways:
```
:::note
-If you want to access the S3, GCS, OSS or custom fileset through GVFS, apart
from the above properties, you need to place the corresponding bundle jar in
the Hadoop environment.
-For example if you want to access the S3 fileset, you need to place the S3
bundle jar
[`gravitino-aws-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aws-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`) or add it to the classpath.
+If you want to access the S3, GCS, OSS or custom fileset through GVFS, apart
from the above properties, you need to place the corresponding bundle jars in
the Hadoop environment.
+For example, if you want to access the S3 fileset, you need to place
+1. The aws hadoop bundle jar
[`gravitino-aws-bundle-${gravitino-version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws-bundle/)
+2. or
[`gravitino-aws-${gravitino-version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-aws/),
and hadoop-aws jar and other necessary dependencies
+
+in the classpath (typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+
:::
2. Configure the properties in the `core-site.xml` file of the Hadoop
environment:
@@ -212,6 +226,12 @@ cp gravitino-filesystem-hadoop3-runtime-{version}.jar
${HADOOP_HOME}/share/hadoo
# You need to ensure that the Kerberos has permission on the HDFS directory.
kinit -kt your_kerberos.keytab [email protected]
+
+# 4. Copy other dependencies to the Hadoop environment if you want to access
the S3 fileset via GVFS
+cp bundles/aws-bundle/build/libs/gravitino-aws-bundle-{version}.jar
${HADOOP_HOME}/share/hadoop/common/lib/
+cp
clients/filesystem-hadoop3-runtime/build/libs/gravitino-filesystem-hadoop3-runtime-{version}-SNAPSHOT.jar
${HADOOP_HOME}/share/hadoop/common/lib/
+cp ${HADOOP_HOME}/share/hadoop/tools/lib/*
${HADOOP_HOME}/share/hadoop/common/lib/
+
# 4. Try to list the fileset
./${HADOOP_HOME}/bin/hadoop dfs -ls
gvfs://fileset/test_catalog/test_schema/test_fileset_1
```
@@ -222,6 +242,36 @@ You can also perform operations on the files or
directories managed by fileset t
Make sure that your code is using the correct Hadoop environment, and that
your environment
has the `gravitino-filesystem-hadoop3-runtime-{version}.jar` dependency.
+```xml
+
+<dependency>
+ <groupId>org.apache.gravitino</groupId>
+ <artifactId>filesystem-hadoop3-runtime</artifactId>
+ <version>{gravitino-version}</version>
+</dependency>
+
+<!-- Use the following one if there is no Hadoop environment -->
+<dependency>
+ <groupId>org.apache.gravitino</groupId>
+ <artifactId>gravitino-aws-bundle</artifactId>
+ <version>{gravitino-version}</version>
+</dependency>
+
+<!-- Use the following ones if a Hadoop environment is already available -->
+<dependency>
+ <groupId>org.apache.gravitino</groupId>
+ <artifactId>gravitino-aws</artifactId>
+ <version>{gravitino-version}</version>
+</dependency>
+
+<dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-aws</artifactId>
+ <version>{hadoop-version}</version>
+</dependency>
+
+```
+
For example:
```java
@@ -462,8 +512,7 @@ from gravitino import gvfs
options = {
"cache_size": 20,
"cache_expired_time": 3600,
- "auth_type": "simple"
-
+ "auth_type": "simple",
# Optional, the following properties are required if you want to access
the S3 fileset via GVFS python client, for GCS and OSS fileset, you should set
the corresponding properties.
"s3_endpoint": "http://localhost:9000",
"s3_access_key_id": "minio",
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index a33c300ee8..52bccd9b48 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -36,12 +36,13 @@ airlift-json = "237"
airlift-resolver = "1.6"
hive2 = "2.3.9"
hadoop2 = "2.10.2"
-hadoop3 = "3.3.0"
+hadoop3 = "3.3.1"
hadoop3-gcs = "1.9.4-hadoop3"
-hadoop3-abs = "3.3.0"
-hadoop3-aliyun = "3.3.0"
-hadoop-minikdc = "3.3.0"
+hadoop3-abs = "3.3.1"
+hadoop3-aliyun = "3.3.1"
+hadoop-minikdc = "3.3.1"
htrace-core4 = "4.1.0-incubating"
+httpclient = "4.4.1"
httpclient5 = "5.2.1"
mockserver = "5.15.0"
commons-csv = "1.12.0"
@@ -177,6 +178,8 @@ hadoop3-aws = { group = "org.apache.hadoop", name =
"hadoop-aws", version.ref =
hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs",
version.ref = "hadoop3" }
hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common",
version.ref = "hadoop3"}
hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client",
version.ref = "hadoop3"}
+hadoop3-client-api = { group = "org.apache.hadoop", name =
"hadoop-client-api", version.ref = "hadoop3"}
+hadoop3-client-runtime = { group = "org.apache.hadoop", name =
"hadoop-client-runtime", version.ref = "hadoop3"}
hadoop3-minicluster = { group = "org.apache.hadoop", name =
"hadoop-minicluster", version.ref = "hadoop-minikdc"}
hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector",
version.ref = "hadoop3-gcs"}
hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun",
version.ref = "hadoop3-aliyun"}
@@ -184,6 +187,7 @@ hadoop3-abs = { group = "org.apache.hadoop", name =
"hadoop-azure", version.ref
htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4",
version.ref = "htrace-core4" }
airlift-json = { group = "io.airlift", name = "json", version.ref =
"airlift-json"}
airlift-resolver = { group = "io.airlift.resolver", name = "resolver",
version.ref = "airlift-resolver"}
+httpclient = { group = "org.apache.httpcomponents", name = "httpclient",
version.ref = "httpclient" }
httpclient5 = { group = "org.apache.httpcomponents.client5", name =
"httpclient5", version.ref = "httpclient5" }
mockserver-netty = { group = "org.mock-server", name = "mockserver-netty",
version.ref = "mockserver" }
mockserver-client-java = { group = "org.mock-server", name =
"mockserver-client-java", version.ref = "mockserver" }
diff --git a/iceberg/iceberg-rest-server/build.gradle.kts
b/iceberg/iceberg-rest-server/build.gradle.kts
index 03fe32c92a..fe35c4e778 100644
--- a/iceberg/iceberg-rest-server/build.gradle.kts
+++ b/iceberg/iceberg-rest-server/build.gradle.kts
@@ -62,10 +62,10 @@ dependencies {
annotationProcessor(libs.lombok)
compileOnly(libs.lombok)
- testImplementation(project(":bundles:aliyun-bundle"))
- testImplementation(project(":bundles:aws-bundle"))
- testImplementation(project(":bundles:gcp-bundle", configuration = "shadow"))
- testImplementation(project(":bundles:azure-bundle"))
+ testImplementation(project(":bundles:aliyun"))
+ testImplementation(project(":bundles:aws"))
+ testImplementation(project(":bundles:gcp", configuration = "shadow"))
+ testImplementation(project(":bundles:azure", configuration = "shadow"))
testImplementation(project(":integration-test-common", "testArtifacts"))
testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion")
diff --git a/integration-test-common/build.gradle.kts
b/integration-test-common/build.gradle.kts
index 283169a76a..bd15dc2a34 100644
--- a/integration-test-common/build.gradle.kts
+++ b/integration-test-common/build.gradle.kts
@@ -53,11 +53,11 @@ dependencies {
exclude("org.elasticsearch")
exclude("org.elasticsearch.client")
exclude("org.elasticsearch.plugin")
+ exclude("org.apache.hadoop", "hadoop-common")
}
- testImplementation(libs.hadoop3.common) {
- exclude("com.sun.jersey")
- exclude("javax.servlet", "servlet-api")
- }
+ testImplementation(libs.hadoop3.client.api)
+ testImplementation(libs.hadoop3.client.runtime)
+
testImplementation(platform("org.junit:junit-bom:5.9.1"))
testImplementation("org.junit.jupiter:junit-jupiter")
}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 562614764b..c865e14e7a 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -77,8 +77,8 @@ project(":spark-connector:spark-runtime-3.5").projectDir =
file("spark-connector
include("web:web", "web:integration-test")
include("docs")
include("integration-test-common")
-include(":bundles:aws-bundle")
-include(":bundles:gcp-bundle")
-include(":bundles:aliyun-bundle")
-include(":bundles:azure-bundle")
-include("catalogs:hadoop-common")
+include(":bundles:aws", ":bundles:aws-bundle")
+include(":bundles:gcp", ":bundles:gcp-bundle")
+include(":bundles:aliyun", ":bundles:aliyun-bundle")
+include(":bundles:azure", ":bundles:azure-bundle")
+include(":catalogs:hadoop-common")