This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 79c362c4e [#5492] feat(hadoop-catalog): Support Azure blob storage for
Gravitino server and GVFS Java client (#5508)
79c362c4e is described below
commit 79c362c4eb54fa0bf819fee9f60632f3436ba80c
Author: Qi Yu <[email protected]>
AuthorDate: Thu Nov 14 21:54:17 2024 +0800
[#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitino
server and GVFS Java client (#5508)
### What changes were proposed in this pull request?
Add support for Azure blob storage for Gravitino server and GVFS
Java client
### Why are the changes needed?
It's a big improvement for fileset usage.
Fix: #5492
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
ITs
---------
Co-authored-by: Jerry Shao <[email protected]>
---
LICENSE.bin | 1 +
build.gradle.kts | 4 +-
bundles/azure-bundle/build.gradle.kts | 62 +++++++
.../gravitino/abs/fs/AzureFileSystemProvider.java | 79 ++++++++
....gravitino.catalog.hadoop.fs.FileSystemProvider | 20 +++
.../apache/gravitino/storage/ABSProperties.java | 29 +++
catalogs/catalog-hadoop/build.gradle.kts | 1 +
.../integration/test/HadoopABSCatalogIT.java | 200 +++++++++++++++++++++
clients/filesystem-hadoop3/build.gradle.kts | 1 +
.../test/GravitinoVirtualFileSystemABSIT.java | 165 +++++++++++++++++
docs/hadoop-catalog.md | 12 ++
docs/how-to-use-gvfs.md | 12 +-
gradle/libs.versions.toml | 2 +
settings.gradle.kts | 1 +
14 files changed, 585 insertions(+), 4 deletions(-)
diff --git a/LICENSE.bin b/LICENSE.bin
index 738687a6a..9ab5edbd6 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -285,6 +285,7 @@
Apache Hadoop Aliyun connector
Apache Hadoop GCS connector
Apache Hadoop AWS connector
+ Apache Hadoop Azure connector
Apache Hadoop Annotatations
Apache Hadoop Auth
Apache Hadoop Client Aggregator
diff --git a/build.gradle.kts b/build.gradle.kts
index 23074cbe0..d290bd61e 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -774,7 +774,7 @@ tasks {
!it.name.startsWith("client") && !it.name.startsWith("filesystem") &&
!it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name !=
"trino-connector" &&
it.name != "integration-test" && it.name != "bundled-catalog" &&
!it.name.startsWith("flink") &&
it.name != "integration-test" && it.name != "hive-metastore-common" &&
!it.name.startsWith("flink") &&
- it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name !=
"aws-bundle"
+ it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name !=
"aws-bundle" && it.name != "azure-bundle"
) {
from(it.configurations.runtimeClasspath)
into("distribution/package/libs")
@@ -796,7 +796,7 @@ tasks {
!it.name.startsWith("trino-connector") &&
it.name != "bundled-catalog" &&
it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
- it.name != "aliyun-bundle" && it.name != "aws-bundle"
+ it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name !=
"azure-bundle"
) {
dependsOn("${it.name}:build")
from("${it.name}/build/libs")
diff --git a/bundles/azure-bundle/build.gradle.kts
b/bundles/azure-bundle/build.gradle.kts
new file mode 100644
index 000000000..fa6a68d1a
--- /dev/null
+++ b/bundles/azure-bundle/build.gradle.kts
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+
+plugins {
+ `maven-publish`
+ id("java")
+ alias(libs.plugins.shadow)
+}
+
+dependencies {
+ compileOnly(project(":api"))
+ compileOnly(project(":core"))
+ compileOnly(project(":catalogs:catalog-hadoop"))
+
+ compileOnly(libs.hadoop3.common)
+
+ implementation(libs.commons.lang3)
+ // runtime used
+ implementation(libs.commons.logging)
+ implementation(libs.hadoop3.abs)
+ implementation(project(":catalogs:catalog-common")) {
+ exclude("*")
+ }
+}
+
+tasks.withType(ShadowJar::class.java) {
+ isZip64 = true
+ configurations = listOf(project.configurations.runtimeClasspath.get())
+ archiveClassifier.set("")
+
+ // Relocate dependencies to avoid conflicts
+ relocate("org.apache.httpcomponents",
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
+ relocate("org.apache.commons",
"org.apache.gravitino.azure.shaded.org.apache.commons")
+ relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
+ relocate("com.google.guava",
"org.apache.gravitino.azure.shaded.com.google.guava")
+}
+
+tasks.jar {
+ dependsOn(tasks.named("shadowJar"))
+ archiveClassifier.set("empty")
+}
+
+tasks.compileJava {
+ dependsOn(":catalogs:catalog-hadoop:runtimeJars")
+}
diff --git
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
new file mode 100644
index 000000000..cad38e14c
--- /dev/null
+++
b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.abs.fs;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
+import java.util.Map;
+import javax.annotation.Nonnull;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class AzureFileSystemProvider implements FileSystemProvider {
+
+ @VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss";
+
+ @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs";
+
+ private static final String ABFS_IMPL =
"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem";
+
+ private static final String ABFS_IMPL_KEY = "fs.abfss.impl";
+
+ @Override
+ public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String,
String> config)
+ throws IOException {
+ Configuration configuration = new Configuration();
+
+ Map<String, String> hadoopConfMap =
+ FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of());
+
+ if (config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)
+ && config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) {
+ hadoopConfMap.put(
+ String.format(
+ "fs.azure.account.key.%s.dfs.core.windows.net",
+ config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)),
+ config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY));
+ }
+
+ if (!config.containsKey(ABFS_IMPL_KEY)) {
+ configuration.set(ABFS_IMPL_KEY, ABFS_IMPL);
+ }
+
+ hadoopConfMap.forEach(configuration::set);
+
+ return FileSystem.get(path.toUri(), configuration);
+ }
+
+ @Override
+ public String scheme() {
+ return ABS_PROVIDER_SCHEME;
+ }
+
+ @Override
+ public String name() {
+ return ABS_PROVIDER_NAME;
+ }
+}
diff --git
a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
new file mode 100644
index 000000000..ab864341c
--- /dev/null
+++
b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+org.apache.gravitino.abs.fs.AzureFileSystemProvider
\ No newline at end of file
diff --git
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
new file mode 100644
index 000000000..a76ece32b
--- /dev/null
+++
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.storage;
+
+public class ABSProperties {
+
+ // The account name of the Azure Blob Storage.
+ public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name";
+
+ // The account key of the Azure Blob Storage.
+ public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key";
+}
diff --git a/catalogs/catalog-hadoop/build.gradle.kts
b/catalogs/catalog-hadoop/build.gradle.kts
index c925d1b92..409a87fb1 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -80,6 +80,7 @@ dependencies {
testImplementation(project(":bundles:aws-bundle"))
testImplementation(project(":bundles:gcp-bundle"))
testImplementation(project(":bundles:aliyun-bundle"))
+ testImplementation(project(":bundles:azure-bundle"))
testImplementation(libs.minikdc)
testImplementation(libs.hadoop3.minicluster)
diff --git
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
new file mode 100644
index 000000000..0da915a7d
--- /dev/null
+++
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.hadoop.integration.test;
+
+import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Map;
+import org.apache.gravitino.Catalog;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.Schema;
+import org.apache.gravitino.abs.fs.AzureFileSystemProvider;
+import org.apache.gravitino.file.Fileset;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIf;
+import org.junit.platform.commons.util.StringUtils;
+
+@EnabledIf("absIsConfigured")
+public class HadoopABSCatalogIT extends HadoopCatalogIT {
+
+ public static final String ABS_ACCOUNT_NAME =
System.getenv("ABS_ACCOUNT_NAME");
+ public static final String ABS_ACCOUNT_KEY =
System.getenv("ABS_ACCOUNT_KEY");
+ public static final String ABS_CONTAINER_NAME =
System.getenv("ABS_CONTAINER_NAME");
+
+ @Override
+ public void startIntegrationTest() throws Exception {
+ // Just overwrite super, do nothing.
+ }
+
+ @BeforeAll
+ public void setup() throws IOException {
+ copyBundleJarsToHadoop("azure-bundle");
+
+ try {
+ super.startIntegrationTest();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake");
+ catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog");
+ schemaName = GravitinoITUtils.genRandomName("CatalogFilesetIT_schema");
+
+ schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX);
+ Configuration conf = new Configuration();
+
+ conf.set(
+ String.format("fs.azure.account.key.%s.dfs.core.windows.net",
ABS_ACCOUNT_NAME),
+ ABS_ACCOUNT_KEY);
+
+ fileSystem =
+ FileSystem.get(
+ URI.create(
+ String.format(
+ "abfs://%s@%s.dfs.core.windows.net", ABS_CONTAINER_NAME,
ABS_ACCOUNT_NAME)),
+ conf);
+
+ createMetalake();
+ createCatalog();
+ createSchema();
+ }
+
+ protected String defaultBaseLocation() {
+ if (defaultBaseLocation == null) {
+ try {
+ Path bucket =
+ new Path(
+ String.format(
+ "%s://%s@%s.dfs.core.windows.net/%s",
+ AzureFileSystemProvider.ABS_PROVIDER_SCHEME,
+ ABS_CONTAINER_NAME,
+ ABS_ACCOUNT_NAME,
+ GravitinoITUtils.genRandomName("CatalogFilesetIT")));
+
+ if (!fileSystem.exists(bucket)) {
+ fileSystem.mkdirs(bucket);
+ }
+
+ defaultBaseLocation = bucket.toString();
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to create default base location",
e);
+ }
+ }
+
+ return defaultBaseLocation;
+ }
+
+ protected void createCatalog() {
+ Map<String, String> map = Maps.newHashMap();
+ map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+ map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+ map.put(FILESYSTEM_PROVIDERS, AzureFileSystemProvider.ABS_PROVIDER_NAME);
+ metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider,
"comment", map);
+
+ catalog = metalake.loadCatalog(catalogName);
+ }
+
+ protected String generateLocation(String filesetName) {
+ return String.format("%s/%s", defaultBaseLocation, filesetName);
+ }
+
+ @Test
+ public void testCreateSchemaAndFilesetWithSpecialLocation() {
+ String localCatalogName = GravitinoITUtils.genRandomName("local_catalog");
+
+ String ossLocation =
+ String.format(
+ "%s://%s@%s.dfs.core.windows.net/%s",
+ AzureFileSystemProvider.ABS_PROVIDER_SCHEME,
+ ABS_CONTAINER_NAME,
+ ABS_ACCOUNT_NAME,
+ GravitinoITUtils.genRandomName("CatalogCatalogIT"));
+ Map<String, String> catalogProps = Maps.newHashMap();
+ catalogProps.put("location", ossLocation);
+ catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME,
ABS_ACCOUNT_NAME);
+ catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+ catalogProps.put(FILESYSTEM_PROVIDERS,
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+
+ Catalog localCatalog =
+ metalake.createCatalog(
+ localCatalogName, Catalog.Type.FILESET, provider, "comment",
catalogProps);
+ Assertions.assertEquals(ossLocation,
localCatalog.properties().get("location"));
+
+ // Create schema without specifying location.
+ Schema localSchema =
+ localCatalog
+ .asSchemas()
+ .createSchema("local_schema", "comment", ImmutableMap.of("key1",
"val1"));
+
+ Fileset localFileset =
+ localCatalog
+ .asFilesetCatalog()
+ .createFileset(
+ NameIdentifier.of(localSchema.name(), "local_fileset"),
+ "fileset comment",
+ Fileset.Type.MANAGED,
+ null,
+ ImmutableMap.of("k1", "v1"));
+ Assertions.assertEquals(
+ ossLocation + "/local_schema/local_fileset",
localFileset.storageLocation());
+
+ // Delete schema
+ localCatalog.asSchemas().dropSchema(localSchema.name(), true);
+
+ // Create schema with specifying location.
+ Map<String, String> schemaProps = ImmutableMap.of("location", ossLocation);
+ Schema localSchema2 =
+ localCatalog.asSchemas().createSchema("local_schema2", "comment",
schemaProps);
+ Assertions.assertEquals(ossLocation,
localSchema2.properties().get("location"));
+
+ Fileset localFileset2 =
+ localCatalog
+ .asFilesetCatalog()
+ .createFileset(
+ NameIdentifier.of(localSchema2.name(), "local_fileset2"),
+ "fileset comment",
+ Fileset.Type.MANAGED,
+ null,
+ ImmutableMap.of("k1", "v1"));
+ Assertions.assertEquals(ossLocation + "/local_fileset2",
localFileset2.storageLocation());
+
+ // Delete schema
+ localCatalog.asSchemas().dropSchema(localSchema2.name(), true);
+
+ // Delete catalog
+ metalake.dropCatalog(localCatalogName, true);
+ }
+
+ private static boolean absIsConfigured() {
+ return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME"))
+ && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY"))
+ && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME"));
+ }
+}
diff --git a/clients/filesystem-hadoop3/build.gradle.kts
b/clients/filesystem-hadoop3/build.gradle.kts
index 9836c3514..55c0f59a0 100644
--- a/clients/filesystem-hadoop3/build.gradle.kts
+++ b/clients/filesystem-hadoop3/build.gradle.kts
@@ -45,6 +45,7 @@ dependencies {
testImplementation(project(":bundles:gcp-bundle"))
testImplementation(project(":bundles:aliyun-bundle"))
testImplementation(project(":bundles:aws-bundle"))
+ testImplementation(project(":bundles:azure-bundle"))
testImplementation(libs.awaitility)
testImplementation(libs.bundles.jetty)
testImplementation(libs.bundles.jersey)
diff --git
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
new file mode 100644
index 000000000..cc16ce920
--- /dev/null
+++
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.filesystem.hadoop.integration.test;
+
+import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import org.apache.gravitino.Catalog;
+import org.apache.gravitino.abs.fs.AzureFileSystemProvider;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.condition.EnabledIf;
+import org.junit.platform.commons.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@EnabledIf("absIsConfigured")
+public class GravitinoVirtualFileSystemABSIT extends
GravitinoVirtualFileSystemIT {
+ private static final Logger LOG =
LoggerFactory.getLogger(GravitinoVirtualFileSystemABSIT.class);
+
+ public static final String ABS_ACCOUNT_NAME =
System.getenv("ABS_ACCOUNT_NAME");
+ public static final String ABS_ACCOUNT_KEY =
System.getenv("ABS_ACCOUNT_KEY");
+ public static final String ABS_CONTAINER_NAME =
System.getenv("ABS_CONTAINER_NAME");
+
+ @BeforeAll
+ public void startIntegrationTest() {
+ // Do nothing
+ }
+
+ @BeforeAll
+ public void startUp() throws Exception {
+ // Copy the Azure jars to the gravitino server if in deploy mode.
+ copyBundleJarsToHadoop("azure-bundle");
+ // Need to download jars to gravitino server
+ super.startIntegrationTest();
+
+ // This value can be tuned by the user, please change it accordingly.
+ defaultBockSize = 32 * 1024 * 1024;
+
+ // This value is 1 for ABS, 3 for GCS, and 1 for S3A.
+ defaultReplication = 1;
+
+ metalakeName = GravitinoITUtils.genRandomName("gvfs_it_metalake");
+ catalogName = GravitinoITUtils.genRandomName("catalog");
+ schemaName = GravitinoITUtils.genRandomName("schema");
+
+ Assertions.assertFalse(client.metalakeExists(metalakeName));
+ metalake = client.createMetalake(metalakeName, "metalake comment",
Collections.emptyMap());
+ Assertions.assertTrue(client.metalakeExists(metalakeName));
+
+ Map<String, String> properties = Maps.newHashMap();
+
+ properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+ properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+ properties.put(FILESYSTEM_PROVIDERS,
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+
+ Catalog catalog =
+ metalake.createCatalog(
+ catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment",
properties);
+ Assertions.assertTrue(metalake.catalogExists(catalogName));
+
+ catalog.asSchemas().createSchema(schemaName, "schema comment", properties);
+ Assertions.assertTrue(catalog.asSchemas().schemaExists(schemaName));
+
+ conf.set("fs.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem");
+ conf.set("fs.AbstractFileSystem.gvfs.impl",
"org.apache.gravitino.filesystem.hadoop.Gvfs");
+ conf.set("fs.gvfs.impl.disable.cache", "true");
+ conf.set("fs.gravitino.server.uri", serverUri);
+ conf.set("fs.gravitino.client.metalake", metalakeName);
+
+ conf.set("fs.gvfs.filesystem.providers",
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+ // Pass this configuration to the real file system
+ conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+ conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+ conf.set("fs.abfss.impl",
"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem");
+ }
+
+ @AfterAll
+ public void tearDown() throws IOException {
+ Catalog catalog = metalake.loadCatalog(catalogName);
+ catalog.asSchemas().dropSchema(schemaName, true);
+ metalake.dropCatalog(catalogName, true);
+ client.dropMetalake(metalakeName, true);
+
+ if (client != null) {
+ client.close();
+ client = null;
+ }
+
+ try {
+ closer.close();
+ } catch (Exception e) {
+ LOG.error("Exception in closing CloseableGroup", e);
+ }
+ }
+
+ /**
+ * Remove the `gravitino.bypass` prefix from the configuration and pass it
to the real file system
+ * This method corresponds to the method
org.apache.gravitino.filesystem.hadoop
+ * .GravitinoVirtualFileSystem#getConfigMap(Configuration) in the original
code.
+ */
+ protected Configuration
convertGvfsConfigToRealFileSystemConfig(Configuration gvfsConf) {
+ Configuration absConf = new Configuration();
+ Map<String, String> map = Maps.newHashMap();
+
+ gvfsConf.forEach(entry -> map.put(entry.getKey(), entry.getValue()));
+
+ Map<String, String> hadoopConfMap = FileSystemUtils.toHadoopConfigMap(map,
ImmutableMap.of());
+
+ if (gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME) != null
+ && gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY) != null) {
+ hadoopConfMap.put(
+ String.format(
+ "fs.azure.account.key.%s.dfs.core.windows.net",
+ gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)),
+ gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY));
+ }
+
+ hadoopConfMap.forEach(absConf::set);
+
+ return absConf;
+ }
+
+ protected String genStorageLocation(String fileset) {
+ return String.format(
+ "%s://%s@%s.dfs.core.windows.net/%s",
+ AzureFileSystemProvider.ABS_PROVIDER_SCHEME, ABS_CONTAINER_NAME,
ABS_ACCOUNT_NAME, fileset);
+ }
+
+ @Disabled("java.lang.UnsupportedOperationException: Append Support not
enabled")
+ public void testAppend() throws IOException {}
+
+ private static boolean absIsConfigured() {
+ return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME"))
+ && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY"))
+ && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME"));
+ }
+}
diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md
index 0622574d4..f0fb9bb17 100644
--- a/docs/hadoop-catalog.md
+++ b/docs/hadoop-catalog.md
@@ -76,6 +76,18 @@ In the meantime, you need to place the corresponding bundle
jar [`gravitino-gcp-
In the meantime, you need to place the corresponding bundle jar
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+
+#### Azure Blob Storage fileset
+
+| Configuration item | Description
| Default value | Required
| Since version |
+|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------|
+| `filesystem-providers` | The file system providers to add. Set it to
`abs` if it's an Azure Blob Storage fileset, or a comma separated string that
contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including
`abs`. | (none) | Yes
| 0.8.0-incubating |
+| `default-filesystem-provider` | The default filesystem provider of
this Hadoop catalog if users do not specify the scheme in the URI. Default
value is `builtin-local`, for Azure Blob Storage, if we set this value, we can
omit the prefix 'abfss://' in the location. | `builtin-local` | No
| 0.8.0-incubating |
+| `abs-account-name` | The account name of Azure Blob storage.
| (none) | Yes if it's an Azure Blob
Storage fileset. | 0.8.0-incubating |
+| `abs-account-key` | The account key of Azure Blob storage.
| (none) | Yes if it's an Azure Blob
Storage fileset. | 0.8.0-incubating |
+
+Similar to the above, you need to place the corresponding bundle jar
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+
:::note
- Gravitino contains builtin file system providers for local file
system(`builtin-local`) and HDFS(`builtin-hdfs`), that is to say if
`filesystem-providers` is not set, Gravitino will still support local file
system and HDFS. Apart from that, you can set the `filesystem-providers` to
support other file systems like S3, GCS, OSS or custom file system.
- `default-filesystem-provider` is used to set the default file system
provider for the Hadoop catalog. If the user does not specify the scheme in the
URI, Gravitino will use the default file system provider to access the fileset.
For example, if the default file system provider is set to `builtin-local`, the
user can omit the prefix `file://` in the location.
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 7a3373092..6ea3a972d 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -102,6 +102,16 @@ In the meantime, you need to place the corresponding
bundle jar [`gravitino-gcp-
In the meantime, you need to place the corresponding bundle jar
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+#### Azure Blob Storage fileset
+
+| Configuration item | Description
| Default value | Required | Since version
|
+|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------|
+| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to
`abs` if it's an Azure Blob Storage fileset, or a comma separated string that
contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including
`abs`. | (none) | Yes |
0.8.0-incubating |
+| `abs-account-name` | The account name of Azure Blob Storage.
| (none) | Yes if it's an Azure Blob Storage fileset. |
0.8.0-incubating |
+| `abs-account-key` | The account key of Azure Blob Storage.
| (none) | Yes if it's an Azure Blob Storage fileset. |
0.8.0-incubating |
+
+Similar to the above, you need to place the corresponding bundle jar
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
in the Hadoop environment(typically located in
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+
#### Custom fileset
Since 0.7.0-incubating, users can define their own fileset type and configure
the corresponding properties, for more, please refer to [Custom
Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset).
So, if you want to access the custom fileset through GVFS, you need to
configure the corresponding properties.
@@ -111,8 +121,6 @@ So, if you want to access the custom fileset through GVFS,
you need to configure
| `fs.gvfs.filesystem.providers` | The file system providers. please set it to
the value of `YourCustomFileSystemProvider#name` | (none) |
Yes | 0.7.0-incubating |
| `your-custom-properties` | The properties will be used to create a
FileSystem instance in `CustomFileSystemProvider#getFileSystem` | (none)
| No | - |
-
-
You can configure these properties in two ways:
1. Before obtaining the `FileSystem` in the code, construct a `Configuration`
object and set its properties:
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 79f953fa9..4b7441ea2 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -34,6 +34,7 @@ hive2 = "2.3.9"
hadoop2 = "2.10.2"
hadoop3 = "3.3.0"
hadoop3-gcs = "1.9.4-hadoop3"
+hadoop3-abs = "3.3.0"
hadoop3-aliyun = "3.3.0"
hadoop-minikdc = "3.3.0"
htrace-core4 = "4.1.0-incubating"
@@ -170,6 +171,7 @@ hadoop3-client = { group = "org.apache.hadoop", name =
"hadoop-client", version.
hadoop3-minicluster = { group = "org.apache.hadoop", name =
"hadoop-minicluster", version.ref = "hadoop-minikdc"}
hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector",
version.ref = "hadoop3-gcs"}
hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun",
version.ref = "hadoop3-aliyun"}
+hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure",
version.ref = "hadoop3-abs"}
htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4",
version.ref = "htrace-core4" }
airlift-json = { group = "io.airlift", name = "json", version.ref =
"airlift-json"}
airlift-resolver = { group = "io.airlift.resolver", name = "resolver",
version.ref = "airlift-resolver"}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 1f3efb495..2cde39c22 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -74,3 +74,4 @@ include("integration-test-common")
include(":bundles:aws-bundle")
include(":bundles:gcp-bundle")
include(":bundles:aliyun-bundle")
+include(":bundles:azure-bundle")