This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 79c362c4e [#5492] feat(hadoop-catalog): Support Azure blob storage for 
Gravitino server and GVFS Java client (#5508)
79c362c4e is described below

commit 79c362c4eb54fa0bf819fee9f60632f3436ba80c
Author: Qi Yu <[email protected]>
AuthorDate: Thu Nov 14 21:54:17 2024 +0800

    [#5492] feat(hadoop-catalog): Support Azure blob storage for Gravitino 
server and GVFS Java client (#5508)
    
    ### What changes were proposed in this pull request?
    
    Add support for Azure Blob Storage for Gravitino server and GVFS
    Java client
    
    ### Why are the changes needed?
    
    It's a big improvement for fileset usage.
    
    Fix: #5492
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A
    
    ### How was this patch tested?
    
    ITs
    
    ---------
    
    Co-authored-by: Jerry Shao <[email protected]>
---
 LICENSE.bin                                        |   1 +
 build.gradle.kts                                   |   4 +-
 bundles/azure-bundle/build.gradle.kts              |  62 +++++++
 .../gravitino/abs/fs/AzureFileSystemProvider.java  |  79 ++++++++
 ....gravitino.catalog.hadoop.fs.FileSystemProvider |  20 +++
 .../apache/gravitino/storage/ABSProperties.java    |  29 +++
 catalogs/catalog-hadoop/build.gradle.kts           |   1 +
 .../integration/test/HadoopABSCatalogIT.java       | 200 +++++++++++++++++++++
 clients/filesystem-hadoop3/build.gradle.kts        |   1 +
 .../test/GravitinoVirtualFileSystemABSIT.java      | 165 +++++++++++++++++
 docs/hadoop-catalog.md                             |  12 ++
 docs/how-to-use-gvfs.md                            |  12 +-
 gradle/libs.versions.toml                          |   2 +
 settings.gradle.kts                                |   1 +
 14 files changed, 585 insertions(+), 4 deletions(-)

diff --git a/LICENSE.bin b/LICENSE.bin
index 738687a6a..9ab5edbd6 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -285,6 +285,7 @@
    Apache Hadoop Aliyun connector
    Apache Hadoop GCS connector
    Apache Hadoop AWS connector
+   Apache Hadoop Azure connector
    Apache Hadoop Annotatations
    Apache Hadoop Auth
    Apache Hadoop Client Aggregator
diff --git a/build.gradle.kts b/build.gradle.kts
index 23074cbe0..d290bd61e 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -774,7 +774,7 @@ tasks {
         !it.name.startsWith("client") && !it.name.startsWith("filesystem") && 
!it.name.startsWith("spark") && !it.name.startsWith("iceberg") && it.name != 
"trino-connector" &&
         it.name != "integration-test" && it.name != "bundled-catalog" && 
!it.name.startsWith("flink") &&
         it.name != "integration-test" && it.name != "hive-metastore-common" && 
!it.name.startsWith("flink") &&
-        it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != 
"aws-bundle"
+        it.name != "gcp-bundle" && it.name != "aliyun-bundle" && it.name != 
"aws-bundle" && it.name != "azure-bundle"
       ) {
         from(it.configurations.runtimeClasspath)
         into("distribution/package/libs")
@@ -796,7 +796,7 @@ tasks {
         !it.name.startsWith("trino-connector") &&
         it.name != "bundled-catalog" &&
         it.name != "hive-metastore-common" && it.name != "gcp-bundle" &&
-        it.name != "aliyun-bundle" && it.name != "aws-bundle"
+        it.name != "aliyun-bundle" && it.name != "aws-bundle" && it.name != 
"azure-bundle"
       ) {
         dependsOn("${it.name}:build")
         from("${it.name}/build/libs")
diff --git a/bundles/azure-bundle/build.gradle.kts 
b/bundles/azure-bundle/build.gradle.kts
new file mode 100644
index 000000000..fa6a68d1a
--- /dev/null
+++ b/bundles/azure-bundle/build.gradle.kts
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
+
+plugins {
+  `maven-publish`
+  id("java")
+  alias(libs.plugins.shadow)
+}
+
+dependencies {
+  compileOnly(project(":api"))
+  compileOnly(project(":core"))
+  compileOnly(project(":catalogs:catalog-hadoop"))
+
+  compileOnly(libs.hadoop3.common)
+
+  implementation(libs.commons.lang3)
+  // runtime used
+  implementation(libs.commons.logging)
+  implementation(libs.hadoop3.abs)
+  implementation(project(":catalogs:catalog-common")) {
+    exclude("*")
+  }
+}
+
+tasks.withType(ShadowJar::class.java) {
+  isZip64 = true
+  configurations = listOf(project.configurations.runtimeClasspath.get())
+  archiveClassifier.set("")
+
+  // Relocate dependencies to avoid conflicts
+  relocate("org.apache.httpcomponents", 
"org.apache.gravitino.azure.shaded.org.apache.httpcomponents")
+  relocate("org.apache.commons", 
"org.apache.gravitino.azure.shaded.org.apache.commons")
+  relocate("com.fasterxml", "org.apache.gravitino.azure.shaded.com.fasterxml")
+  relocate("com.google.guava", 
"org.apache.gravitino.azure.shaded.com.google.guava")
+}
+
+tasks.jar {
+  dependsOn(tasks.named("shadowJar"))
+  archiveClassifier.set("empty")
+}
+
+tasks.compileJava {
+  dependsOn(":catalogs:catalog-hadoop:runtimeJars")
+}
diff --git 
a/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
 
b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
new file mode 100644
index 000000000..cad38e14c
--- /dev/null
+++ 
b/bundles/azure-bundle/src/main/java/org/apache/gravitino/abs/fs/AzureFileSystemProvider.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.abs.fs;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
+import java.util.Map;
+import javax.annotation.Nonnull;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class AzureFileSystemProvider implements FileSystemProvider {
+
+  @VisibleForTesting public static final String ABS_PROVIDER_SCHEME = "abfss";
+
+  @VisibleForTesting public static final String ABS_PROVIDER_NAME = "abs";
+
+  private static final String ABFS_IMPL = 
"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem";
+
+  private static final String ABFS_IMPL_KEY = "fs.abfss.impl";
+
+  @Override
+  public FileSystem getFileSystem(@Nonnull Path path, @Nonnull Map<String, 
String> config)
+      throws IOException {
+    Configuration configuration = new Configuration();
+
+    Map<String, String> hadoopConfMap =
+        FileSystemUtils.toHadoopConfigMap(config, ImmutableMap.of());
+
+    if (config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)
+        && config.containsKey(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY)) {
+      hadoopConfMap.put(
+          String.format(
+              "fs.azure.account.key.%s.dfs.core.windows.net",
+              config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)),
+          config.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY));
+    }
+
+    if (!config.containsKey(ABFS_IMPL_KEY)) {
+      configuration.set(ABFS_IMPL_KEY, ABFS_IMPL);
+    }
+
+    hadoopConfMap.forEach(configuration::set);
+
+    return FileSystem.get(path.toUri(), configuration);
+  }
+
+  @Override
+  public String scheme() {
+    return ABS_PROVIDER_SCHEME;
+  }
+
+  @Override
+  public String name() {
+    return ABS_PROVIDER_NAME;
+  }
+}
diff --git 
a/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
 
b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
new file mode 100644
index 000000000..ab864341c
--- /dev/null
+++ 
b/bundles/azure-bundle/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+org.apache.gravitino.abs.fs.AzureFileSystemProvider
\ No newline at end of file
diff --git 
a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
 
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
new file mode 100644
index 000000000..a76ece32b
--- /dev/null
+++ 
b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/ABSProperties.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.storage;
+
+public class ABSProperties {
+
+  // The account name of the Azure Blob Storage.
+  public static final String GRAVITINO_ABS_ACCOUNT_NAME = "abs-account-name";
+
+  // The account key of the Azure Blob Storage.
+  public static final String GRAVITINO_ABS_ACCOUNT_KEY = "abs-account-key";
+}
diff --git a/catalogs/catalog-hadoop/build.gradle.kts 
b/catalogs/catalog-hadoop/build.gradle.kts
index c925d1b92..409a87fb1 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -80,6 +80,7 @@ dependencies {
   testImplementation(project(":bundles:aws-bundle"))
   testImplementation(project(":bundles:gcp-bundle"))
   testImplementation(project(":bundles:aliyun-bundle"))
+  testImplementation(project(":bundles:azure-bundle"))
 
   testImplementation(libs.minikdc)
   testImplementation(libs.hadoop3.minicluster)
diff --git 
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
new file mode 100644
index 000000000..0da915a7d
--- /dev/null
+++ 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopABSCatalogIT.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.catalog.hadoop.integration.test;
+
+import static 
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Map;
+import org.apache.gravitino.Catalog;
+import org.apache.gravitino.NameIdentifier;
+import org.apache.gravitino.Schema;
+import org.apache.gravitino.abs.fs.AzureFileSystemProvider;
+import org.apache.gravitino.file.Fileset;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIf;
+import org.junit.platform.commons.util.StringUtils;
+
+@EnabledIf("absIsConfigured")
+public class HadoopABSCatalogIT extends HadoopCatalogIT {
+
+  public static final String ABS_ACCOUNT_NAME = 
System.getenv("ABS_ACCOUNT_NAME");
+  public static final String ABS_ACCOUNT_KEY = 
System.getenv("ABS_ACCOUNT_KEY");
+  public static final String ABS_CONTAINER_NAME = 
System.getenv("ABS_CONTAINER_NAME");
+
+  @Override
+  public void startIntegrationTest() throws Exception {
+    // Just overwrite super, do nothing.
+  }
+
+  @BeforeAll
+  public void setup() throws IOException {
+    copyBundleJarsToHadoop("azure-bundle");
+
+    try {
+      super.startIntegrationTest();
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+    metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake");
+    catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog");
+    schemaName = GravitinoITUtils.genRandomName("CatalogFilesetIT_schema");
+
+    schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX);
+    Configuration conf = new Configuration();
+
+    conf.set(
+        String.format("fs.azure.account.key.%s.dfs.core.windows.net", 
ABS_ACCOUNT_NAME),
+        ABS_ACCOUNT_KEY);
+
+    fileSystem =
+        FileSystem.get(
+            URI.create(
+                String.format(
+                    "abfs://%s@%s.dfs.core.windows.net", ABS_CONTAINER_NAME, 
ABS_ACCOUNT_NAME)),
+            conf);
+
+    createMetalake();
+    createCatalog();
+    createSchema();
+  }
+
+  protected String defaultBaseLocation() {
+    if (defaultBaseLocation == null) {
+      try {
+        Path bucket =
+            new Path(
+                String.format(
+                    "%s://%s@%s.dfs.core.windows.net/%s",
+                    AzureFileSystemProvider.ABS_PROVIDER_SCHEME,
+                    ABS_CONTAINER_NAME,
+                    ABS_ACCOUNT_NAME,
+                    GravitinoITUtils.genRandomName("CatalogFilesetIT")));
+
+        if (!fileSystem.exists(bucket)) {
+          fileSystem.mkdirs(bucket);
+        }
+
+        defaultBaseLocation = bucket.toString();
+      } catch (IOException e) {
+        throw new RuntimeException("Failed to create default base location", 
e);
+      }
+    }
+
+    return defaultBaseLocation;
+  }
+
+  protected void createCatalog() {
+    Map<String, String> map = Maps.newHashMap();
+    map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+    map.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+    map.put(FILESYSTEM_PROVIDERS, AzureFileSystemProvider.ABS_PROVIDER_NAME);
+    metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, 
"comment", map);
+
+    catalog = metalake.loadCatalog(catalogName);
+  }
+
+  protected String generateLocation(String filesetName) {
+    return String.format("%s/%s", defaultBaseLocation, filesetName);
+  }
+
+  @Test
+  public void testCreateSchemaAndFilesetWithSpecialLocation() {
+    String localCatalogName = GravitinoITUtils.genRandomName("local_catalog");
+
+    String ossLocation =
+        String.format(
+            "%s://%s@%s.dfs.core.windows.net/%s",
+            AzureFileSystemProvider.ABS_PROVIDER_SCHEME,
+            ABS_CONTAINER_NAME,
+            ABS_ACCOUNT_NAME,
+            GravitinoITUtils.genRandomName("CatalogCatalogIT"));
+    Map<String, String> catalogProps = Maps.newHashMap();
+    catalogProps.put("location", ossLocation);
+    catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, 
ABS_ACCOUNT_NAME);
+    catalogProps.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+    catalogProps.put(FILESYSTEM_PROVIDERS, 
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+
+    Catalog localCatalog =
+        metalake.createCatalog(
+            localCatalogName, Catalog.Type.FILESET, provider, "comment", 
catalogProps);
+    Assertions.assertEquals(ossLocation, 
localCatalog.properties().get("location"));
+
+    // Create schema without specifying location.
+    Schema localSchema =
+        localCatalog
+            .asSchemas()
+            .createSchema("local_schema", "comment", ImmutableMap.of("key1", 
"val1"));
+
+    Fileset localFileset =
+        localCatalog
+            .asFilesetCatalog()
+            .createFileset(
+                NameIdentifier.of(localSchema.name(), "local_fileset"),
+                "fileset comment",
+                Fileset.Type.MANAGED,
+                null,
+                ImmutableMap.of("k1", "v1"));
+    Assertions.assertEquals(
+        ossLocation + "/local_schema/local_fileset", 
localFileset.storageLocation());
+
+    // Delete schema
+    localCatalog.asSchemas().dropSchema(localSchema.name(), true);
+
+    // Create schema with specifying location.
+    Map<String, String> schemaProps = ImmutableMap.of("location", ossLocation);
+    Schema localSchema2 =
+        localCatalog.asSchemas().createSchema("local_schema2", "comment", 
schemaProps);
+    Assertions.assertEquals(ossLocation, 
localSchema2.properties().get("location"));
+
+    Fileset localFileset2 =
+        localCatalog
+            .asFilesetCatalog()
+            .createFileset(
+                NameIdentifier.of(localSchema2.name(), "local_fileset2"),
+                "fileset comment",
+                Fileset.Type.MANAGED,
+                null,
+                ImmutableMap.of("k1", "v1"));
+    Assertions.assertEquals(ossLocation + "/local_fileset2", 
localFileset2.storageLocation());
+
+    // Delete schema
+    localCatalog.asSchemas().dropSchema(localSchema2.name(), true);
+
+    // Delete catalog
+    metalake.dropCatalog(localCatalogName, true);
+  }
+
+  private static boolean absIsConfigured() {
+    return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME"))
+        && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY"))
+        && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME"));
+  }
+}
diff --git a/clients/filesystem-hadoop3/build.gradle.kts 
b/clients/filesystem-hadoop3/build.gradle.kts
index 9836c3514..55c0f59a0 100644
--- a/clients/filesystem-hadoop3/build.gradle.kts
+++ b/clients/filesystem-hadoop3/build.gradle.kts
@@ -45,6 +45,7 @@ dependencies {
   testImplementation(project(":bundles:gcp-bundle"))
   testImplementation(project(":bundles:aliyun-bundle"))
   testImplementation(project(":bundles:aws-bundle"))
+  testImplementation(project(":bundles:azure-bundle"))
   testImplementation(libs.awaitility)
   testImplementation(libs.bundles.jetty)
   testImplementation(libs.bundles.jersey)
diff --git 
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
new file mode 100644
index 000000000..cc16ce920
--- /dev/null
+++ 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemABSIT.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.filesystem.hadoop.integration.test;
+
+import static 
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import org.apache.gravitino.Catalog;
+import org.apache.gravitino.abs.fs.AzureFileSystemProvider;
+import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils;
+import org.apache.gravitino.integration.test.util.GravitinoITUtils;
+import org.apache.gravitino.storage.ABSProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.condition.EnabledIf;
+import org.junit.platform.commons.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@EnabledIf("absIsConfigured")
+public class GravitinoVirtualFileSystemABSIT extends 
GravitinoVirtualFileSystemIT {
+  private static final Logger LOG = 
LoggerFactory.getLogger(GravitinoVirtualFileSystemABSIT.class);
+
+  public static final String ABS_ACCOUNT_NAME = 
System.getenv("ABS_ACCOUNT_NAME");
+  public static final String ABS_ACCOUNT_KEY = 
System.getenv("ABS_ACCOUNT_KEY");
+  public static final String ABS_CONTAINER_NAME = 
System.getenv("ABS_CONTAINER_NAME");
+
+  @BeforeAll
+  public void startIntegrationTest() {
+    // Do nothing
+  }
+
+  @BeforeAll
+  public void startUp() throws Exception {
+    // Copy the Azure jars to the gravitino server if in deploy mode.
+    copyBundleJarsToHadoop("azure-bundle");
+    // Need to download jars to gravitino server
+    super.startIntegrationTest();
+
+    // This value can be tuned by the user, please change it accordingly.
+    defaultBockSize = 32 * 1024 * 1024;
+
+    // This value is 1 for ABS, 3 for GCS, and 1 for S3A.
+    defaultReplication = 1;
+
+    metalakeName = GravitinoITUtils.genRandomName("gvfs_it_metalake");
+    catalogName = GravitinoITUtils.genRandomName("catalog");
+    schemaName = GravitinoITUtils.genRandomName("schema");
+
+    Assertions.assertFalse(client.metalakeExists(metalakeName));
+    metalake = client.createMetalake(metalakeName, "metalake comment", 
Collections.emptyMap());
+    Assertions.assertTrue(client.metalakeExists(metalakeName));
+
+    Map<String, String> properties = Maps.newHashMap();
+
+    properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+    properties.put(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+    properties.put(FILESYSTEM_PROVIDERS, 
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+
+    Catalog catalog =
+        metalake.createCatalog(
+            catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment", 
properties);
+    Assertions.assertTrue(metalake.catalogExists(catalogName));
+
+    catalog.asSchemas().createSchema(schemaName, "schema comment", properties);
+    Assertions.assertTrue(catalog.asSchemas().schemaExists(schemaName));
+
+    conf.set("fs.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem");
+    conf.set("fs.AbstractFileSystem.gvfs.impl", 
"org.apache.gravitino.filesystem.hadoop.Gvfs");
+    conf.set("fs.gvfs.impl.disable.cache", "true");
+    conf.set("fs.gravitino.server.uri", serverUri);
+    conf.set("fs.gravitino.client.metalake", metalakeName);
+
+    conf.set("fs.gvfs.filesystem.providers", 
AzureFileSystemProvider.ABS_PROVIDER_NAME);
+    // Pass this configuration to the real file system
+    conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME, ABS_ACCOUNT_NAME);
+    conf.set(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY, ABS_ACCOUNT_KEY);
+    conf.set("fs.abfss.impl", 
"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem");
+  }
+
+  @AfterAll
+  public void tearDown() throws IOException {
+    Catalog catalog = metalake.loadCatalog(catalogName);
+    catalog.asSchemas().dropSchema(schemaName, true);
+    metalake.dropCatalog(catalogName, true);
+    client.dropMetalake(metalakeName, true);
+
+    if (client != null) {
+      client.close();
+      client = null;
+    }
+
+    try {
+      closer.close();
+    } catch (Exception e) {
+      LOG.error("Exception in closing CloseableGroup", e);
+    }
+  }
+
+  /**
+   * Remove the `gravitino.bypass` prefix from the configuration and pass it 
to the real file system
+   * This method corresponds to the method 
org.apache.gravitino.filesystem.hadoop
+   * .GravitinoVirtualFileSystem#getConfigMap(Configuration) in the original 
code.
+   */
+  protected Configuration 
convertGvfsConfigToRealFileSystemConfig(Configuration gvfsConf) {
+    Configuration absConf = new Configuration();
+    Map<String, String> map = Maps.newHashMap();
+
+    gvfsConf.forEach(entry -> map.put(entry.getKey(), entry.getValue()));
+
+    Map<String, String> hadoopConfMap = FileSystemUtils.toHadoopConfigMap(map, 
ImmutableMap.of());
+
+    if (gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME) != null
+        && gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY) != null) {
+      hadoopConfMap.put(
+          String.format(
+              "fs.azure.account.key.%s.dfs.core.windows.net",
+              gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_NAME)),
+          gvfsConf.get(ABSProperties.GRAVITINO_ABS_ACCOUNT_KEY));
+    }
+
+    hadoopConfMap.forEach(absConf::set);
+
+    return absConf;
+  }
+
+  protected String genStorageLocation(String fileset) {
+    return String.format(
+        "%s://%s@%s.dfs.core.windows.net/%s",
+        AzureFileSystemProvider.ABS_PROVIDER_SCHEME, ABS_CONTAINER_NAME, 
ABS_ACCOUNT_NAME, fileset);
+  }
+
+  @Disabled("java.lang.UnsupportedOperationException: Append Support not 
enabled")
+  public void testAppend() throws IOException {}
+
+  private static boolean absIsConfigured() {
+    return StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_NAME"))
+        && StringUtils.isNotBlank(System.getenv("ABS_ACCOUNT_KEY"))
+        && StringUtils.isNotBlank(System.getenv("ABS_CONTAINER_NAME"));
+  }
+}
diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md
index 0622574d4..f0fb9bb17 100644
--- a/docs/hadoop-catalog.md
+++ b/docs/hadoop-catalog.md
@@ -76,6 +76,18 @@ In the meantime, you need to place the corresponding bundle 
jar [`gravitino-gcp-
 
 In the meantime, you need to place the corresponding bundle jar 
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
 in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
 
+
+#### Azure Blob Storage fileset
+
+| Configuration item            | Description                                  
                                                                                
                                                                                
                                  | Default value   | Required                  
                | Since version    |
+|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|-------------------------------------------|------------------|
+| `filesystem-providers`        | The file system providers to add. Set it to 
`abs` if it's an Azure Blob Storage fileset, or a comma separated string that 
contains `abs` like `oss,abs,s3` to support multiple kinds of fileset including 
`abs`.                                | (none)          | Yes                   
                    | 0.8.0-incubating |
+| `default-filesystem-provider` | The name of the default filesystem provider of 
this Hadoop catalog if users do not specify the scheme in the URI. Default 
value is `builtin-local`, for Azure Blob Storage, if we set this value, we can 
omit the prefix 'abfss://' in the location. | `builtin-local` | No              
                          | 0.8.0-incubating |
+| `abs-account-name`            | The account name of Azure Blob storage.      
                                                                                
                                                                                
                                  | (none)          | Yes if it's an Azure Blob 
Storage fileset. | 0.8.0-incubating |
+| `abs-account-key`             | The account key of Azure Blob storage.       
                                                                                
                                                                                
                                  | (none)          | Yes if it's an Azure Blob 
Storage fileset. | 0.8.0-incubating |
+
+Similar to the above, you need to place the corresponding bundle jar 
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
 in the directory `${GRAVITINO_HOME}/catalogs/hadoop/libs`.
+
 :::note
 - Gravitino contains builtin file system providers for local file 
system(`builtin-local`) and HDFS(`builtin-hdfs`), that is to say if 
`filesystem-providers` is not set, Gravitino will still support local file 
system and HDFS. Apart from that, you can set the `filesystem-providers` to 
support other file systems like S3, GCS, OSS or custom file system.
 - `default-filesystem-provider` is used to set the default file system 
provider for the Hadoop catalog. If the user does not specify the scheme in the 
URI, Gravitino will use the default file system provider to access the fileset. 
For example, if the default file system provider is set to `builtin-local`, the 
user can omit the prefix `file://` in the location. 
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 7a3373092..6ea3a972d 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -102,6 +102,16 @@ In the meantime, you need to place the corresponding 
bundle jar [`gravitino-gcp-
 
 In the meantime, you need to place the corresponding bundle jar 
[`gravitino-aliyun-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/aliyun-bundle/)
 in the Hadoop environment(typically located in 
`${HADOOP_HOME}/share/hadoop/common/lib/`).
 
+#### Azure Blob Storage fileset
+
+| Configuration item             | Description                                 
                                                                                
                                                                                
    | Default value | Required                                  | Since version 
   |
+|--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------|------------------|
+| `fs.gvfs.filesystem.providers` | The file system providers to add. Set it to 
`abs` if it's an Azure Blob Storage fileset, or a comma-separated string that 
contains `abs` like `oss,abs,s3` to support multiple kinds of filesets including 
`abs`. | (none)        | Yes                                       | 
0.8.0-incubating |
+| `abs-account-name`             | The account name of Azure Blob Storage.     
                                                                                
                                                                                
    | (none)        | Yes if it's an Azure Blob Storage fileset. | 
0.8.0-incubating |
+| `abs-account-key`              | The account key of Azure Blob Storage.      
                                                                                
                                                                                
    | (none)        | Yes if it's an Azure Blob Storage fileset. | 
0.8.0-incubating |
+
+Similar to the above, you need to place the corresponding bundle jar 
[`gravitino-azure-bundle-${version}.jar`](https://repo1.maven.org/maven2/org/apache/gravitino/azure-bundle/)
 in the Hadoop environment(typically located in 
`${HADOOP_HOME}/share/hadoop/common/lib/`).
+
 #### Custom fileset 
 Since 0.7.0-incubating, users can define their own fileset type and configure 
the corresponding properties, for more, please refer to [Custom 
Fileset](./hadoop-catalog.md#how-to-custom-your-own-hcfs-file-system-fileset).
 So, if you want to access the custom fileset through GVFS, you need to 
configure the corresponding properties.
@@ -111,8 +121,6 @@ So, if you want to access the custom fileset through GVFS, 
you need to configure
 | `fs.gvfs.filesystem.providers` | The file system providers. please set it to 
the value of `YourCustomFileSystemProvider#name`            | (none)        | 
Yes      | 0.7.0-incubating |
 | `your-custom-properties`       | The properties will be used to create a 
FileSystem instance in `CustomFileSystemProvider#getFileSystem` | (none)        
| No       | -                |
 
-
-
 You can configure these properties in two ways:
 
 1. Before obtaining the `FileSystem` in the code, construct a `Configuration` 
object and set its properties:
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 79f953fa9..4b7441ea2 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -34,6 +34,7 @@ hive2 = "2.3.9"
 hadoop2 = "2.10.2"
 hadoop3 = "3.3.0"
 hadoop3-gcs = "1.9.4-hadoop3"
+hadoop3-abs = "3.3.0"
 hadoop3-aliyun = "3.3.0"
 hadoop-minikdc = "3.3.0"
 htrace-core4 = "4.1.0-incubating"
@@ -170,6 +171,7 @@ hadoop3-client = { group = "org.apache.hadoop", name = 
"hadoop-client", version.
 hadoop3-minicluster = { group = "org.apache.hadoop", name = 
"hadoop-minicluster", version.ref = "hadoop-minikdc"}
 hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", 
version.ref = "hadoop3-gcs"}
 hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", 
version.ref = "hadoop3-aliyun"}
+hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure", 
version.ref = "hadoop3-abs"}
 htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", 
version.ref = "htrace-core4" }
 airlift-json = { group = "io.airlift", name = "json", version.ref = 
"airlift-json"}
 airlift-resolver = { group = "io.airlift.resolver", name = "resolver", 
version.ref = "airlift-resolver"}
diff --git a/settings.gradle.kts b/settings.gradle.kts
index 1f3efb495..2cde39c22 100644
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@@ -74,3 +74,4 @@ include("integration-test-common")
 include(":bundles:aws-bundle")
 include(":bundles:gcp-bundle")
 include(":bundles:aliyun-bundle")
+include(":bundles:azure-bundle")


Reply via email to