jerryshao commented on code in PR #5538:
URL: https://github.com/apache/gravitino/pull/5538#discussion_r1851690479


##########
clients/client-python/tests/integration/test_gvfs_with_abs.py:
##########
@@ -0,0 +1,373 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+from random import randint
+import unittest
+
+
+from adlfs import AzureBlobFileSystem
+
+from tests.integration.test_gvfs_with_hdfs import TestGvfsWithHDFS
+from gravitino import (
+    gvfs,
+    GravitinoClient,
+    Catalog,
+    Fileset,
+)
+from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.filesystem.gvfs_config import GVFSConfig
+
+
+logger = logging.getLogger(__name__)
+
+
+def azure_abs_is_prepared():
+    return (
+        os.environ.get("ABS_ACCOUNT_NAME")
+        and os.environ.get("ABS_ACCOUNT_KEY")
+        and os.environ.get("ABS_CONTAINER_NAME")
+    )
+
+
[email protected](azure_abs_is_prepared(), "Azure Blob Storage is not 
prepared.")
+class TestGvfsWithABS(TestGvfsWithHDFS):
+    # Before running this test, please set the make sure aliyun-azure-x.jar 
has been
+    # copy to the $GRAVITINO_HOME/catalogs/hadoop/libs/ directory
+    azure_abs_account_key = os.environ.get("ABS_ACCOUNT_KEY")
+    azure_abs_account_name = os.environ.get("ABS_ACCOUNT_NAME")
+    azure_abs_container_name = os.environ.get("ABS_CONTAINER_NAME")
+
+    metalake_name: str = "TestGvfsWithABS_metalake" + str(randint(1, 10000))
+
+    def setUp(self):
+        self.options = {
+            GVFSConfig.GVFS_FILESYSTEM_AZURE_ACCOUNT_NAME: 
self.azure_abs_account_name,
+            GVFSConfig.GVFS_FILESYSTEM_AZURE_ACCOUNT_KEY: 
self.azure_abs_account_key,
+        }
+
+    def tearDown(self):
+        self.options = {}
+
+    @classmethod
+    def setUpClass(cls):
+        cls._get_gravitino_home()
+
+        cls.hadoop_conf_path = 
f"{cls.gravitino_home}/catalogs/hadoop/conf/hadoop.conf"
+        # restart the server
+        cls.restart_server()
+        # create entity
+        cls._init_test_entities()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls._clean_test_data()
+        # reset server conf in case of other ITs like HDFS has changed it and 
fail
+        # to reset it
+        cls._reset_conf(cls.config, cls.hadoop_conf_path)
+        # restart server
+        cls.restart_server()
+
+    # clear all config in the conf_path
+    @classmethod
+    def _reset_conf(cls, config, conf_path):
+        logger.info("Reset %s.", conf_path)
+        if not os.path.exists(conf_path):
+            raise GravitinoRuntimeException(f"Conf file is not found at 
`{conf_path}`.")
+        filtered_lines = []
+        with open(conf_path, mode="r", encoding="utf-8") as file:
+            origin_lines = file.readlines()
+
+        for line in origin_lines:
+            line = line.strip()
+            if line.startswith("#"):
+                # append annotations directly
+                filtered_lines.append(line + "\n")
+
+        with open(conf_path, mode="w", encoding="utf-8") as file:
+            for line in filtered_lines:
+                file.write(line)
+
+    @classmethod
+    def _init_test_entities(cls):
+        cls.gravitino_admin_client.create_metalake(
+            name=cls.metalake_name, comment="", properties={}
+        )
+        cls.gravitino_client = GravitinoClient(
+            uri="http://localhost:8090";, metalake_name=cls.metalake_name
+        )
+
+        cls.config = {}
+        cls.conf = {}
+        catalog = cls.gravitino_client.create_catalog(
+            name=cls.catalog_name,
+            catalog_type=Catalog.Type.FILESET,
+            provider=cls.catalog_provider,
+            comment="",
+            properties={
+                "filesystem-providers": "abs",
+                "abs-account-name": cls.azure_abs_account_name,
+                "abs-account-key": cls.azure_abs_account_key,
+            },
+        )
+        catalog.as_schemas().create_schema(
+            schema_name=cls.schema_name, comment="", properties={}
+        )
+
+        cls.fileset_storage_location: str = (
+            
f"{cls.azure_abs_container_name}/{cls.catalog_name}/{cls.schema_name}/{cls.fileset_name}"
+        )
+        cls.fileset_gvfs_location = (
+            
f"gvfs://fileset/{cls.catalog_name}/{cls.schema_name}/{cls.fileset_name}"
+        )
+        catalog.as_fileset_catalog().create_fileset(
+            ident=cls.fileset_ident,
+            fileset_type=Fileset.Type.MANAGED,
+            comment=cls.fileset_comment,
+            storage_location=(
+                
f"abfss://{cls.azure_abs_container_name}@{cls.azure_abs_account_name}.dfs.core.windows.net/"
+                f"{cls.catalog_name}/{cls.schema_name}/{cls.fileset_name}"
+            ),
+            properties=cls.fileset_properties,
+        )
+
+        cls.fs = AzureBlobFileSystem(
+            account_name=cls.azure_abs_account_name,
+            account_key=cls.azure_abs_account_key,
+        )
+
+    def test_cat_file(self):
+        cat_dir = self.fileset_gvfs_location + "/test_cat"
+        cat_actual_dir = self.fileset_storage_location + "/test_cat"
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090";,
+            metalake_name=self.metalake_name,
+            options=self.options,
+            **self.conf,
+        )
+
+        self.check_mkdir(cat_dir, cat_actual_dir, fs)
+
+        cat_file = self.fileset_gvfs_location + "/test_cat/test.file"
+        cat_actual_file = self.fileset_storage_location + "/test_cat/test.file"
+        self.fs.touch(cat_actual_file)
+        self.assertTrue(self.fs.exists(cat_actual_file))
+        self.assertTrue(fs.exists(cat_file))
+
+        # test open and write file
+        with fs.open(cat_file, mode="wb") as f:
+            f.write(b"test_cat_file")
+        self.assertTrue(fs.info(cat_file)["size"] > 0)
+
+        # test cat file
+        content = fs.cat_file(cat_file)
+        self.assertEqual(b"test_cat_file", content)
+
+    def check_mkdir(self, gvfs_dir, actual_dir, gvfs_instance):
+        # OSS will not create a directory, so the directory will not exist.
+        self.fs.mkdir(actual_dir)
+        self.assertFalse(self.fs.exists(actual_dir))
+        self.assertFalse(gvfs_instance.exists(gvfs_dir))
+
+    def check_makedirs(self, gvfs_dir, actual_dir, gvfs_instance):
+        self.fs.makedirs(actual_dir)
+        self.assertFalse(self.fs.exists(actual_dir))
+        self.assertFalse(gvfs_instance.exists(gvfs_dir))
+
+    def test_modified(self):
+        modified_dir = self.fileset_gvfs_location + "/test_modified"
+        modified_actual_dir = self.fileset_storage_location + "/test_modified"
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090";,
+            metalake_name=self.metalake_name,
+            options=self.options,
+            **self.conf,
+        )
+
+        self.check_mkdir(modified_dir, modified_actual_dir, fs)
+        # S3 only supports getting the `object` modify time, so the modified 
time will be None

Review Comment:
   Please fully check the code and comments to avoid copy-paste typos.



##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -801,6 +818,12 @@ def _strip_storage_protocol(storage_type: StorageType, 
path: str):
         if storage_type == StorageType.LOCAL:
             return path[len(f"{StorageType.LOCAL.value}:") :]
 
+        ## We need to remove the protocol and host from the path for instance
+        # 'wsabs://container@account/path' to 'container/path'

Review Comment:
   Why "wasbs" here?



##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -577,6 +579,30 @@ def _convert_actual_path(
                 )
 
             actual_prefix = ops["host"] + ops["path"]
+        elif storage_location.startswith(f"{StorageType.ABS.value}://"):
+            ops = infer_storage_options(storage_location)
+            if "username" not in ops or "host" not in ops or "path" not in ops:
+                raise GravitinoRuntimeException(
+                    f"Storage location:{storage_location} doesn't support now. 
as the username,"

Review Comment:
   Suggested wording: "Username, host and path are required in the storage location."



##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -577,6 +579,30 @@ def _convert_actual_path(
                 )
 
             actual_prefix = ops["host"] + ops["path"]
+        elif storage_location.startswith(f"{StorageType.ABS.value}://"):
+            ops = infer_storage_options(storage_location)
+            if "username" not in ops or "host" not in ops or "path" not in ops:
+                raise GravitinoRuntimeException(
+                    f"Storage location:{storage_location} doesn't support now. 
as the username,"
+                    f"host and path are required in the storage location."
+                )
+            actual_prefix = 
f"{StorageType.ABS.value}://{ops['username']}@{ops['host']}{ops['path']}"
+
+            # For ABS, the actual path should be the same as the virtual path 
is like
+            # 
'wasbs//[email protected]/test_gvfs_catalog6588/test_gvfs_schema/

Review Comment:
   Why do we specify "wasbs"? Also what's the meaning here?



##########
clients/client-python/tests/integration/test_gvfs_with_abs.py:
##########
@@ -0,0 +1,373 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+from random import randint
+import unittest
+
+
+from adlfs import AzureBlobFileSystem
+
+from tests.integration.test_gvfs_with_hdfs import TestGvfsWithHDFS
+from gravitino import (
+    gvfs,
+    GravitinoClient,
+    Catalog,
+    Fileset,
+)
+from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.filesystem.gvfs_config import GVFSConfig
+
+
+logger = logging.getLogger(__name__)
+
+
+def azure_abs_is_prepared():
+    return (
+        os.environ.get("ABS_ACCOUNT_NAME")
+        and os.environ.get("ABS_ACCOUNT_KEY")
+        and os.environ.get("ABS_CONTAINER_NAME")
+    )
+
+
[email protected](azure_abs_is_prepared(), "Azure Blob Storage is not 
prepared.")
+class TestGvfsWithABS(TestGvfsWithHDFS):
+    # Before running this test, please set the make sure aliyun-azure-x.jar 
has been

Review Comment:
   Why aliyun?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to