This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 0f18b638a [#4000] improvement(client-python): Support simple auth for 
PyGVFS (#4001)
0f18b638a is described below

commit 0f18b638a2ce2f946bb8fc50ddfed7e88c1b73e4
Author: xloya <[email protected]>
AuthorDate: Wed Jul 10 13:39:31 2024 +0800

    [#4000] improvement(client-python): Support simple auth for PyGVFS (#4001)
    
    ### What changes were proposed in this pull request?
    
    Support simple auth for gravitino client in PyGVFS. The integration tests
    depend on these PRs: #3876, #3931. Once #3876 and #3931 are merged, I will
    add integration tests and docs for this PR.
    
    ### Why are the changes needed?
    
    Fix: #4000
    
    ### How was this patch tested?
    
    Add UTs and ITs.
    
    ---------
    
    Co-authored-by: xiaojiebao <[email protected]>
---
 clients/client-python/gravitino/filesystem/gvfs.py | 43 +++++++++++++++++++---
 .../gravitino/filesystem/gvfs_config.py            | 29 +++++++++++++++
 .../tests/integration/test_gvfs_with_hdfs.py       | 24 ++++++++++++
 .../tests/unittests/test_gvfs_with_local.py        | 36 ++++++++++++++++--
 docs/how-to-use-gvfs.md                            | 43 ++++++++++++++++++----
 5 files changed, 157 insertions(+), 18 deletions(-)

diff --git a/clients/client-python/gravitino/filesystem/gvfs.py 
b/clients/client-python/gravitino/filesystem/gvfs.py
index a50c97f4c..a2b2461b3 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -32,8 +32,10 @@ from pyarrow.fs import HadoopFileSystem
 from readerwriterlock import rwlock
 from gravitino.api.catalog import Catalog
 from gravitino.api.fileset import Fileset
+from gravitino.auth.simple_auth_provider import SimpleAuthProvider
 from gravitino.client.gravitino_client import GravitinoClient
 from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.filesystem.gvfs_config import GVFSConfig
 from gravitino.name_identifier import NameIdentifier
 
 PROTOCOL_NAME = "gvfs"
@@ -94,15 +96,44 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
 
     def __init__(
         self,
-        server_uri=None,
-        metalake_name=None,
-        cache_size=20,
-        cache_expired_time=3600,
+        server_uri: str = None,
+        metalake_name: str = None,
+        options: Dict = None,
         **kwargs,
     ):
+        """Initialize the GravitinoVirtualFileSystem.
+        :param server_uri: Gravitino server URI
+        :param metalake_name: Gravitino metalake name
+        :param options: Options for the GravitinoVirtualFileSystem
+        :param kwargs: Extra args for super filesystem
+        """
         self._metalake = metalake_name
-        self._client = GravitinoClient(
-            uri=server_uri, metalake_name=metalake_name, check_version=False
+        auth_type = (
+            GVFSConfig.DEFAULT_AUTH_TYPE
+            if options is None
+            else options.get(GVFSConfig.AUTH_TYPE, 
GVFSConfig.DEFAULT_AUTH_TYPE)
+        )
+        if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE:
+            self._client = GravitinoClient(
+                uri=server_uri,
+                metalake_name=metalake_name,
+                auth_data_provider=SimpleAuthProvider(),
+            )
+        else:
+            raise GravitinoRuntimeException(
+                f"Authentication type {auth_type} is not supported."
+            )
+        cache_size = (
+            GVFSConfig.DEFAULT_CACHE_SIZE
+            if options is None
+            else options.get(GVFSConfig.CACHE_SIZE, 
GVFSConfig.DEFAULT_CACHE_SIZE)
+        )
+        cache_expired_time = (
+            GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+            if options is None
+            else options.get(
+                GVFSConfig.CACHE_EXPIRED_TIME, 
GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+            )
         )
         self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time)
         self._cache_lock = rwlock.RWLockFair()
diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py 
b/clients/client-python/gravitino/filesystem/gvfs_config.py
new file mode 100644
index 000000000..539b9045a
--- /dev/null
+++ b/clients/client-python/gravitino/filesystem/gvfs_config.py
@@ -0,0 +1,29 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+"""
+
+
+class GVFSConfig:
+    CACHE_SIZE = "cache_size"
+    DEFAULT_CACHE_SIZE = 20
+
+    CACHE_EXPIRED_TIME = "cache_expired_time"
+    DEFAULT_CACHE_EXPIRED_TIME = 3600
+
+    AUTH_TYPE = "auth_type"
+    DEFAULT_AUTH_TYPE = "simple"
diff --git a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py 
b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
index 53fde1223..4a86e6c0d 100644
--- a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
+++ b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
@@ -17,6 +17,9 @@ specific language governing permissions and limitations
 under the License.
 """
 
+# pylint: disable=protected-access
+
+import base64
 import logging
 import os
 import platform
@@ -40,6 +43,7 @@ from gravitino import (
     Catalog,
     Fileset,
 )
+from gravitino.auth.auth_constants import AuthConstants
 from gravitino.exceptions.base import GravitinoRuntimeException
 from tests.integration.integration_test_env import IntegrationTestEnv
 from tests.integration.hdfs_container import HDFSContainer
@@ -186,6 +190,26 @@ class TestGvfsWithHDFS(IntegrationTestEnv):
         except Exception as e:
             logger.error("Clean test data failed: %s", e)
 
+    def test_simple_auth(self):
+        options = {"auth_type": "simple"}
+        current_user = (
+            None if os.environ.get("user.name") is None else 
os.environ["user.name"]
+        )
+        user = "test_gvfs"
+        os.environ["user.name"] = user
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090",
+            metalake_name=self.metalake_name,
+            options=options,
+        )
+        token = fs._client._rest_client.auth_data_provider.get_token_data()
+        token_string = base64.b64decode(
+            
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
+        ).decode("utf-8")
+        self.assertEqual(f"{user}:dummy", token_string)
+        if current_user is not None:
+            os.environ["user.name"] = current_user
+
     def test_ls(self):
         ls_dir = self.fileset_gvfs_location + "/test_ls"
         ls_actual_dir = self.fileset_storage_location + "/test_ls"
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py 
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index 61ea004a1..a9a4afb5b 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -17,8 +17,10 @@ specific language governing permissions and limitations
 under the License.
 """
 
-# pylint: disable=protected-access
+# pylint: disable=protected-access,too-many-lines
 
+import base64
+import os
 import random
 import string
 import time
@@ -34,6 +36,7 @@ from llama_index.core import SimpleDirectoryReader
 
 from gravitino import gvfs
 from gravitino import NameIdentifier
+from gravitino.auth.auth_constants import AuthConstants
 from gravitino.dto.audit_dto import AuditDTO
 from gravitino.dto.fileset_dto import FilesetDTO
 from gravitino.filesystem.gvfs import FilesetContext, StorageType
@@ -77,12 +80,11 @@ class TestLocalFilesystem(unittest.TestCase):
         fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cache"
         local_fs.mkdir(fileset_storage_location)
         self.assertTrue(local_fs.exists(fileset_storage_location))
-
+        options = {"cache_size": 1, "cache_expired_time": 2}
         fs = gvfs.GravitinoVirtualFileSystem(
             server_uri="http://localhost:9090",
             metalake_name="metalake_demo",
-            cache_size=1,
-            cache_expired_time=1,
+            options=options,
         )
         self.assertTrue(fs.exists(fileset_virtual_location))
         # wait 2 seconds
@@ -95,6 +97,32 @@ class TestLocalFilesystem(unittest.TestCase):
             )
         )
 
+    @patch(
+        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+        return_value=mock_base.mock_load_fileset(
+            "test_simple_auth", f"{_fileset_dir}/test_simple_auth"
+        ),
+    )
+    def test_simple_auth(self, mock_method1, mock_method2, mock_method3, 
mock_method4):
+        options = {"auth_type": "simple"}
+        current_user = (
+            None if os.environ.get("user.name") is None else 
os.environ["user.name"]
+        )
+        user = "test_gvfs"
+        os.environ["user.name"] = user
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:9090",
+            metalake_name="metalake_demo",
+            options=options,
+        )
+        token = fs._client._rest_client.auth_data_provider.get_token_data()
+        token_string = base64.b64decode(
+            
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
+        ).decode("utf-8")
+        self.assertEqual(f"{user}:dummy", token_string)
+        if current_user is not None:
+            os.environ["user.name"] = current_user
+
     @patch(
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
         return_value=mock_base.mock_load_fileset("test_ls", 
f"{_fileset_dir}/test_ls"),
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 46e0c1b60..c44c97ea7 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -335,20 +335,25 @@ to recompile the native libraries like `libhdfs` and 
others, and completely repl
 
 ### Configuration
 
-| Configuration item   | Description                                           
                                                                    | Default 
value | Required | Since version |
-|----------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
-| `server_uri`         | The Gravitino server uri, e.g. 
`http://localhost:8090`.                                                        
           | (none)        | Yes      | 0.6.0         |.                        
                                                                                
        | (none)        | Yes                                 | 0.6.0         |
-| `metalake_name`      | The metalake name which the fileset belongs to.       
                                                                    | (none)    
    | Yes      | 0.6.0         |.                                               
                                                                 |  (none)      
  | Yes                                 | 0.6.0         | .                     
          | (none)        | Yes      | 0.6.0         |
-| `cache_size`         | The cache capacity of the Gravitino Virtual File 
System.                                                                  | `20` 
         | No       | 0.6.0         |.                                          
                                                                      |  (none) 
       | Yes                                 | 0.6.0         | .                
               | (none)        | Yes      | 0.6.0         |
-| `cache_expired_time` | The value of time that the cache expires after 
accessing in the Gravitino Virtual File System. The value is in `seconds`. | 
`3600`        | No       | 0.6.0         |.                                     
                                                                           |  
(none)        | Yes                                 | 0.6.0         | .         
                      | (none)        | Yes      | 0.6.0         |
+| Configuration item   | Description | Default value | Required | Since version |
+|----------------------|-------------|---------------|----------|---------------|
+| `server_uri`         | The Gravitino server uri, e.g. `http://localhost:8090`. | (none)        | Yes      | 0.6.0         |
+| `metalake_name`      | The metalake name which the fileset belongs to. | (none)        | Yes      | 0.6.0         |
+| `cache_size`         | The cache capacity of the Gravitino Virtual File System. | `20`          | No       | 0.6.0         |
+| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600`        | No       | 0.6.0         |
+| `auth_type`          | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently only the `simple` auth type is supported. | `simple`      | No       | 0.6.0         |
 
 
 You can configure these properties when obtaining the `Gravitino Virtual 
FileSystem` in Python like this:
 
 ```python
 from gravitino import gvfs
-
-fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake")
+options = {
+    "cache_size": 20,
+    "cache_expired_time": 3600,
+    "auth_type": "simple"
+}
+fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options)
 ```
 
 ### Usage examples
@@ -530,3 +535,25 @@ reader = SimpleDirectoryReader(
 documents = reader.load_data()
 print(documents)
 ```
+
+### Authentication
+
+Currently, the Gravitino Virtual File System in Python supports only one authentication type for accessing the Gravitino server: `simple`.
+
+`simple` is the default authentication type of the Gravitino Virtual File System in Python.
+
+#### How to use authentication
+
+##### Using `simple` authentication
+
+First, make sure that your Gravitino server is also configured to use the 
`simple` authentication mode.
+
+Then, you can configure the authentication like this:
+
+```python
+from gravitino import gvfs
+
+options = {"auth_type": "simple"}
+fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options)
+print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset"))
+```
\ No newline at end of file

Reply via email to