This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 0f18b638a [#4000] improvement(client-python): Support simple auth for
PyGVFS (#4001)
0f18b638a is described below
commit 0f18b638a2ce2f946bb8fc50ddfed7e88c1b73e4
Author: xloya <[email protected]>
AuthorDate: Wed Jul 10 13:39:31 2024 +0800
[#4000] improvement(client-python): Support simple auth for PyGVFS (#4001)
### What changes were proposed in this pull request?
Support simple auth for the Gravitino client in PyGVFS. The integration test
depends on PRs #3876 and #3931. Once #3876 and #3931 are merged, I will add
integration tests and docs for this PR.
### Why are the changes needed?
Fix: #4000
### How was this patch tested?
Add UTs and ITs.
---------
Co-authored-by: xiaojiebao <[email protected]>
---
clients/client-python/gravitino/filesystem/gvfs.py | 43 +++++++++++++++++++---
.../gravitino/filesystem/gvfs_config.py | 29 +++++++++++++++
.../tests/integration/test_gvfs_with_hdfs.py | 24 ++++++++++++
.../tests/unittests/test_gvfs_with_local.py | 36 ++++++++++++++++--
docs/how-to-use-gvfs.md | 43 ++++++++++++++++++----
5 files changed, 157 insertions(+), 18 deletions(-)
diff --git a/clients/client-python/gravitino/filesystem/gvfs.py
b/clients/client-python/gravitino/filesystem/gvfs.py
index a50c97f4c..a2b2461b3 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -32,8 +32,10 @@ from pyarrow.fs import HadoopFileSystem
from readerwriterlock import rwlock
from gravitino.api.catalog import Catalog
from gravitino.api.fileset import Fileset
+from gravitino.auth.simple_auth_provider import SimpleAuthProvider
from gravitino.client.gravitino_client import GravitinoClient
from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.filesystem.gvfs_config import GVFSConfig
from gravitino.name_identifier import NameIdentifier
PROTOCOL_NAME = "gvfs"
@@ -94,15 +96,44 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
def __init__(
self,
- server_uri=None,
- metalake_name=None,
- cache_size=20,
- cache_expired_time=3600,
+ server_uri: str = None,
+ metalake_name: str = None,
+ options: Dict = None,
**kwargs,
):
+ """Initialize the GravitinoVirtualFileSystem.
+ :param server_uri: Gravitino server URI
+ :param metalake_name: Gravitino metalake name
+ :param options: Options for the GravitinoVirtualFileSystem
+ :param kwargs: Extra args for super filesystem
+ """
self._metalake = metalake_name
- self._client = GravitinoClient(
- uri=server_uri, metalake_name=metalake_name, check_version=False
+ auth_type = (
+ GVFSConfig.DEFAULT_AUTH_TYPE
+ if options is None
+ else options.get(GVFSConfig.AUTH_TYPE,
GVFSConfig.DEFAULT_AUTH_TYPE)
+ )
+ if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE:
+ self._client = GravitinoClient(
+ uri=server_uri,
+ metalake_name=metalake_name,
+ auth_data_provider=SimpleAuthProvider(),
+ )
+ else:
+ raise GravitinoRuntimeException(
+ f"Authentication type {auth_type} is not supported."
+ )
+ cache_size = (
+ GVFSConfig.DEFAULT_CACHE_SIZE
+ if options is None
+ else options.get(GVFSConfig.CACHE_SIZE,
GVFSConfig.DEFAULT_CACHE_SIZE)
+ )
+ cache_expired_time = (
+ GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+ if options is None
+ else options.get(
+ GVFSConfig.CACHE_EXPIRED_TIME,
GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+ )
)
self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time)
self._cache_lock = rwlock.RWLockFair()
diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py
b/clients/client-python/gravitino/filesystem/gvfs_config.py
new file mode 100644
index 000000000..539b9045a
--- /dev/null
+++ b/clients/client-python/gravitino/filesystem/gvfs_config.py
@@ -0,0 +1,29 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+"""
+
+
+class GVFSConfig:
+ CACHE_SIZE = "cache_size"
+ DEFAULT_CACHE_SIZE = 20
+
+ CACHE_EXPIRED_TIME = "cache_expired_time"
+ DEFAULT_CACHE_EXPIRED_TIME = 3600
+
+ AUTH_TYPE = "auth_type"
+ DEFAULT_AUTH_TYPE = "simple"
diff --git a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
index 53fde1223..4a86e6c0d 100644
--- a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
+++ b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
@@ -17,6 +17,9 @@ specific language governing permissions and limitations
under the License.
"""
+# pylint: disable=protected-access
+
+import base64
import logging
import os
import platform
@@ -40,6 +43,7 @@ from gravitino import (
Catalog,
Fileset,
)
+from gravitino.auth.auth_constants import AuthConstants
from gravitino.exceptions.base import GravitinoRuntimeException
from tests.integration.integration_test_env import IntegrationTestEnv
from tests.integration.hdfs_container import HDFSContainer
@@ -186,6 +190,26 @@ class TestGvfsWithHDFS(IntegrationTestEnv):
except Exception as e:
logger.error("Clean test data failed: %s", e)
+ def test_simple_auth(self):
+ options = {"auth_type": "simple"}
+ current_user = (
+ None if os.environ.get("user.name") is None else
os.environ["user.name"]
+ )
+ user = "test_gvfs"
+ os.environ["user.name"] = user
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:8090",
+ metalake_name=self.metalake_name,
+ options=options,
+ )
+ token = fs._client._rest_client.auth_data_provider.get_token_data()
+ token_string = base64.b64decode(
+
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
+ ).decode("utf-8")
+ self.assertEqual(f"{user}:dummy", token_string)
+ if current_user is not None:
+ os.environ["user.name"] = current_user
+
def test_ls(self):
ls_dir = self.fileset_gvfs_location + "/test_ls"
ls_actual_dir = self.fileset_storage_location + "/test_ls"
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index 61ea004a1..a9a4afb5b 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -17,8 +17,10 @@ specific language governing permissions and limitations
under the License.
"""
-# pylint: disable=protected-access
+# pylint: disable=protected-access,too-many-lines
+import base64
+import os
import random
import string
import time
@@ -34,6 +36,7 @@ from llama_index.core import SimpleDirectoryReader
from gravitino import gvfs
from gravitino import NameIdentifier
+from gravitino.auth.auth_constants import AuthConstants
from gravitino.dto.audit_dto import AuditDTO
from gravitino.dto.fileset_dto import FilesetDTO
from gravitino.filesystem.gvfs import FilesetContext, StorageType
@@ -77,12 +80,11 @@ class TestLocalFilesystem(unittest.TestCase):
fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cache"
local_fs.mkdir(fileset_storage_location)
self.assertTrue(local_fs.exists(fileset_storage_location))
-
+ options = {"cache_size": 1, "cache_expired_time": 2}
fs = gvfs.GravitinoVirtualFileSystem(
server_uri="http://localhost:9090",
metalake_name="metalake_demo",
- cache_size=1,
- cache_expired_time=1,
+ options=options,
)
self.assertTrue(fs.exists(fileset_virtual_location))
# wait 2 seconds
@@ -95,6 +97,32 @@ class TestLocalFilesystem(unittest.TestCase):
)
)
+ @patch(
+ "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+ return_value=mock_base.mock_load_fileset(
+ "test_simple_auth", f"{_fileset_dir}/test_simple_auth"
+ ),
+ )
+ def test_simple_auth(self, mock_method1, mock_method2, mock_method3,
mock_method4):
+ options = {"auth_type": "simple"}
+ current_user = (
+ None if os.environ.get("user.name") is None else
os.environ["user.name"]
+ )
+ user = "test_gvfs"
+ os.environ["user.name"] = user
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ options=options,
+ )
+ token = fs._client._rest_client.auth_data_provider.get_token_data()
+ token_string = base64.b64decode(
+
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
+ ).decode("utf-8")
+ self.assertEqual(f"{user}:dummy", token_string)
+ if current_user is not None:
+ os.environ["user.name"] = current_user
+
@patch(
"gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
return_value=mock_base.mock_load_fileset("test_ls",
f"{_fileset_dir}/test_ls"),
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 46e0c1b60..c44c97ea7 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -335,20 +335,25 @@ to recompile the native libraries like `libhdfs` and
others, and completely repl
### Configuration
-| Configuration item | Description
| Default
value | Required | Since version |
-|----------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
-| `server_uri` | The Gravitino server uri, e.g.
`http://localhost:8090`.
| (none) | Yes | 0.6.0 |.
| (none) | Yes | 0.6.0 |
-| `metalake_name` | The metalake name which the fileset belongs to.
| (none)
| Yes | 0.6.0 |.
| (none)
| Yes | 0.6.0 | .
| (none) | Yes | 0.6.0 |
-| `cache_size` | The cache capacity of the Gravitino Virtual File
System. | `20`
| No | 0.6.0 |.
| (none)
| Yes | 0.6.0 | .
| (none) | Yes | 0.6.0 |
-| `cache_expired_time` | The value of time that the cache expires after
accessing in the Gravitino Virtual File System. The value is in `seconds`. |
`3600` | No | 0.6.0 |.
|
(none) | Yes | 0.6.0 | .
| (none) | Yes | 0.6.0 |
+| Configuration item   | Description | Default value | Required | Since version |
+|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
+| `server_uri`         | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 |
+| `metalake_name`      | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 |
+| `cache_size`         | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 |
+| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 |
+| `auth_type`          | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently only the `simple` auth type is supported. | `simple` | No | 0.6.0 |
You can configure these properties when obtaining the `Gravitino Virtual
FileSystem` in Python like this:
```python
from gravitino import gvfs
-
-fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090",
metalake_name="test_metalake")
+options = {
+ "cache_size": 20,
+ "cache_expired_time": 3600,
+ "auth_type": "simple"
+}
+fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090",
metalake_name="test_metalake", options=options)
```
### Usage examples
@@ -530,3 +535,25 @@ reader = SimpleDirectoryReader(
documents = reader.load_data()
print(documents)
```
+
+### Authentication
+
+Currently, the Gravitino Virtual File System in Python supports only one
authentication type for accessing the Gravitino server: `simple`.
+
+`simple` is the default authentication type of the Gravitino Virtual
File System in Python.
+
+#### How to use authentication
+
+##### Using `simple` authentication
+
+First, make sure that your Gravitino server is also configured to use the
`simple` authentication mode.
+
+Then, you can configure the authentication like this:
+
+```python
+from gravitino import gvfs
+
+options = {"auth_type": "simple"}
+fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090",
metalake_name="test_metalake", options=options)
+print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset"))
+```
\ No newline at end of file