This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new ce22323f3 [#3758] Improvement(PyGVFS): Support OAuth2 authentication
in Python GVFS (#5030)
ce22323f3 is described below
commit ce22323f38c5e0e6331c1d11bd1c1c5ddcef1425
Author: xloya <[email protected]>
AuthorDate: Wed Oct 9 14:16:16 2024 +0800
[#3758] Improvement(PyGVFS): Support OAuth2 authentication in Python GVFS
(#5030)
### What changes were proposed in this pull request?
Support OAuth2 authentication in PyGVFS so that it can interact with the
Gravitino server. This is supported in Java GVFS and we should also
support it in PyGVFS. This depends on
https://github.com/apache/gravitino/pull/5026, which refactors the Python
GVFS.
### Why are the changes needed?
Fix: #3758
### How was this patch tested?
Added unit tests (UTs) covering OAuth2 authentication in PyGVFS.
---
clients/client-python/gravitino/filesystem/gvfs.py | 46 +++++++++++-
.../gravitino/filesystem/gvfs_config.py | 8 ++-
.../tests/unittests/test_gvfs_with_local.py | 81 +++++++++++++++++++++-
docs/how-to-use-gvfs.md | 43 +++++++++---
4 files changed, 164 insertions(+), 14 deletions(-)
diff --git a/clients/client-python/gravitino/filesystem/gvfs.py
b/clients/client-python/gravitino/filesystem/gvfs.py
index 8d98d0a04..e5a565ce0 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -32,6 +32,8 @@ from gravitino.audit.caller_context import CallerContext,
CallerContextHolder
from gravitino.audit.fileset_audit_constants import FilesetAuditConstants
from gravitino.audit.fileset_data_operation import FilesetDataOperation
from gravitino.audit.internal_client_type import InternalClientType
+from gravitino.auth.default_oauth2_token_provider import
DefaultOAuth2TokenProvider
+from gravitino.auth.oauth2_token_provider import OAuth2TokenProvider
from gravitino.auth.simple_auth_provider import SimpleAuthProvider
from gravitino.catalog.fileset_catalog import FilesetCatalog
from gravitino.client.gravitino_client import GravitinoClient
@@ -92,16 +94,41 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
"""
self._metalake = metalake_name
auth_type = (
- GVFSConfig.DEFAULT_AUTH_TYPE
+ GVFSConfig.SIMPLE_AUTH_TYPE
if options is None
- else options.get(GVFSConfig.AUTH_TYPE,
GVFSConfig.DEFAULT_AUTH_TYPE)
+ else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.SIMPLE_AUTH_TYPE)
)
- if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE:
+ if auth_type == GVFSConfig.SIMPLE_AUTH_TYPE:
self._client = GravitinoClient(
uri=server_uri,
metalake_name=metalake_name,
auth_data_provider=SimpleAuthProvider(),
)
+ elif auth_type == GVFSConfig.OAUTH2_AUTH_TYPE:
+ oauth2_server_uri = options.get(GVFSConfig.OAUTH2_SERVER_URI)
+ self._check_auth_config(
+ auth_type, GVFSConfig.OAUTH2_SERVER_URI, oauth2_server_uri
+ )
+
+ oauth2_credential = options.get(GVFSConfig.OAUTH2_CREDENTIAL)
+ self._check_auth_config(
+ auth_type, GVFSConfig.OAUTH2_CREDENTIAL, oauth2_credential
+ )
+
+ oauth2_path = options.get(GVFSConfig.OAUTH2_PATH)
+ self._check_auth_config(auth_type, GVFSConfig.OAUTH2_PATH,
oauth2_path)
+
+ oauth2_scope = options.get(GVFSConfig.OAUTH2_SCOPE)
+ self._check_auth_config(auth_type, GVFSConfig.OAUTH2_SCOPE,
oauth2_scope)
+
+ oauth2_token_provider: OAuth2TokenProvider =
DefaultOAuth2TokenProvider(
+ oauth2_server_uri, oauth2_credential, oauth2_path, oauth2_scope
+ )
+ self._client = GravitinoClient(
+ uri=server_uri,
+ metalake_name=metalake_name,
+ auth_data_provider=oauth2_token_provider,
+ )
else:
raise GravitinoRuntimeException(
f"Authentication type {auth_type} is not supported."
@@ -686,6 +713,19 @@ class
GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
f"Storage type:{storage_type} doesn't support now."
)
+ @staticmethod
+ def _check_auth_config(auth_type: str, config_key: str, config_value: str):
+ """Check if the config value is null.
+ :param auth_type: The auth type
+ :param config_key: The config key
+ :param config_value: The config value
+ """
+ if config_value is None:
+ raise GravitinoRuntimeException(
+ f"{config_key} should not be null"
+ f" if {GVFSConfig.AUTH_TYPE} is set to {auth_type}."
+ )
+
def _get_fileset_catalog(self, catalog_ident: NameIdentifier):
read_lock = self._catalog_cache_lock.gen_rlock()
try:
diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py
b/clients/client-python/gravitino/filesystem/gvfs_config.py
index be072a357..eb5733b56 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_config.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_config.py
@@ -24,4 +24,10 @@ class GVFSConfig:
DEFAULT_CACHE_EXPIRED_TIME = 3600
AUTH_TYPE = "auth_type"
- DEFAULT_AUTH_TYPE = "simple"
+ SIMPLE_AUTH_TYPE = "simple"
+
+ OAUTH2_AUTH_TYPE = "oauth2"
+ OAUTH2_SERVER_URI = "oauth2_server_uri"
+ OAUTH2_CREDENTIAL = "oauth2_credential"
+ OAUTH2_PATH = "oauth2_path"
+ OAUTH2_SCOPE = "oauth2_scope"
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index 22bdccd8c..b4ce39e57 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -34,9 +34,19 @@ from fsspec.implementations.local import LocalFileSystem
from gravitino import gvfs, NameIdentifier
from gravitino.auth.auth_constants import AuthConstants
-from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.exceptions.base import (
+ GravitinoRuntimeException,
+ IllegalArgumentException,
+ BadRequestException,
+)
from gravitino.filesystem.gvfs_config import GVFSConfig
from tests.unittests import mock_base
+from tests.unittests.auth.mock_base import (
+ mock_jwt,
+ GENERATED_TIME,
+ mock_authentication_with_error_authentication_type,
+ mock_authentication_invalid_grant_error,
+)
def generate_unique_random_string(length):
@@ -106,6 +116,75 @@ class TestLocalFilesystem(unittest.TestCase):
if current_user is not None:
os.environ["user.name"] = current_user
+ def test_oauth2_auth(self, *mock_methods):
+ fs_options = {
+ GVFSConfig.AUTH_TYPE: GVFSConfig.OAUTH2_AUTH_TYPE,
+ GVFSConfig.OAUTH2_SERVER_URI: "http://127.0.0.1:1082",
+ GVFSConfig.OAUTH2_CREDENTIAL: "xx:xx",
+ GVFSConfig.OAUTH2_SCOPE: "test",
+ GVFSConfig.OAUTH2_PATH: "token/test",
+ }
+ # test auth normally
+ mocked_jwt = mock_jwt(
+ sub="gravitino", exp=GENERATED_TIME + 10000, aud="service1"
+ )
+ with patch(
+
"gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._get_access_token",
+ return_value=mocked_jwt,
+ ), patch(
+
"gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._fetch_token",
+ return_value=mocked_jwt,
+ ):
+ fileset_storage_location = f"{self._fileset_dir}/test_oauth2_auth"
+ fileset_virtual_location =
"fileset/fileset_catalog/tmp/test_oauth2_auth"
+ actual_path = fileset_storage_location
+ with patch(
+
"gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location",
+ return_value=actual_path,
+ ):
+ local_fs = LocalFileSystem()
+ local_fs.mkdir(fileset_storage_location)
+ sub_dir_path = f"{fileset_storage_location}/test_1"
+ local_fs.mkdir(sub_dir_path)
+ self.assertTrue(local_fs.exists(sub_dir_path))
+ sub_file_path = f"{fileset_storage_location}/test_file_1.par"
+ local_fs.touch(sub_file_path)
+ self.assertTrue(local_fs.exists(sub_file_path))
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ options=fs_options,
+ skip_instance_cache=True,
+ )
+ # should not raise exception
+ self.assertTrue(fs.exists(fileset_virtual_location))
+
+ # test error authentication type
+ with patch(
+ "gravitino.utils.http_client.HTTPClient.post_form",
+ return_value=mock_authentication_with_error_authentication_type(),
+ ):
+ with self.assertRaises(IllegalArgumentException):
+ gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ options=fs_options,
+ skip_instance_cache=True,
+ )
+
+ # test bad request
+ with patch(
+ "gravitino.utils.http_client.HTTPClient._make_request",
+ return_value=mock_authentication_invalid_grant_error(),
+ ):
+ with self.assertRaises(BadRequestException):
+ gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ options=fs_options,
+ skip_instance_cache=True,
+ )
+
def test_ls(self, *mock_methods):
fileset_storage_location = f"{self._fileset_dir}/test_ls"
fileset_virtual_location = "fileset/fileset_catalog/tmp/test_ls"
diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 3a116928a..7a98271d4 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -10,7 +10,7 @@ license: "This software is licensed under the Apache License
version 2."
directories, with `fileset` you can manage non-tabular data through Gravitino.
For
details, you can read [How to manage fileset metadata using
Gravitino](./manage-fileset-metadata-using-gravitino.md).
-To use `Fileset` managed by Gravitino, Gravitino provides a virtual file
system layer called
+To use `fileset` managed by Gravitino, Gravitino provides a virtual file
system layer called
the Gravitino Virtual File System (GVFS):
* In Java, it's built on top of the Hadoop Compatible File System(HCFS)
interface.
* In Python, it's built on top of the
[fsspec](https://filesystem-spec.readthedocs.io/en/stable/index.html)
@@ -335,13 +335,17 @@ to recompile the native libraries like `libhdfs` and
others, and completely repl
### Configuration
-| Configuration item | Description
| Default value | Required | Since version |
-|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
-| `server_uri` | The Gravitino server uri, e.g.
`http://localhost:8090`.
| (none) | Yes | 0.6.0 |.
| (none) | Yes
| 0.6.0 |
-| `metalake_name` | The metalake name which the fileset belongs to.
| (none) | Yes | 0.6.0 |.
| (none) | Yes | 0.6.0 | .
| (none) | Yes | 0.6.0 |
-| `cache_size` | The cache capacity of the Gravitino Virtual File
System.
| `20` | No | 0.6.0 |.
| (none) | Yes | 0.6.0
| . | (none) | Yes | 0.6.0 |
-| `cache_expired_time` | The value of time that the cache expires after
accessing in the Gravitino Virtual File System. The value is in `seconds`.
| `3600` | No | 0.6.0 |.
-| `auth_type` | The auth type to initialize the Gravitino client to
use with the Gravitino Virtual File System. Currently only supports `simple`
auth types. | `simple` | No | 0.6.0 |.
+| Configuration item | Description
| Default value | Required | Since
version |
+|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------|---------------|
+| `server_uri` | The Gravitino server uri, e.g.
`http://localhost:8090`.
| (none) | Yes
| 0.6.0 |.
| (none)
| Yes | 0.6.0 |
+| `metalake_name` | The metalake name which the fileset belongs to.
| (none) | Yes | 0.6.0
|.
| (none) | Yes
| 0.6.0 | . | (none)
| Yes [...]
+| `cache_size` | The cache capacity of the Gravitino Virtual File
System.
| `20` | No |
0.6.0 |.
| (none) | Yes
| 0.6.0 | . |
(none) | Yes [...]
+| `cache_expired_time` | The value of time that the cache expires after
accessing in the Gravitino Virtual File System. The value is in `seconds`.
| `3600` | No |
0.6.0 |.
+| `auth_type` | The auth type to initialize the Gravitino client to
use with the Gravitino Virtual File System. Currently supports `simple` and
`oauth2` auth types. | `simple` | No |
0.6.0 |.
+| `oauth2_server_uri` | The auth server URI for the Gravitino client when
using `oauth2` auth type.
| (none) | Yes if you use `oauth2` auth type |
0.7.0 |.
+| `oauth2_credential` | The auth credential for the Gravitino client when
using `oauth2` auth type.
| (none) | Yes if you use `oauth2` auth type |
0.7.0 |.
+| `oauth2_path` | The auth server path for the Gravitino client when
using `oauth2` auth type. Please remove the first slash `/` from the path, for
example `oauth/token`. | (none) | Yes if you use `oauth2` auth type |
0.7.0 |.
+| `oauth2_scope` | The auth scope for the Gravitino client when using
`oauth2` auth type with the Gravitino Virtual File System.
| (none) | Yes if you use `oauth2` auth type |
0.7.0 |.
You can configure these properties when obtaining the `Gravitino Virtual
FileSystem` in Python like this:
@@ -538,7 +542,7 @@ print(documents)
### Authentication
-Currently, Gravitino Virtual File System in Python only supports one kind of
authentication types to access Gravitino server: `simple`.
+Currently, Gravitino Virtual File System in Python supports two kinds of
authentication types to access Gravitino server: `simple` and `oauth2`.
The type of `simple` is the default authentication type in Gravitino Virtual
File System in Python.
@@ -556,4 +560,25 @@ from gravitino import gvfs
options = {"auth_type": "simple"}
fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090",
metalake_name="test_metalake", options=options)
print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset"))
+```
+
+##### Using `OAuth2` authentication
+
+First, make sure that your Gravitino server is also configured to use the
`oauth2` authentication mode,
+and that you have an OAuth server to fetch the token from; see
[Security](security/security.md).
+
+Then, you can configure the authentication like this:
+
+```python
+from gravitino import gvfs
+from gravitino.filesystem.gvfs_config import GVFSConfig
+
+options = {
+ GVFSConfig.AUTH_TYPE: GVFSConfig.OAUTH2_AUTH_TYPE,
+ GVFSConfig.OAUTH2_SERVER_URI: "http://127.0.0.1:1082",
+ GVFSConfig.OAUTH2_CREDENTIAL: "xx:xx",
+ GVFSConfig.OAUTH2_SCOPE: "test",
+ GVFSConfig.OAUTH2_PATH: "token/test",
+}
+fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090",
metalake_name="test_metalake", options=options)
+print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset"))
```
\ No newline at end of file