FANNG1 commented on code in PR #5997:
URL: https://github.com/apache/gravitino/pull/5997#discussion_r1904234764
##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -946,13 +1048,54 @@ def _get_s3_filesystem(self):
"AWS endpoint url is not found in the options."
)
- return importlib.import_module("s3fs").S3FileSystem(
- key=aws_access_key_id,
- secret=aws_secret_access_key,
- endpoint_url=aws_endpoint_url,
+ return (
+ sys.maxsize,
+ importlib.import_module("s3fs").S3FileSystem(
+ key=aws_access_key_id,
+ secret=aws_secret_access_key,
+ endpoint_url=aws_endpoint_url,
Review Comment:
According to the pydoc, `aws_endpoint_url` does not seem to be required?
```
endpoint_url : string (None)
Use this endpoint_url, if specified. Needed for connecting to non-AWS
S3 buckets. Takes precedence over `endpoint_url` in client_kwargs.
```
##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -946,13 +1048,54 @@ def _get_s3_filesystem(self):
"AWS endpoint url is not found in the options."
)
- return importlib.import_module("s3fs").S3FileSystem(
- key=aws_access_key_id,
- secret=aws_secret_access_key,
- endpoint_url=aws_endpoint_url,
+ return (
+ sys.maxsize,
+ importlib.import_module("s3fs").S3FileSystem(
+ key=aws_access_key_id,
+ secret=aws_secret_access_key,
+ endpoint_url=aws_endpoint_url,
+ ),
)
- def _get_oss_filesystem(self):
+ def _get_oss_filesystem(self, fileset_catalog: Catalog, identifier:
NameIdentifier):
+ # Can get credential from the fileset
+ try:
+ fileset: GenericFileset =
fileset_catalog.as_fileset_catalog().load_fileset(
+ NameIdentifier.of(identifier.namespace().level(2),
identifier.name())
+ )
+ credentials = fileset.support_credentials().get_credentials()
+ except (NoSuchCredentialException, CatalogNotInUseException) as e:
+ logger.warning("Failed to get credentials from fileset: %s", e)
+ credentials = []
+
+ credential = self._get_most_suitable_oss_credential(credentials)
+ if credential is not None:
+ oss_endpoint = fileset_catalog.properties()["oss-endpoint"]
+ expire_time =
self._get_expire_time_by_ratio(credential.expire_time_in_ms())
+ if isinstance(credential, OSSTokenCredential):
+ fs = importlib.import_module("ossfs").OSSFileSystem(
+ key=credential.access_key_id(),
+ secret=credential.secret_access_key(),
+ token=credential.security_token(),
+ endpoint=oss_endpoint,
+ )
+ return (expire_time, fs)
+ if isinstance(credential, OSSSecretKeyCredential):
+ return (
+ expire_time,
+ importlib.import_module("ossfs").OSSFileSystem(
+ key=credential.access_key_id(),
+ secret=credential.secret_access_key(),
+ endpoint=oss_endpoint,
+ ),
+ )
+
+ oss_endpoint_url =
self._options.get(GVFSConfig.GVFS_FILESYSTEM_OSS_ENDPOINT)
Review Comment:
What's the difference between `oss_endpoint_url` and `oss_endpoint`?
##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -946,13 +1048,54 @@ def _get_s3_filesystem(self):
"AWS endpoint url is not found in the options."
)
- return importlib.import_module("s3fs").S3FileSystem(
- key=aws_access_key_id,
- secret=aws_secret_access_key,
- endpoint_url=aws_endpoint_url,
+ return (
+ sys.maxsize,
+ importlib.import_module("s3fs").S3FileSystem(
+ key=aws_access_key_id,
+ secret=aws_secret_access_key,
+ endpoint_url=aws_endpoint_url,
+ ),
)
- def _get_oss_filesystem(self):
+ def _get_oss_filesystem(self, fileset_catalog: Catalog, identifier:
NameIdentifier):
+ # Can get credential from the fileset
+ try:
+ fileset: GenericFileset =
fileset_catalog.as_fileset_catalog().load_fileset(
+ NameIdentifier.of(identifier.namespace().level(2),
identifier.name())
+ )
+ credentials = fileset.support_credentials().get_credentials()
+ except (NoSuchCredentialException, CatalogNotInUseException) as e:
+ logger.warning("Failed to get credentials from fileset: %s", e)
+ credentials = []
+
+ credential = self._get_most_suitable_oss_credential(credentials)
+ if credential is not None:
+ oss_endpoint = fileset_catalog.properties()["oss-endpoint"]
Review Comment:
Use `fileset_catalog.properties().get("oss-endpoint", None)` instead?
##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -1001,10 +1173,58 @@ def _get_abs_filesystem(self):
"ABS account key is not found in the options."
)
- return importlib.import_module("adlfs").AzureBlobFileSystem(
- account_name=abs_account_name,
- account_key=abs_account_key,
+ return (
+ sys.maxsize,
+ importlib.import_module("adlfs").AzureBlobFileSystem(
+ account_name=abs_account_name,
+ account_key=abs_account_key,
+ ),
)
+ def _get_most_suitable_s3_credential(self, credentials: List[Credential]):
+ for credential in credentials:
+ # Prefer to use the token credential, if
Review Comment:
Please finish this comment; it is cut off after "if".
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]