yuqi1129 commented on code in PR #5209:
URL: https://github.com/apache/gravitino/pull/5209#discussion_r1811654523
##########
clients/client-python/gravitino/filesystem/gvfs.py:
##########
@@ -819,5 +840,40 @@ def _get_gcs_filesystem(self):
return importlib.import_module("pyarrow.fs").GcsFileSystem()
+ def _get_s3_filesystem(self):
+ # get All keys from the options that start with 'gravitino.bypass.s3.'
and remove the prefix
+ s3_options = {
+ key[len(GVFSConfig.GVFS_FILESYSTEM_BY_PASS_S3) :]: value
+ for key, value in self._options.items()
+ if key.startswith(GVFSConfig.GVFS_FILESYSTEM_BY_PASS_S3)
+ }
+
+ # get 'aws_access_key_id' from s3_options, if the key is not found,
throw an exception
+ aws_access_key_id =
s3_options.get(GVFSConfig.GVFS_FILESYSTEM_S3_ACCESS_KEY)
+ if aws_access_key_id is None:
+ raise GravitinoRuntimeException(
+ "AWS access key id is not found in the options."
+ )
+
+ # get 'aws_secret_access_key' from s3_options, if the key is not
found, throw an exception
+ aws_secret_access_key =
s3_options.get(GVFSConfig.GVFS_FILESYSTEM_S3_SECRET_KEY)
+ if aws_secret_access_key is None:
+ raise GravitinoRuntimeException(
+ "AWS secret access key is not found in the options."
+ )
+
+ # get 'aws_endpoint_url' from s3_options, if the key is not found,
throw an exception
+ aws_endpoint_url =
s3_options.get(GVFSConfig.GVFS_FILESYSTEM_S3_ENDPOINT)
+ if aws_endpoint_url is None:
+ raise GravitinoRuntimeException(
+ "AWS endpoint url is not found in the options."
+ )
+
+ return importlib.import_module("pyarrow.fs").S3FileSystem(
Review Comment:
> My advice is not to be restricted by the current selection. We should make
the best choice in terms of performance and interface adaptability.
I agree with this point, and I also noticed that the set of filesystems that
PyArrow supports is very limited. Due to time limitations, I have not completed
a comprehensive survey of the alternatives. Thanks for your suggestion; I will
modify the code accordingly.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]