mchades commented on code in PR #9570:
URL: https://github.com/apache/gravitino/pull/9570#discussion_r2758663986
##########
clients/client-python/gravitino/filesystem/gvfs_storage_handler.py:
##########
@@ -81,7 +81,6 @@ def get_filesystem_with_expiration(
self,
credentials: List[Credential],
catalog_props: Dict[str, str],
- options: Dict[str, str],
Review Comment:
Why was the `options` parameter removed?
##########
clients/client-python/gravitino/filesystem/gvfs_base_operations.py:
##########
@@ -754,3 +785,116 @@ def _get_fileset(self, fileset_ident: NameIdentifier):
return fileset
finally:
write_lock.release()
+
+ def _get_fileset_schema(self, schema_ident: NameIdentifier):
+ """Get the schema by the schema identifier from the cache or load it
from the server if the cache is disabled.
+ :param schema_ident: The schema identifier
+ :return: The schema
+ """
+ if not self._enable_fileset_metadata_cache:
+ catalog_ident: NameIdentifier = NameIdentifier.of(
+ schema_ident.namespace().level(0),
schema_ident.namespace().level(1)
+ )
+ catalog: FilesetCatalog = self._get_fileset_catalog(catalog_ident)
+ return catalog.as_schemas().load_schema(schema_ident.name())
+
+ read_lock = self._schema_cache_lock.gen_rlock()
+ try:
+ read_lock.acquire()
+ cache_value: Schema = self._schema_cache.get(schema_ident)
+ if cache_value is not None:
+ return cache_value
+ finally:
+ read_lock.release()
+
+ write_lock = self._schema_cache_lock.gen_wlock()
+ try:
+ write_lock.acquire()
+ cache_value: Schema = self._schema_cache.get(schema_ident)
+ if cache_value is not None:
+ return cache_value
+
+ catalog_ident: NameIdentifier = NameIdentifier.of(
+ schema_ident.namespace().level(0),
schema_ident.namespace().level(1)
+ )
+ catalog: FilesetCatalog = self._get_fileset_catalog(catalog_ident)
+ schema = catalog.as_schemas().load_schema(schema_ident.name())
+ self._schema_cache[schema_ident] = schema
+ return schema
+ finally:
+ write_lock.release()
+
+ def _get_base_location(self, actual_location: str) -> str:
+ """Get the base location (scheme + authority) from the actual location
path.
+ :param actual_location: The actual location path (e.g.,
's3://bucket/path')
+ :return: The base location (e.g., 's3://bucket')
+ """
+ parsed_uri = urlparse(actual_location)
+ scheme = parsed_uri.scheme if parsed_uri.scheme else "file"
+ authority = parsed_uri.netloc if parsed_uri.netloc else ""
+ return f"{scheme}://{authority}"
+
+ def _get_user_defined_configs(self, path: str) -> Dict[str, str]:
+ """Get user defined configurations for a specific path based on the
path's base location
+ (scheme://authority).
+
+ The logic:
+ 1. Extract baseLocation (scheme://authority) from the given path
+ 2. Find config entries like "fs.path.config.<name> = <base_location>"
where the
Review Comment:
What if the user sets `base_location` to `scheme://authority/sub_path`?
##########
clients/client-python/gravitino/filesystem/gvfs_config.py:
##########
@@ -85,3 +85,8 @@ class GVFSConfig:
# The configuration key for whether to enable auto-creation of fileset
location when the
# server-side filesystem ops are disabled and the location does not exist.
The default is true.
GVFS_FILESYSTEM_AUTO_CREATE_LOCATION = "auto_create_location"
+
+ # The configuration prefix for user-defined path-specific configurations.
+ # Format: fs.path.config.<base_location>.<property_name>=<property_value>
+ # Example: fs.path.config.s3.aws-access-key=XXX
+ FS_GRAVITINO_PATH_CONFIG_PREFIX = "fs.path.config."
Review Comment:
I don't see any Markdown (`.md`) documentation file updated in this PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]