This is an automated email from the ASF dual-hosted git repository.
mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 55314f3f3c [#8948]Improve(gvfs-python): lazy load client in python
gvfs (#8968)
55314f3f3c is described below
commit 55314f3f3cef48add089352f488214b5d873582d
Author: Junda Yang <[email protected]>
AuthorDate: Wed Oct 29 18:54:06 2025 -0700
[#8948]Improve(gvfs-python): lazy load client in python gvfs (#8968)
### What changes were proposed in this pull request?
Implemented lazy loading for GravitinoClient in Python GVFS
(gvfs_base_operations.py):
- Added double-checked locking pattern using threading.Lock() for
thread-safe lazy initialization
- Client parameters are stored in __init__, but client creation is
deferred until first access via _get_gravitino_client()
- Updated _get_fileset_catalog() to call _get_gravitino_client() instead
of direct _client access
### Why are the changes needed?
Fot Python GVFS fallbacks. This is a corresponding Python GVFS change
based on the java gvfs change
https://github.com/apache/gravitino/commit/0e0a19e0266cb0a87a8137bee012602206299756
Fix: #8948
### Does this PR introduce _any_ user-facing change?
Yes, minor behavioral change:
Configuration validation and connection errors are now deferred until
the first filesystem operation (e.g., exists(), ls()) instead of being
thrown during filesystem initialization
### How was this patch tested?
Unit tests updated and added
---
.../gravitino/filesystem/gvfs_base_operations.py | 34 ++++--
.../tests/integration/test_gvfs_with_hdfs.py | 4 +-
.../tests/unittests/test_gvfs_lazy_loading.py | 127 +++++++++++++++++++++
.../tests/unittests/test_gvfs_with_local.py | 24 +++-
4 files changed, 175 insertions(+), 14 deletions(-)
diff --git a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
index 57cb646a98..526666ccb3 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
@@ -17,6 +17,7 @@
import logging
import os
import sys
+import threading
import time
from abc import ABC, abstractmethod
from pathlib import PurePosixPath
@@ -88,7 +89,9 @@ class BaseGVFSOperations(ABC):
self._metalake = metalake_name
self._options = options
- request_headers = (
+ # Store client parameters for lazy initialization
+ self._server_uri = server_uri
+ self._request_headers = (
None
if options is None
else {
@@ -102,7 +105,7 @@ class BaseGVFSOperations(ABC):
}
)
- client_config = (
+ self._client_config = (
None
if options is None
else {
@@ -115,9 +118,9 @@ class BaseGVFSOperations(ABC):
}
)
- self._client = create_client(
- options, server_uri, metalake_name, request_headers, client_config
- )
+ # Lazy initialization - client is created on first access
+ self._client = None
+ self._client_lock = threading.Lock()
cache_size = (
GVFSConfig.DEFAULT_CACHE_SIZE
@@ -168,6 +171,23 @@ class BaseGVFSOperations(ABC):
self._current_location_name = self._init_current_location_name()
self._kwargs = kwargs
+ def _get_gravitino_client(self):
+ """Get the GravitinoClient, creating it lazily on first access.
+ Uses double-checked locking for thread-safe lazy initialization.
+ :return: The GravitinoClient instance
+ """
+ if self._client is None:
+ with self._client_lock:
+ if self._client is None:
+ self._client = create_client(
+ self._options,
+ self._server_uri,
+ self._metalake,
+ self._request_headers,
+ self._client_config,
+ )
+ return self._client
+
@property
def current_location_name(self):
return self._current_location_name
@@ -549,7 +569,7 @@ class BaseGVFSOperations(ABC):
def _get_fileset_catalog(self, catalog_ident: NameIdentifier):
if not self._enable_fileset_metadata_cache:
- return self._client.load_catalog(catalog_ident.name())
+ return
self._get_gravitino_client().load_catalog(catalog_ident.name())
read_lock = self._catalog_cache_lock.gen_rlock()
try:
@@ -566,7 +586,7 @@ class BaseGVFSOperations(ABC):
cache_value: FilesetCatalog =
self._catalog_cache.get(catalog_ident)
if cache_value is not None:
return cache_value
- catalog = self._client.load_catalog(catalog_ident.name())
+ catalog =
self._get_gravitino_client().load_catalog(catalog_ident.name())
self._catalog_cache[catalog_ident] = catalog
return catalog
finally:
diff --git a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
index 922793a545..976e452508 100644
--- a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
+++ b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
@@ -270,7 +270,9 @@ class TestGvfsWithHDFS(IntegrationTestEnv):
metalake_name=self.metalake_name,
options=options,
)
- token =
fs._operations._client._rest_client.auth_data_provider.get_token_data()
+ # Trigger lazy client initialization
+ client = fs._operations._get_gravitino_client()
+ token = client._rest_client.auth_data_provider.get_token_data()
token_string = base64.b64decode(
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
).decode("utf-8")
diff --git a/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py
b/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py
new file mode 100644
index 0000000000..ffcb70ae1d
--- /dev/null
+++ b/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py
@@ -0,0 +1,127 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest
+from unittest import mock
+
+from gravitino import gvfs
+
+
+class TestGVFSLazyLoading(unittest.TestCase):
+ """
+ Unit tests for lazy loading behavior of GravitinoClient in GVFS.
+
+ These tests verify that the GravitinoClient is created lazily on first
access
+ rather than eagerly during filesystem initialization, improving startup
performance
+ and reducing resource usage when the filesystem is created but never used.
+ """
+
+ def test_client_not_created_during_init(self):
+ """
+ Test that GravitinoClient is not created during filesystem
initialization.
+
+ Verifies the lazy loading behavior - the client should remain None
after
+ the filesystem is constructed, and only be created when first accessed.
+ """
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ skip_instance_cache=True,
+ )
+
+ # Client should still be None (not initialized yet)
+ # pylint: disable=protected-access
+ self.assertIsNone(
+ fs._operations._client,
+ "GravitinoClient should not be created during filesystem
initialization",
+ )
+
+ @mock.patch("gravitino.filesystem.gvfs_base_operations.create_client")
+ def test_client_created_on_first_access(self, mock_create_client):
+ """
+ Test that GravitinoClient is created lazily on first access.
+
+ Verifies that the client is initialized when _get_gravitino_client()
is called
+ for the first time, implementing the lazy initialization pattern.
+ """
+ # Mock the client creation to avoid network calls
+ mock_client = mock.MagicMock()
+ mock_create_client.return_value = mock_client
+
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ skip_instance_cache=True,
+ )
+
+ # pylint: disable=protected-access
+ # Initially, client should be None
+ self.assertIsNone(fs._operations._client)
+
+ # Access the client - should trigger initialization
+ client = fs._operations._get_gravitino_client()
+
+ # After access, both the returned client and internal _client should
be set
+ self.assertIsNotNone(
+ client, "Client returned from _get_gravitino_client() should not
be None"
+ )
+ self.assertIsNotNone(
+ fs._operations._client,
+ "Internal _client field should be initialized after first access",
+ )
+
+ # Verify create_client was called exactly once
+ mock_create_client.assert_called_once()
+
+ @mock.patch("gravitino.filesystem.gvfs_base_operations.create_client")
+ def test_same_client_instance_returned(self, mock_create_client):
+ """
+ Test that the same GravitinoClient instance is returned on multiple
calls.
+
+ Verifies the singleton pattern - multiple calls to
_get_gravitino_client()
+ should return the exact same object instance, not create new clients.
+ """
+ # Mock the client creation to avoid network calls
+ mock_client = mock.MagicMock()
+ mock_create_client.return_value = mock_client
+
+ fs = gvfs.GravitinoVirtualFileSystem(
+ server_uri="http://localhost:9090",
+ metalake_name="metalake_demo",
+ skip_instance_cache=True,
+ )
+
+ # pylint: disable=protected-access
+ # Get client twice
+ client1 = fs._operations._get_gravitino_client()
+ client2 = fs._operations._get_gravitino_client()
+
+ # Both should be the same instance (use 'is' for identity check)
+ self.assertIs(
+ client1,
+ client2,
+ "Multiple calls to _get_gravitino_client() should return the same
instance",
+ )
+
+ # Also verify it's the same as the internal _client field
+ self.assertIs(
+ client1,
+ fs._operations._client,
+ "Returned client should be the same as the internal _client field",
+ )
+
+ # Verify create_client was called exactly once (not twice)
+ mock_create_client.assert_called_once()
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index be8ebc447c..d6db08138c 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -93,7 +93,9 @@ class TestLocalFilesystem(unittest.TestCase):
options=options,
skip_instance_cache=True,
)
- headers = fs._operations._client._rest_client.request_headers
+ # Trigger lazy client initialization
+ client = fs._operations._get_gravitino_client()
+ headers = client._rest_client.request_headers
self.assertEqual(headers["k1"], "v1")
def test_request_timeout(self, *mock_methods):
@@ -102,7 +104,9 @@ class TestLocalFilesystem(unittest.TestCase):
metalake_name="metalake_demo",
skip_instance_cache=True,
)
- self.assertEqual(fs._operations._client._rest_client.timeout, TIMEOUT)
+ # Trigger lazy client initialization
+ client = fs._operations._get_gravitino_client()
+ self.assertEqual(client._rest_client.timeout, TIMEOUT)
options = {
f"{GVFSConfig.GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX}request_timeout": 60,
@@ -113,7 +117,9 @@ class TestLocalFilesystem(unittest.TestCase):
options=options,
skip_instance_cache=True,
)
- self.assertEqual(fs._operations._client._rest_client.timeout, 60)
+ # Trigger lazy client initialization
+ client = fs._operations._get_gravitino_client()
+ self.assertEqual(client._rest_client.timeout, 60)
def test_cache(self, *mock_methods):
fileset_storage_location = f"{self._fileset_dir}/test_cache"
@@ -163,7 +169,9 @@ class TestLocalFilesystem(unittest.TestCase):
options=options,
skip_instance_cache=True,
)
- token =
fs._operations._client._rest_client.auth_data_provider.get_token_data()
+ # Trigger lazy client initialization
+ client = fs._operations._get_gravitino_client()
+ token = client._rest_client.auth_data_provider.get_token_data()
token_string = base64.b64decode(
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
).decode("utf-8")
@@ -225,12 +233,14 @@ class TestLocalFilesystem(unittest.TestCase):
return_value=mock_authentication_with_error_authentication_type(),
):
with self.assertRaises(IllegalArgumentException):
- gvfs.GravitinoVirtualFileSystem(
+ fs = gvfs.GravitinoVirtualFileSystem(
server_uri=self._server_uri,
metalake_name=self._metalake_name,
options=fs_options,
skip_instance_cache=True,
)
+ # Trigger lazy client initialization to get the exception
+ fs._operations._get_gravitino_client()
# test bad request
with patch(
@@ -238,12 +248,14 @@ class TestLocalFilesystem(unittest.TestCase):
return_value=mock_authentication_invalid_grant_error(),
):
with self.assertRaises(BadRequestException):
- gvfs.GravitinoVirtualFileSystem(
+ fs = gvfs.GravitinoVirtualFileSystem(
server_uri=self._server_uri,
metalake_name=self._metalake_name,
options=fs_options,
skip_instance_cache=True,
)
+ # Trigger lazy client initialization to get the exception
+ fs._operations._get_gravitino_client()
def test_ls(self, *mock_methods):
fileset_storage_location = f"{self._fileset_dir}/test_ls"