This is an automated email from the ASF dual-hosted git repository.

mchades pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 55314f3f3c [#8948]Improve(gvfs-python): lazy load client in python 
gvfs (#8968)
55314f3f3c is described below

commit 55314f3f3cef48add089352f488214b5d873582d
Author: Junda Yang <[email protected]>
AuthorDate: Wed Oct 29 18:54:06 2025 -0700

    [#8948]Improve(gvfs-python): lazy load client in python gvfs (#8968)
    
    ### What changes were proposed in this pull request?
    
    Implemented lazy loading for GravitinoClient in Python GVFS
    (gvfs_base_operations.py):
    - Added double-checked locking pattern using threading.Lock() for
    thread-safe lazy initialization
    - Client parameters are stored in __init__, but client creation is
    deferred until first access via _get_gravitino_client()
    - Updated _get_fileset_catalog() to call _get_gravitino_client() instead
    of direct _client access
    
    
    ### Why are the changes needed?
    
    Fot Python GVFS fallbacks. This is a corresponding Python GVFS change
    based on the java gvfs change
    
https://github.com/apache/gravitino/commit/0e0a19e0266cb0a87a8137bee012602206299756
    
    Fix: #8948
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, minor behavioral change:
    Configuration validation and connection errors are now deferred until
    the first filesystem operation (e.g., exists(), ls()) instead of being
    thrown during filesystem initialization
    
    ### How was this patch tested?
    
    Unit tests updated and added
---
 .../gravitino/filesystem/gvfs_base_operations.py   |  34 ++++--
 .../tests/integration/test_gvfs_with_hdfs.py       |   4 +-
 .../tests/unittests/test_gvfs_lazy_loading.py      | 127 +++++++++++++++++++++
 .../tests/unittests/test_gvfs_with_local.py        |  24 +++-
 4 files changed, 175 insertions(+), 14 deletions(-)

diff --git a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py 
b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
index 57cb646a98..526666ccb3 100644
--- a/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
+++ b/clients/client-python/gravitino/filesystem/gvfs_base_operations.py
@@ -17,6 +17,7 @@
 import logging
 import os
 import sys
+import threading
 import time
 from abc import ABC, abstractmethod
 from pathlib import PurePosixPath
@@ -88,7 +89,9 @@ class BaseGVFSOperations(ABC):
         self._metalake = metalake_name
         self._options = options
 
-        request_headers = (
+        # Store client parameters for lazy initialization
+        self._server_uri = server_uri
+        self._request_headers = (
             None
             if options is None
             else {
@@ -102,7 +105,7 @@ class BaseGVFSOperations(ABC):
             }
         )
 
-        client_config = (
+        self._client_config = (
             None
             if options is None
             else {
@@ -115,9 +118,9 @@ class BaseGVFSOperations(ABC):
             }
         )
 
-        self._client = create_client(
-            options, server_uri, metalake_name, request_headers, client_config
-        )
+        # Lazy initialization - client is created on first access
+        self._client = None
+        self._client_lock = threading.Lock()
 
         cache_size = (
             GVFSConfig.DEFAULT_CACHE_SIZE
@@ -168,6 +171,23 @@ class BaseGVFSOperations(ABC):
         self._current_location_name = self._init_current_location_name()
         self._kwargs = kwargs
 
+    def _get_gravitino_client(self):
+        """Get the GravitinoClient, creating it lazily on first access.
+        Uses double-checked locking for thread-safe lazy initialization.
+        :return: The GravitinoClient instance
+        """
+        if self._client is None:
+            with self._client_lock:
+                if self._client is None:
+                    self._client = create_client(
+                        self._options,
+                        self._server_uri,
+                        self._metalake,
+                        self._request_headers,
+                        self._client_config,
+                    )
+        return self._client
+
     @property
     def current_location_name(self):
         return self._current_location_name
@@ -549,7 +569,7 @@ class BaseGVFSOperations(ABC):
 
     def _get_fileset_catalog(self, catalog_ident: NameIdentifier):
         if not self._enable_fileset_metadata_cache:
-            return self._client.load_catalog(catalog_ident.name())
+            return 
self._get_gravitino_client().load_catalog(catalog_ident.name())
 
         read_lock = self._catalog_cache_lock.gen_rlock()
         try:
@@ -566,7 +586,7 @@ class BaseGVFSOperations(ABC):
             cache_value: FilesetCatalog = 
self._catalog_cache.get(catalog_ident)
             if cache_value is not None:
                 return cache_value
-            catalog = self._client.load_catalog(catalog_ident.name())
+            catalog = 
self._get_gravitino_client().load_catalog(catalog_ident.name())
             self._catalog_cache[catalog_ident] = catalog
             return catalog
         finally:
diff --git a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py 
b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
index 922793a545..976e452508 100644
--- a/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
+++ b/clients/client-python/tests/integration/test_gvfs_with_hdfs.py
@@ -270,7 +270,9 @@ class TestGvfsWithHDFS(IntegrationTestEnv):
             metalake_name=self.metalake_name,
             options=options,
         )
-        token = 
fs._operations._client._rest_client.auth_data_provider.get_token_data()
+        # Trigger lazy client initialization
+        client = fs._operations._get_gravitino_client()
+        token = client._rest_client.auth_data_provider.get_token_data()
         token_string = base64.b64decode(
             
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
         ).decode("utf-8")
diff --git a/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py 
b/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py
new file mode 100644
index 0000000000..ffcb70ae1d
--- /dev/null
+++ b/clients/client-python/tests/unittests/test_gvfs_lazy_loading.py
@@ -0,0 +1,127 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest
+from unittest import mock
+
+from gravitino import gvfs
+
+
+class TestGVFSLazyLoading(unittest.TestCase):
+    """
+    Unit tests for lazy loading behavior of GravitinoClient in GVFS.
+
+    These tests verify that the GravitinoClient is created lazily on first 
access
+    rather than eagerly during filesystem initialization, improving startup 
performance
+    and reducing resource usage when the filesystem is created but never used.
+    """
+
+    def test_client_not_created_during_init(self):
+        """
+        Test that GravitinoClient is not created during filesystem 
initialization.
+
+        Verifies the lazy loading behavior - the client should remain None 
after
+        the filesystem is constructed, and only be created when first accessed.
+        """
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:9090";,
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
+        )
+
+        # Client should still be None (not initialized yet)
+        # pylint: disable=protected-access
+        self.assertIsNone(
+            fs._operations._client,
+            "GravitinoClient should not be created during filesystem 
initialization",
+        )
+
+    @mock.patch("gravitino.filesystem.gvfs_base_operations.create_client")
+    def test_client_created_on_first_access(self, mock_create_client):
+        """
+        Test that GravitinoClient is created lazily on first access.
+
+        Verifies that the client is initialized when _get_gravitino_client() 
is called
+        for the first time, implementing the lazy initialization pattern.
+        """
+        # Mock the client creation to avoid network calls
+        mock_client = mock.MagicMock()
+        mock_create_client.return_value = mock_client
+
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:9090";,
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
+        )
+
+        # pylint: disable=protected-access
+        # Initially, client should be None
+        self.assertIsNone(fs._operations._client)
+
+        # Access the client - should trigger initialization
+        client = fs._operations._get_gravitino_client()
+
+        # After access, both the returned client and internal _client should 
be set
+        self.assertIsNotNone(
+            client, "Client returned from _get_gravitino_client() should not 
be None"
+        )
+        self.assertIsNotNone(
+            fs._operations._client,
+            "Internal _client field should be initialized after first access",
+        )
+
+        # Verify create_client was called exactly once
+        mock_create_client.assert_called_once()
+
+    @mock.patch("gravitino.filesystem.gvfs_base_operations.create_client")
+    def test_same_client_instance_returned(self, mock_create_client):
+        """
+        Test that the same GravitinoClient instance is returned on multiple 
calls.
+
+        Verifies the singleton pattern - multiple calls to 
_get_gravitino_client()
+        should return the exact same object instance, not create new clients.
+        """
+        # Mock the client creation to avoid network calls
+        mock_client = mock.MagicMock()
+        mock_create_client.return_value = mock_client
+
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:9090";,
+            metalake_name="metalake_demo",
+            skip_instance_cache=True,
+        )
+
+        # pylint: disable=protected-access
+        # Get client twice
+        client1 = fs._operations._get_gravitino_client()
+        client2 = fs._operations._get_gravitino_client()
+
+        # Both should be the same instance (use 'is' for identity check)
+        self.assertIs(
+            client1,
+            client2,
+            "Multiple calls to _get_gravitino_client() should return the same 
instance",
+        )
+
+        # Also verify it's the same as the internal _client field
+        self.assertIs(
+            client1,
+            fs._operations._client,
+            "Returned client should be the same as the internal _client field",
+        )
+
+        # Verify create_client was called exactly once (not twice)
+        mock_create_client.assert_called_once()
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py 
b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index be8ebc447c..d6db08138c 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -93,7 +93,9 @@ class TestLocalFilesystem(unittest.TestCase):
             options=options,
             skip_instance_cache=True,
         )
-        headers = fs._operations._client._rest_client.request_headers
+        # Trigger lazy client initialization
+        client = fs._operations._get_gravitino_client()
+        headers = client._rest_client.request_headers
         self.assertEqual(headers["k1"], "v1")
 
     def test_request_timeout(self, *mock_methods):
@@ -102,7 +104,9 @@ class TestLocalFilesystem(unittest.TestCase):
             metalake_name="metalake_demo",
             skip_instance_cache=True,
         )
-        self.assertEqual(fs._operations._client._rest_client.timeout, TIMEOUT)
+        # Trigger lazy client initialization
+        client = fs._operations._get_gravitino_client()
+        self.assertEqual(client._rest_client.timeout, TIMEOUT)
 
         options = {
             
f"{GVFSConfig.GVFS_FILESYSTEM_CLIENT_CONFIG_PREFIX}request_timeout": 60,
@@ -113,7 +117,9 @@ class TestLocalFilesystem(unittest.TestCase):
             options=options,
             skip_instance_cache=True,
         )
-        self.assertEqual(fs._operations._client._rest_client.timeout, 60)
+        # Trigger lazy client initialization
+        client = fs._operations._get_gravitino_client()
+        self.assertEqual(client._rest_client.timeout, 60)
 
     def test_cache(self, *mock_methods):
         fileset_storage_location = f"{self._fileset_dir}/test_cache"
@@ -163,7 +169,9 @@ class TestLocalFilesystem(unittest.TestCase):
             options=options,
             skip_instance_cache=True,
         )
-        token = 
fs._operations._client._rest_client.auth_data_provider.get_token_data()
+        # Trigger lazy client initialization
+        client = fs._operations._get_gravitino_client()
+        token = client._rest_client.auth_data_provider.get_token_data()
         token_string = base64.b64decode(
             
token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
         ).decode("utf-8")
@@ -225,12 +233,14 @@ class TestLocalFilesystem(unittest.TestCase):
             return_value=mock_authentication_with_error_authentication_type(),
         ):
             with self.assertRaises(IllegalArgumentException):
-                gvfs.GravitinoVirtualFileSystem(
+                fs = gvfs.GravitinoVirtualFileSystem(
                     server_uri=self._server_uri,
                     metalake_name=self._metalake_name,
                     options=fs_options,
                     skip_instance_cache=True,
                 )
+                # Trigger lazy client initialization to get the exception
+                fs._operations._get_gravitino_client()
 
         # test bad request
         with patch(
@@ -238,12 +248,14 @@ class TestLocalFilesystem(unittest.TestCase):
             return_value=mock_authentication_invalid_grant_error(),
         ):
             with self.assertRaises(BadRequestException):
-                gvfs.GravitinoVirtualFileSystem(
+                fs = gvfs.GravitinoVirtualFileSystem(
                     server_uri=self._server_uri,
                     metalake_name=self._metalake_name,
                     options=fs_options,
                     skip_instance_cache=True,
                 )
+                # Trigger lazy client initialization to get the exception
+                fs._operations._get_gravitino_client()
 
     def test_ls(self, *mock_methods):
         fileset_storage_location = f"{self._fileset_dir}/test_ls"

Reply via email to