This is an automated email from the ASF dual-hosted git repository.

jli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 99539c786e fix(initialization): prevent startup failures when database 
tables don't   exist (#34584)
99539c786e is described below

commit 99539c786eac61aab7e54a05aae1d3d826b70bce
Author: Elizabeth Thompson <eschu...@gmail.com>
AuthorDate: Mon Aug 11 10:49:52 2025 -0700

    fix(initialization): prevent startup failures when database tables don't   
exist (#34584)
---
 superset/initialization/__init__.py     |  63 ++++-
 tests/unit_tests/initialization_test.py | 430 ++++++++++++++++++++++++++++++++
 2 files changed, 484 insertions(+), 9 deletions(-)

diff --git a/superset/initialization/__init__.py 
b/superset/initialization/__init__.py
index 9ac70a4ad1..51adf27cca 100644
--- a/superset/initialization/__init__.py
+++ b/superset/initialization/__init__.py
@@ -36,6 +36,7 @@ from flask_babel import lazy_gettext as _, refresh
 from flask_compress import Compress
 from flask_session import Session
 from sqlalchemy import inspect
+from sqlalchemy.exc import OperationalError
 from werkzeug.middleware.proxy_fix import ProxyFix
 
 from superset.constants import CHANGE_ME_SECRET_KEY
@@ -81,12 +82,36 @@ class SupersetAppInitializer:  # pylint: 
disable=too-many-public-methods
         self.superset_app = app
         self.config = app.config
         self.manifest: dict[Any, Any] = {}
+        self._db_uri_cache: str | None = None  # Cache for valid database URIs
 
     @deprecated(details="use self.superset_app instead of self.flask_app")  # 
type: ignore
     @property
     def flask_app(self) -> SupersetApp:
         return self.superset_app
 
+    @property
+    def database_uri(self) -> str:
+        """Lazy property for database URI to avoid early config access 
issues"""
+        # If we have a cached valid value, return it
+        if self._db_uri_cache is not None:
+            return self._db_uri_cache
+
+        # Try to get the URI from config
+        uri = self.config.get("SQLALCHEMY_DATABASE_URI", "")
+
+        # Check if this is a fallback value that indicates config isn't ready
+        if uri and not any(
+            fallback in uri.lower()
+            for fallback in ["nouser", "nopassword", "nohost", "nodb"]
+        ):
+            # Valid URI - cache it and return
+            self._db_uri_cache = uri
+            return uri
+
+        # Return the fallback value without caching
+        # This allows retry on next access when config might be ready
+        return uri
+
     def pre_init(self) -> None:
         """
         Called before all other init tasks are complete
@@ -484,13 +509,35 @@ class SupersetAppInitializer:  # pylint: 
disable=too-many-public-methods
         This is called during app initialization but checks table existence
         to handle cases where the app starts before database migration.
         """
-        inspector = inspect(db.engine)
+        # Check if database URI is a fallback value before trying to connect
+        db_uri = self.database_uri
+        if not db_uri or any(
+            fallback in db_uri.lower()
+            for fallback in ["nouser", "nopassword", "nohost", "nodb"]
+        ):
+            logger.warning(
+                "Database URI appears to be a fallback value. "
+                "Skipping database-dependent initialization. "
+                "This may indicate the workspace context is not ready yet."
+            )
+            return
 
-        # Check if core tables exist (use 'dashboards' as proxy for Superset 
tables)
-        if not inspector.has_table("dashboards"):
+        try:
+            inspector = inspect(db.engine)
+
+            # Check if core tables exist (use 'dashboards' as proxy for 
Superset tables)
+            if not inspector.has_table("dashboards"):
+                logger.debug(
+                    "Superset tables not yet created. Skipping 
database-dependent "
+                    "initialization. These features will be initialized after "
+                    "migration."
+                )
+                return
+        except OperationalError as e:
             logger.debug(
-                "Superset tables not yet created. Skipping database-dependent "
-                "initialization. These features will be initialized after 
migration."
+                "Error inspecting database tables. Skipping database-dependent 
"
+                "initialization: %s",
+                e,
             )
             return
 
@@ -600,7 +647,7 @@ class SupersetAppInitializer:  # pylint: 
disable=too-many-public-methods
                 # Simple connection test
                 db.engine.execute("SELECT 1")
         except Exception:
-            db_uri = self.config.get("SQLALCHEMY_DATABASE_URI", "")
+            db_uri = self.database_uri
             safe_uri = make_url_safe(db_uri) if db_uri else "Not configured"
             print(
                 f"{Fore.RED}ERROR: Cannot connect to database {safe_uri}\n"
@@ -651,9 +698,7 @@ class SupersetAppInitializer:  # pylint: 
disable=too-many-public-methods
         set_isolation_level_to = None
 
         if not isolation_level:
-            backend = make_url_safe(
-                self.config["SQLALCHEMY_DATABASE_URI"]
-            ).get_backend_name()
+            backend = make_url_safe(self.database_uri).get_backend_name()
             if backend in ("mysql", "postgresql"):
                 set_isolation_level_to = "READ COMMITTED"
 
diff --git a/tests/unit_tests/initialization_test.py 
b/tests/unit_tests/initialization_test.py
new file mode 100644
index 0000000000..bc955b4b31
--- /dev/null
+++ b/tests/unit_tests/initialization_test.py
@@ -0,0 +1,430 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from unittest.mock import MagicMock, patch
+
+from sqlalchemy.exc import OperationalError
+
+from superset.initialization import SupersetAppInitializer
+
+
+class TestSupersetAppInitializer:
+    @patch("superset.initialization.db")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.logger")
+    def test_init_database_dependent_features_skips_when_no_tables(
+        self, mock_logger, mock_inspect_func, mock_db
+    ):
+        """Test that initialization is skipped when core tables don't exist."""
+        # Setup
+        mock_app = MagicMock()
+        app_initializer = SupersetAppInitializer(mock_app)
+        mock_inspector = MagicMock()
+        mock_inspector.has_table.return_value = False
+        mock_inspect_func.return_value = mock_inspector
+        mock_db.engine = MagicMock()
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert
+        mock_inspect_func.assert_called_once_with(mock_db.engine)
+        mock_inspector.has_table.assert_called_once_with("dashboards")
+        mock_logger.debug.assert_called_once_with(
+            "Superset tables not yet created. Skipping database-dependent "
+            "initialization. These features will be initialized after "
+            "migration."
+        )
+
+    @patch("superset.initialization.db")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.logger")
+    def test_init_database_dependent_features_handles_operational_error(
+        self, mock_logger, mock_inspect_func, mock_db
+    ):
+        """Test that OperationalError during inspection is handled 
gracefully."""
+        # Setup
+        mock_app = MagicMock()
+        app_initializer = SupersetAppInitializer(mock_app)
+        error_msg = "Cannot connect to database"
+        mock_inspect_func.side_effect = OperationalError(error_msg, None, None)
+        mock_db.engine = MagicMock()
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert
+        mock_inspect_func.assert_called_once_with(mock_db.engine)
+        mock_logger.debug.assert_called_once()
+        call_args = mock_logger.debug.call_args
+        assert "Error inspecting database tables" in call_args[0][0]
+        # The error is passed as second argument with %s formatting
+        assert str(call_args[0][1]) == str(OperationalError(error_msg, None, 
None))
+
+    @patch("superset.initialization.db")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.feature_flag_manager")
+    @patch("superset.initialization.register_sqla_event_listeners")
+    @patch("superset.initialization.logger")
+    @patch("superset.commands.theme.seed.SeedSystemThemesCommand")
+    def test_init_database_dependent_features_initializes_when_tables_exist(
+        self,
+        mock_seed_themes_command,
+        mock_logger,
+        mock_register_listeners,
+        mock_feature_flag_manager,
+        mock_inspect_func,
+        mock_db,
+    ):
+        """Test that features are initialized when database tables exist."""
+        # Setup
+        mock_app = MagicMock()
+        app_initializer = SupersetAppInitializer(mock_app)
+        mock_inspector = MagicMock()
+        mock_inspector.has_table.return_value = True
+        mock_inspect_func.return_value = mock_inspector
+        mock_db.engine = MagicMock()
+        mock_feature_flag_manager.is_feature_enabled.return_value = True
+        mock_seed_themes = MagicMock()
+        mock_seed_themes_command.return_value = mock_seed_themes
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert
+        mock_inspect_func.assert_called_once_with(mock_db.engine)
+        # Check both tables are checked
+        assert mock_inspector.has_table.call_count == 2
+        mock_inspector.has_table.assert_any_call("dashboards")
+        mock_inspector.has_table.assert_any_call("themes")
+        mock_feature_flag_manager.is_feature_enabled.assert_called_with(
+            "TAGGING_SYSTEM"
+        )
+        mock_register_listeners.assert_called_once()
+        # Should seed themes
+        mock_seed_themes_command.assert_called_once()
+        mock_seed_themes.run.assert_called_once()
+        # Should not log skip message when tables exist
+        mock_logger.debug.assert_not_called()
+
+    @patch("superset.initialization.db")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.feature_flag_manager")
+    @patch("superset.initialization.register_sqla_event_listeners")
+    @patch("superset.commands.theme.seed.SeedSystemThemesCommand")
+    def test_init_database_dependent_features_skips_tagging_when_disabled(
+        self,
+        mock_seed_themes_command,
+        mock_register_listeners,
+        mock_feature_flag_manager,
+        mock_inspect_func,
+        mock_db,
+    ):
+        """Test that tagging system is not initialized when feature flag is 
disabled."""
+        # Setup
+        mock_app = MagicMock()
+        app_initializer = SupersetAppInitializer(mock_app)
+        mock_inspector = MagicMock()
+        mock_inspector.has_table.return_value = True
+        mock_inspect_func.return_value = mock_inspector
+        mock_db.engine = MagicMock()
+        mock_feature_flag_manager.is_feature_enabled.return_value = False
+        mock_seed_themes = MagicMock()
+        mock_seed_themes_command.return_value = mock_seed_themes
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert
+        mock_feature_flag_manager.is_feature_enabled.assert_called_with(
+            "TAGGING_SYSTEM"
+        )
+        mock_register_listeners.assert_not_called()
+        # Check both tables are checked
+        assert mock_inspector.has_table.call_count == 2
+        mock_inspector.has_table.assert_any_call("dashboards")
+        mock_inspector.has_table.assert_any_call("themes")
+
+    @patch("superset.initialization.db")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.logger")
+    def 
test_init_database_dependent_features_handles_inspector_error_in_has_table(
+        self, mock_logger, mock_inspect_func, mock_db
+    ):
+        """Test that OperationalError from has_table check is handled 
gracefully."""
+        # Setup
+        mock_app = MagicMock()
+        app_initializer = SupersetAppInitializer(mock_app)
+        mock_inspector = MagicMock()
+        error_msg = "Table check failed"
+        mock_inspector.has_table.side_effect = OperationalError(error_msg, 
None, None)
+        mock_inspect_func.return_value = mock_inspector
+        mock_db.engine = MagicMock()
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert
+        mock_inspect_func.assert_called_once_with(mock_db.engine)
+        mock_inspector.has_table.assert_called_once_with("dashboards")
+        # Should handle the error gracefully
+        mock_logger.debug.assert_called_once()
+        call_args = mock_logger.debug.call_args
+        assert "Error inspecting database tables" in call_args[0][0]
+
+    def test_database_uri_lazy_property(self):
+        """Test database_uri property uses lazy initialization with smart 
caching."""
+        # Setup
+        mock_app = MagicMock()
+        test_uri = "postgresql://user:pass@host:5432/testdb"
+        mock_app.config = {"SQLALCHEMY_DATABASE_URI": test_uri}
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Ensure cache is None initially
+        assert app_initializer._db_uri_cache is None
+
+        # First access should set the cache (valid URI)
+        uri = app_initializer.database_uri
+        assert uri == test_uri
+        assert app_initializer._db_uri_cache is not None
+        assert app_initializer._db_uri_cache == test_uri
+
+        # Second access should use cache (not call config.get again)
+        # Change the config to verify cache is being used
+        mock_app.config["SQLALCHEMY_DATABASE_URI"] = "different_uri"
+        uri2 = app_initializer.database_uri
+        assert (
+            uri2 == test_uri
+        )  # Should still return cached value (not "different_uri")
+
+    def test_database_uri_lazy_property_with_missing_config(self):
+        """Test that database_uri property returns empty string when config 
missing."""
+        # Setup
+        mock_app = MagicMock()
+        mock_app.config = {}  # Empty config
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Should return empty string when config key doesn't exist
+        uri = app_initializer.database_uri
+        assert uri == ""
+        # Empty string is a fallback value, so it should NOT be cached
+        assert app_initializer._db_uri_cache is None
+
+    def test_database_uri_prevents_nouser_fallback(self):
+        """Test that lazy initialization prevents nouser fallback during 
deployment."""
+        # Setup - simulate deployment scenario where config is loaded properly
+        mock_app = MagicMock()
+
+        # Config is properly loaded with real database URI (not nouser)
+        mock_app.config = {
+            "SQLALCHEMY_DATABASE_URI": 
"postgresql://realuser:realpass@realhost:5432/realdb"
+        }
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Access the database URI - should get the real URI, not nouser
+        uri = app_initializer.database_uri
+        assert uri == "postgresql://realuser:realpass@realhost:5432/realdb"
+        assert "nouser" not in uri
+        assert "nopassword" not in uri
+        assert "nohost" not in uri
+        assert "nodb" not in uri
+
+        # Verify cache is working
+        assert app_initializer._db_uri_cache is not None
+        assert (
+            app_initializer._db_uri_cache
+            == "postgresql://realuser:realpass@realhost:5432/realdb"
+        )
+
+    @patch("superset.initialization.make_url_safe")
+    @patch("superset.initialization.db")
+    def test_set_db_default_isolation_uses_lazy_property(
+        self, mock_db, mock_make_url_safe
+    ):
+        """Test that set_db_default_isolation uses the lazy database_uri 
property."""
+        # Setup
+        mock_app = MagicMock()
+        test_uri = "postgresql://user:pass@host:5432/testdb"
+        mock_app.config = {
+            "SQLALCHEMY_DATABASE_URI": test_uri,
+            "SQLALCHEMY_ENGINE_OPTIONS": {},
+        }
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Mock make_url_safe to return a URL with postgresql backend
+        mock_url = MagicMock()
+        mock_url.get_backend_name.return_value = "postgresql"
+        mock_make_url_safe.return_value = mock_url
+
+        # Mock db.engine
+        mock_engine = MagicMock()
+        mock_db.engine = mock_engine
+
+        # Execute
+        app_initializer.set_db_default_isolation()
+
+        # Assert that make_url_safe was called with the lazy property value
+        mock_make_url_safe.assert_called_once_with(test_uri)
+
+        # Should set isolation level to READ COMMITTED for postgresql
+        mock_engine.execution_options.assert_called_once_with(
+            isolation_level="READ COMMITTED"
+        )
+
+        # Verify the cache was created
+        assert app_initializer._db_uri_cache is not None
+        assert app_initializer._db_uri_cache == test_uri
+
+    @patch("superset.initialization.make_url_safe")
+    @patch("superset.initialization.db")
+    def test_set_db_default_isolation_with_empty_uri(self, mock_db, 
mock_make_url_safe):
+        """Test that set_db_default_isolation handles empty URI gracefully."""
+        # Setup
+        mock_app = MagicMock()
+        mock_app.config = {
+            "SQLALCHEMY_DATABASE_URI": "",  # Empty URI
+            "SQLALCHEMY_ENGINE_OPTIONS": {},
+        }
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Mock make_url_safe to return a URL with no backend
+        mock_url = MagicMock()
+        mock_url.get_backend_name.return_value = None
+        mock_make_url_safe.return_value = mock_url
+
+        # Execute
+        app_initializer.set_db_default_isolation()
+
+        # Should handle empty URI gracefully
+        mock_make_url_safe.assert_called_once_with("")
+
+        # Should not set isolation level for empty/unknown backend
+        mock_db.engine.execution_options.assert_not_called()
+
+    def test_database_uri_doesnt_cache_fallback_values(self):
+        """Test that fallback values like 'nouser' are not cached."""
+        # Setup
+        mock_app = MagicMock()
+
+        # Initially return the fallback nouser URI
+        config_dict = {
+            "SQLALCHEMY_DATABASE_URI": 
"postgresql://nouser:nopassword@nohost:5432/nodb"
+        }
+        mock_app.config = config_dict
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # First access returns fallback but shouldn't cache it
+        uri1 = app_initializer.database_uri
+        assert uri1 == "postgresql://nouser:nopassword@nohost:5432/nodb"
+        assert app_initializer._db_uri_cache is None  # Should NOT be cached
+
+        # Now config is properly loaded - update the same dict
+        config_dict["SQLALCHEMY_DATABASE_URI"] = (
+            "postgresql://realuser:realpass@realhost:5432/realdb"
+        )
+
+        # Second access should get the new value since fallback wasn't cached
+        uri2 = app_initializer.database_uri
+        assert uri2 == "postgresql://realuser:realpass@realhost:5432/realdb"
+        assert app_initializer._db_uri_cache is not None  # Now it should be 
cached
+        assert (
+            app_initializer._db_uri_cache
+            == "postgresql://realuser:realpass@realhost:5432/realdb"
+        )
+
+        # Third access should use cache even if config changes again
+        config_dict["SQLALCHEMY_DATABASE_URI"] = (
+            "postgresql://different:uri@host:5432/db"
+        )
+        uri3 = app_initializer.database_uri
+        assert (
+            uri3 == "postgresql://realuser:realpass@realhost:5432/realdb"
+        )  # Still cached value
+
+    def test_database_uri_caches_valid_uri(self):
+        """Test that valid URIs are properly cached."""
+        # Setup
+        mock_app = MagicMock()
+        valid_uri = "postgresql://validuser:validpass@validhost:5432/validdb"
+        mock_app.config = {"SQLALCHEMY_DATABASE_URI": valid_uri}
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # First access should cache valid URI
+        uri1 = app_initializer.database_uri
+        assert uri1 == valid_uri
+        assert app_initializer._db_uri_cache is not None
+        assert app_initializer._db_uri_cache == valid_uri
+
+        # Change config
+        mock_app.config = {
+            "SQLALCHEMY_DATABASE_URI": "postgresql://changed:uri@host:5432/db"
+        }
+
+        # Second access should still return cached value
+        uri2 = app_initializer.database_uri
+        assert uri2 == valid_uri  # Still the cached value, not the changed one
+
+    def test_database_uri_fallback_patterns(self):
+        """Test that various fallback patterns are not cached."""
+        # Test various fallback patterns
+        fallback_uris = [
+            "postgresql://nouser:nopassword@nohost:5432/nodb",
+            "mysql://NOUSER:NOPASSWORD@NOHOST:3306/NODB",
+            "postgresql://noUser:pass@host:5432/db",  # Contains 'nouser' 
(case insens.)
+            "sqlite:///nohost.db",  # Contains 'nohost'
+            "",  # Empty string
+        ]
+
+        for fallback_uri in fallback_uris:
+            # Create a fresh initializer for each test
+            mock_app = MagicMock()
+            mock_app.config = {"SQLALCHEMY_DATABASE_URI": fallback_uri}
+            app_initializer = SupersetAppInitializer(mock_app)
+
+            uri = app_initializer.database_uri
+
+            # Should return the value but not cache it
+            assert uri == fallback_uri
+            assert app_initializer._db_uri_cache is None, (
+                f"Should not cache: {fallback_uri}"
+            )
+
+    @patch("superset.initialization.logger")
+    @patch("superset.initialization.inspect")
+    @patch("superset.initialization.db")
+    def test_init_database_dependent_features_skips_with_fallback_uri(
+        self, mock_db, mock_inspect, mock_logger
+    ):
+        """Test that database-dependent features are skipped when URI is a 
fallback."""
+        # Setup
+        mock_app = MagicMock()
+        # Set a fallback URI that would cause connection to fail
+        mock_app.config = {
+            "SQLALCHEMY_DATABASE_URI": 
"postgresql://nouser:nopassword@nohost:5432/nodb"
+        }
+        app_initializer = SupersetAppInitializer(mock_app)
+
+        # Execute
+        app_initializer._init_database_dependent_features()
+
+        # Assert - should not try to inspect database
+        mock_inspect.assert_not_called()
+        # Should log warning about fallback URI
+        mock_logger.warning.assert_called_once()
+        warning_message = mock_logger.warning.call_args[0][0]
+        assert "fallback value" in warning_message
+        assert "workspace context" in warning_message

Reply via email to